LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 22/22] powerpc/eeh: Connect EEH error interrupt handle
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

The EEH error interrupts should have been exported by firmware
through device tree. The OS already installed the interrupt
handler (opal_interrupt()) for them. The patch checks if we have
any existing EEH errors and wakes the kernel thread up to process
that if possible.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/opal.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index aaa0dba..8aa99de 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -17,6 +17,8 @@
 #include <linux/interrupt.h>
 #include <asm/opal.h>
 #include <asm/firmware.h>
+#include <asm/io.h>
+#include <asm/eeh.h>
 
 #include "powernv.h"
 
@@ -271,6 +273,10 @@ static irqreturn_t opal_interrupt(int irq, void *data)
 	uint64_t events;
 
 	opal_handle_interrupt(virq_to_hw(irq), &events);
+#ifdef CONFIG_EEH
+	if (events & OPAL_EVENT_PCI_ERROR)
+		pci_err_event();
+#endif
 
 	/* XXX TODO: Do something with the events */
 
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 12/22] powerpc/eeh: EEH backend for P7IOC
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

For EEH on PowerNV platform, the overall architecture is a bit
different from that on pSeries platform. In order to support multiple
I/O chips in future, we split EEH to 3 layers for PowerNV platform:
EEH core, platform layer, I/O layer. It would give EEH implementation
on PowerNV much more flexibility in future.

The patch adds the EEH backend for P7IOC.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/Makefile   |    1 +
 arch/powerpc/platforms/powernv/eeh-ioda.c |   52 +++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/pci.h      |   22 +++++++++++-
 3 files changed, 74 insertions(+), 1 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/eeh-ioda.c

diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index bcc3cb4..09bd0cb 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -3,3 +3,4 @@ obj-y			+= opal-rtc.o opal-nvram.o
 
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
+obj-$(CONFIG_EEH)	+= eeh-ioda.o
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
new file mode 100644
index 0000000..ee1b538
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -0,0 +1,52 @@
+/*
+ * The file intends to implement the functions needed by EEH, which is
+ * built on IODA compliant chip. Actually, lots of functions related
+ * to EEH would be built based on the OPAL APIs.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/opal.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/tce.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+struct pnv_eeh_ops ioda_eeh_ops = {
+	.post_init		= NULL,
+	.set_option		= NULL,
+	.get_state		= NULL,
+	.reset			= NULL,
+	.get_log		= NULL,
+	.configure_bridge	= NULL
+};
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 7cfb7c8..f1af391 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -63,15 +63,32 @@ struct pnv_ioda_pe {
 	struct list_head	list;
 };
 
+/* IOC dependent EEH operations */
+#ifdef CONFIG_EEH
+struct pnv_eeh_ops {
+	int (*post_init)(struct pci_controller *hose);
+	int (*set_option)(struct eeh_pe *pe, int option);
+	int (*get_state)(struct eeh_pe *pe, int *state);
+	int (*reset)(struct eeh_pe *pe, int option);
+	int (*get_log)(struct eeh_pe *pe, int severity,
+		       char *drv_log, unsigned long len);
+	int (*configure_bridge)(struct eeh_pe *pe);
+};
+#endif /* CONFIG_EEH */
+
 struct pnv_phb {
 	struct pci_controller	*hose;
 	enum pnv_phb_type	type;
 	enum pnv_phb_model	model;
+	u64			hub_id;
 	u64			opal_id;
 	void __iomem		*regs;
 	int			initialized;
 	spinlock_t		lock;
-
+#ifdef CONFIG_EEH
+	struct pnv_eeh_ops	*eeh_ops;
+	int			eeh_enabled;
+#endif
 #ifdef CONFIG_PCI_MSI
 	unsigned long		*msi_map;
 	unsigned int		msi_base;
@@ -144,6 +161,9 @@ struct pnv_phb {
 };
 
 extern struct pci_ops pnv_pci_ops;
+#ifdef CONFIG_EEH
+extern struct pnv_eeh_ops ioda_eeh_ops;
+#endif
 
 extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 				      void *tce_mem, u64 tce_size,
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 21/22] powerpc/eeh: Process interrupts caused by EEH
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

On PowerNV platform, the EEH event is produced either by detect
on accessing config or I/O registers, or by interrupts dedicated
for EEH report. The patch adds support to process the interrupts
dedicated for EEH report.

Firstly, the kernel thread will be waken up to process incoming
interrupt. The PHBs will be scanned one by one to process all
existing EEH errors. Besides, There're mulple EEH errors that can
be reported from interrupts and we have differentiated actions
against them:

* If the IOC is dead, we will simply panic the system.
* If the PHB is dead, we also simply panic the system.
* If the PHB is fenced, EEH event will be sent to EEH core and
  the fenced PHB is expected to be resetted completely.
* If specific PE has been put into frozen state, EEH event will
  be sent to EEH core so that the PE will be resetted.
* If the error is informational one, we just output the related
  registers for debugging purpose and no more action will be
  taken.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h             |    8 +
 arch/powerpc/platforms/powernv/Makefile    |    2 +-
 arch/powerpc/platforms/powernv/pci-err.c   |  474 ++++++++++++++++++++++++++++
 arch/powerpc/platforms/pseries/eeh_event.c |    8 +
 4 files changed, 491 insertions(+), 1 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/pci-err.c

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 05b70dc..7d0dfbf 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -212,6 +212,14 @@ void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
 void eeh_remove_bus_device(struct pci_dev *, int);
 
+#ifdef CONFIG_PPC_POWERNV
+void pci_err_event(void);
+void pci_err_release(void);
+#else
+static inline void pci_err_event(void) { }
+static inline void pci_err_release(void) { }
+#endif
+
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
  *
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 7fe5951..912fa7c 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -3,4 +3,4 @@ obj-y			+= opal-rtc.o opal-nvram.o
 
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
-obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
+obj-$(CONFIG_EEH)	+= pci-err.o eeh-ioda.o eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/pci-err.c b/arch/powerpc/platforms/powernv/pci-err.c
new file mode 100644
index 0000000..86cb47a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-err.c
@@ -0,0 +1,474 @@
+/*
+ * The file instends to handle those interrupts dedicated for error
+ * detection from IOC chips. Currently, we only support P7IOC and
+ * need support more IOC chips in the future. The interrupts have
+ * been exported to hypervisor through "opal-interrupts" of "ibm,opal"
+ * OF node. When one of them comes in, the hypervisor simply turns
+ * to the firmware and expects the appropriate events returned. In
+ * turn, we will format one message and queue that in order to process
+ * it at later point.
+ *
+ * On the other hand, we need maintain information about the states
+ * of IO HUBs and their associated PHBs. The information would be
+ * shared by hypervisor and guests in future. While hypervisor or guests
+ * accessing IO HUBs, PHBs and PEs, the state should be checked and
+ * return approriate results. That would benefit EEH RTAS emulation in
+ * hypervisor as well.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/semaphore.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/kthread.h>
+#include <linux/msi.h>
+
+#include <asm/firmware.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/opal.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include <asm/eeh_event.h>
+#include <asm/eeh.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/* Debugging option */
+#ifdef PCI_ERR_DEBUG_ON
+#define PCI_ERR_DBG(args...)	pr_info(args)
+#else
+#define PCI_ERR_DBG(args...)
+#endif
+
+static struct task_struct *pci_err_thread;
+static struct semaphore pci_err_int_sem;
+static struct semaphore pci_err_seq_sem;
+static char *pci_err_diag;
+
+/**
+ * pci_err_event - Report PCI error event
+ * @type: event type
+ *
+ * The function is used for interrupt handler of PCI error IRQs to report
+ * event. As the result, the kthread will be started to handle the PCI
+ * error.
+ */
+void pci_err_event(void)
+{
+	/* Notify kthread to process error */
+	up(&pci_err_int_sem);
+}
+
+static void pci_err_take(void)
+{
+	down(&pci_err_seq_sem);
+}
+
+/**
+ * pci_err_release - Enable error report for sending events
+ *
+ * We're hanlding the EEH event one by one. Each time, there only has
+ * one EEH event caused by error IRQ. The function is called to enable
+ * error report in order to send more EEH events.
+ */
+void pci_err_release(void)
+{
+	up(&pci_err_seq_sem);
+}
+
+/*
+ * When we get global interrupts (e.g. P7IOC RGC), PCI error happens
+ * in critical component of the IOC or PHB. For the formal case, the
+ * firmware just returns OPAL_PCI_ERR_CLASS_HUB and we needn't proceed.
+ * For the late case, we probably need reset one particular PHB. For
+ * that, we're doing is to send EEH event to the toppset PE of that
+ * problematic PHB so that the PHB can be reset by the EEH core.
+ */
+static int pci_err_check_phb(struct pci_controller *hose)
+{
+	struct eeh_pe *phb_pe;
+
+	/* Find the PHB PE */
+	phb_pe = eeh_phb_pe_get(hose);
+	if (!phb_pe) {
+		pr_debug("%s Can't find PE for PHB#%d\n",
+			__func__, hose->global_number);
+		return -EEXIST;
+	}
+	PCI_ERR_DBG("PCI_ERR: PHB#%d PE found\n",
+		hose->global_number);
+
+	/*
+	 * Fence the PHB and send one event to EEH core
+	 * for further processing. We have to fence the
+	 * PHB here because the EEH core always return
+	 * normal state for PHB PE, so we can't do it
+	 * through EEH core.
+	 */
+	if (!(phb_pe->state & EEH_PE_ISOLATED)) {
+		PCI_ERR_DBG("PCI_ERR: Fence PHB#%x and send event "
+			    "to EEH core\n", hose->global_number);
+		eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+		eeh_send_failure_event(phb_pe, EEH_EVENT_INT);
+	} else {
+		pci_err_release();
+	}
+
+	return 0;
+}
+
+/*
+ * When we get interrupts from PHB, there are probablly some PEs that
+ * have been put into frozen state. What we need do is sent one message
+ * to the EEH device, no matter which one it is, so that the EEH core
+ * can check it out and do PE reset accordingly.
+ */
+static int pci_err_check_pe(struct pci_controller *hose, u16 pe_no)
+{
+	struct eeh_pe *phb_pe, *pe;
+	struct eeh_dev dev, *edev;
+
+	/* Find the PHB PE */
+	phb_pe = eeh_phb_pe_get(hose);
+	if (!phb_pe) {
+		pr_warning("%s Can't find PE for PHB#%d\n",
+			__func__, hose->global_number);
+		return -EEXIST;
+	}
+	PCI_ERR_DBG("PCI_ERR: PHB#%d PE found\n",
+		hose->global_number);
+
+	/*
+	 * If the PHB has been put into fenced state, we
+	 * needn't send the duplicate event because the
+	 * whole PHB is going to take reset.
+	 */
+	if (phb_pe->state & EEH_PE_ISOLATED)
+		return 0;
+
+	/* Find the PE according to PE# */
+	memset(&dev, 0, sizeof(struct eeh_dev));
+	dev.phb = hose;
+	dev.pe_config_addr = pe_no;
+	pe = eeh_pe_get(&dev);
+	if (!pe) {
+		pr_debug("%s: Can't find PE for PHB#%x - PE#%x\n",
+			__func__, hose->global_number, pe_no);
+		return -EEXIST;
+	}
+	PCI_ERR_DBG("PCI_ERR: PE (%x) found for PHB#%x - PE#%x\n",
+		pe->addr, hose->global_number, pe_no);
+
+	/*
+	 * It doesn't matter which EEH device to get
+	 * the message. Just pick up the one on the
+	 * toppest position.
+	 */
+	edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
+	if (!edev) {
+		pr_err("%s: No EEH devices hooked on PHB#%x - PE#%x\n",
+			__func__, hose->global_number, pe_no);
+		return -EEXIST;
+	}
+	PCI_ERR_DBG("PCI_ERR: First EEH device found on PHB#%x - PE#%x\n",
+		hose->global_number, pe_no);
+
+	if (!eeh_dev_check_failure(edev, EEH_EVENT_INT))
+		pci_err_release();
+
+	return 0;
+}
+
+static void pci_err_hub_diag_common(struct OpalIoP7IOCErrorData *data)
+{
+	/* GEM */
+	pr_info("  GEM XFIR:        %016llx\n", data->gemXfir);
+	pr_info("  GEM RFIR:        %016llx\n", data->gemRfir);
+	pr_info("  GEM RIRQFIR:     %016llx\n", data->gemRirqfir);
+	pr_info("  GEM Mask:        %016llx\n", data->gemMask);
+	pr_info("  GEM RWOF:        %016llx\n", data->gemRwof);
+
+	/* LEM */
+	pr_info("  LEM FIR:         %016llx\n", data->lemFir);
+	pr_info("  LEM Error Mask:  %016llx\n", data->lemErrMask);
+	pr_info("  LEM Action 0:    %016llx\n", data->lemAction0);
+	pr_info("  LEM Action 1:    %016llx\n", data->lemAction1);
+	pr_info("  LEM WOF:         %016llx\n", data->lemWof);
+}
+
+static void pci_err_hub_diag_data(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct OpalIoP7IOCErrorData *data;
+	long ret;
+
+	data = (struct OpalIoP7IOCErrorData *)pci_err_diag;
+	ret = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE);
+	if (ret != OPAL_SUCCESS) {
+		pr_warning("%s: Failed to get HUB#%llx diag-data, ret=%ld\n",
+			__func__, phb->hub_id, ret);
+		return;
+	}
+
+	/* Check the error type */
+	if (data->type <= OPAL_P7IOC_DIAG_TYPE_NONE ||
+	    data->type >= OPAL_P7IOC_DIAG_TYPE_LAST) {
+		pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n",
+			__func__, phb->hub_id, data->type);
+		return;
+	}
+
+	switch (data->type) {
+	case OPAL_P7IOC_DIAG_TYPE_RGC:
+		pr_info("P7IOC diag-data for RGC\n\n");
+		pci_err_hub_diag_common(data);
+		pr_info("  RGC Status:      %016llx\n", data->rgc.rgcStatus);
+		pr_info("  RGC LDCP:        %016llx\n", data->rgc.rgcLdcp);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_BI:
+		pr_info("P7IOC diag-data for BI %s\n\n",
+			data->bi.biDownbound ? "Downbound" : "Upbound");
+		pci_err_hub_diag_common(data);
+		pr_info("  BI LDCP 0:       %016llx\n", data->bi.biLdcp0);
+		pr_info("  BI LDCP 1:       %016llx\n", data->bi.biLdcp1);
+		pr_info("  BI LDCP 2:       %016llx\n", data->bi.biLdcp2);
+		pr_info("  BI Fence Status: %016llx\n", data->bi.biFenceStatus);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_CI:
+		pr_info("P7IOC diag-data for CI Port %d\\nn",
+			data->ci.ciPort);
+		pci_err_hub_diag_common(data);
+		pr_info("  CI Port Status:  %016llx\n", data->ci.ciPortStatus);
+		pr_info("  CI Port LDCP:    %016llx\n", data->ci.ciPortLdcp);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_MISC:
+		pr_info("P7IOC diag-data for MISC\n\n");
+		pci_err_hub_diag_common(data);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_I2C:
+		pr_info("P7IOC diag-data for I2C\n\n");
+		pci_err_hub_diag_common(data);
+		break;
+	}
+}
+
+static void pci_err_phb_diag_data(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct OpalIoP7IOCPhbErrorData *data;
+	int i;
+	long ret;
+
+	data = (struct OpalIoP7IOCPhbErrorData *)pci_err_diag;
+	ret = opal_pci_get_phb_diag_data(phb->opal_id, data, PAGE_SIZE);
+	if (ret != OPAL_SUCCESS) {
+		pr_warning("%s: Failed to get diag-data for PHB#%x, ret=%ld\n",
+			__func__, hose->global_number, ret);
+		return;
+	}
+
+	pr_info("PHB#%x Diag-data\n\n", hose->global_number);
+	pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
+
+	pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
+	pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
+	pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
+
+	pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
+	pr_info("  slotStatus:           %08x\n", data->slotStatus);
+	pr_info("  linkStatus:           %08x\n", data->linkStatus);
+	pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
+	pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
+
+	pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
+	pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
+	pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
+	pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
+	pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
+	pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
+	pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
+	pr_info("  sourceId:             %08x\n", data->sourceId);
+
+	pr_info("  errorClass:           %016llx\n", data->errorClass);
+	pr_info("  correlator:           %016llx\n", data->correlator);
+	pr_info("  p7iocPlssr:           %016llx\n", data->p7iocPlssr);
+	pr_info("  p7iocCsr:             %016llx\n", data->p7iocCsr);
+	pr_info("  lemFir:               %016llx\n", data->lemFir);
+	pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
+	pr_info("  lemWOF:               %016llx\n", data->lemWOF);
+	pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
+	pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
+	pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
+	pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
+	pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
+	pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
+	pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
+	pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
+	pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
+	pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
+	pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
+	pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
+	pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
+	pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
+	pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
+	pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+
+	for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
+		if ((data->pestA[i] >> 63) == 0 &&
+		    (data->pestB[i] >> 63) == 0)
+			continue;
+
+		pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
+		pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+	}
+}
+
+/*
+ * Process PCI errors from IOC, PHB, or PE. Here's the list
+ * of expected error types and their severities, as well as
+ * the corresponding action.
+ *
+ * Type                        Severity                Action
+ * OPAL_EEH_ERROR_IOC  OPAL_EEH_SEV_IOC_DEAD   panic
+ * OPAL_EEH_ERROR_IOC  OPAL_EEH_SEV_INF        diag_data
+ * OPAL_EEH_ERROR_PHB  OPAL_EEH_SEV_PHB_DEAD   panic
+ * OPAL_EEH_ERROR_PHB  OPAL_EEH_SEV_PHB_FENCED eeh
+ * OPAL_EEH_ERROR_PHB  OPAL_EEH_SEV_INF        diag_data
+ * OPAL_EEH_ERROR_PE   OPAL_EEH_SEV_PE_ER      eeh
+ */
+static void pci_err_process(struct pci_controller *hose,
+			u16 err_type, u16 severity, u16 pe_no)
+{
+	PCI_ERR_DBG("PCI_ERR: Process error (%d, %d, %d) on PHB#%x\n",
+		err_type, severity, pe_no, hose->global_number);
+
+	switch (err_type) {
+	case OPAL_EEH_IOC_ERROR:
+		if (severity == OPAL_EEH_SEV_IOC_DEAD)
+			panic("Dead IOC of PHB#%x", hose->global_number);
+		else if (severity == OPAL_EEH_SEV_INF) {
+			pci_err_hub_diag_data(hose);
+			pci_err_release();
+		}
+
+		break;
+	case OPAL_EEH_PHB_ERROR:
+		if (severity == OPAL_EEH_SEV_PHB_DEAD)
+			panic("Dead PHB#%x", hose->global_number);
+		else if (severity == OPAL_EEH_SEV_PHB_FENCED)
+			pci_err_check_phb(hose);
+		else if (severity == OPAL_EEH_SEV_INF) {
+			pci_err_phb_diag_data(hose);
+			pci_err_release();
+		}
+
+		break;
+	case OPAL_EEH_PE_ERROR:
+		pci_err_check_pe(hose, pe_no);
+		break;
+	}
+}
+
+static int pci_err_handler(void *dummy)
+{
+	struct pnv_phb *phb;
+	struct pci_controller *hose, *tmp;
+	u64 frozen_pe_no;
+	u16 err_type, severity;
+	long ret;
+
+	while (!kthread_should_stop()) {
+		down(&pci_err_int_sem);
+		PCI_ERR_DBG("PCI_ERR: Get PCI error semaphore\n");
+
+		list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+			phb = hose->private_data;
+restart:
+			pci_err_take();
+			ret = opal_pci_next_error(phb->opal_id,
+					&frozen_pe_no, &err_type, &severity);
+
+			/* If OPAL API returns error, we needn't proceed */
+			if (ret != OPAL_SUCCESS) {
+				PCI_ERR_DBG("PCI_ERR: Invalid return value on "
+					    "PHB#%x (0x%lx) from opal_pci_next_error",
+					    hose->global_number, ret);
+				pci_err_release();
+				continue;
+			}
+
+			/* If the PHB doesn't have error, stop processing */
+			if (err_type == OPAL_EEH_NO_ERROR ||
+			    severity == OPAL_EEH_SEV_NO_ERROR) {
+				PCI_ERR_DBG("PCI_ERR: No error found on PHB#%x\n",
+					hose->global_number);
+				pci_err_release();
+				continue;
+			}
+
+			/*
+			 * Process the error until there're no pending
+			 * errors on the specific PHB.
+			 */
+			pci_err_process(hose, err_type, severity, frozen_pe_no);
+			goto restart;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * pci_err_init - Initialize PCI error handling component
+ *
+ * It should be done before OPAL interrupts got registered because
+ * that depends on this.
+ */
+static int __init pci_err_init(void)
+{
+	int ret = -ENOMEM;
+
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return ret;
+
+	pci_err_diag = (char *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
+	if (!pci_err_diag) {
+		pr_err("%s: Failed to alloc memory for diag data\n",
+			__func__);
+		return ret;
+	}
+
+	/* Initialize semaphore and start kthread */
+	sema_init(&pci_err_int_sem, 0);
+	sema_init(&pci_err_seq_sem, 1);
+	pci_err_thread = kthread_run(pci_err_handler, NULL, "PCI_ERR");
+	if (IS_ERR(pci_err_thread)) {
+		free_page((unsigned long)pci_err_diag);
+		ret = PTR_ERR(pci_err_thread);
+		pr_err("%s: Failed to start kthread, ret=%d\n",
+			__func__, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+arch_initcall(pci_err_init);
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index 1f86b80..e4c636e 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -84,6 +84,14 @@ static int eeh_event_handler(void * dummy)
 	eeh_handle_event(pe);
 	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 
+	/*
+	 * If it's the event caused by error reporting IRQ,
+	 * we need release the module so that precedent events
+	 * could be fired.
+	 */
+	if (event->flag & EEH_EVENT_INT)
+		pci_err_release();
+
 	kfree(event);
 	mutex_unlock(&eeh_event_mutex);
 
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 13/22] powerpc/eeh: I/O chip post initialization
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

The post initialization (struct eeh_ops::post_init) is called after
the EEH probe is done. On the other hand, the EEH core post initialization
is designed to call platform and then I/O chip backend on PowerNV
platform.

The patch adds the backend for I/O chip to notify the platform
the specific PHB is ready to supply EEH service.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c |   19 ++++++++++++++++++-
 1 files changed, 18 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index ee1b538..38c1efe 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -42,8 +42,25 @@
 #include "powernv.h"
 #include "pci.h"
 
+/**
+ * ioda_eeh_post_init - Chip dependent post initialization
+ * @hose: PCI controller
+ *
+ * The function will be called after eeh PEs and devices
+ * have been built. That means the EEH is ready to supply
+ * service with I/O cache.
+ */
+static int ioda_eeh_post_init(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+
+	phb->eeh_enabled = 1;
+
+	return 0;
+}
+
 struct pnv_eeh_ops ioda_eeh_ops = {
-	.post_init		= NULL,
+	.post_init		= ioda_eeh_post_init,
 	.set_option		= NULL,
 	.get_state		= NULL,
 	.reset			= NULL,
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 20/22] powerpc/eeh: Enable EEH check for config access
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch enables EEH check and let EEH core to process the EEH
errors for PowerNV platform while accessing config space. Originally,
the implementation already had mechanism to check EEH errors and
tried to recover from them. However, we never let EEH core to handle
the EEH errors.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci.c |   42 ++++++++++++++++++++++++++++++++-
 1 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b8b8e0b..6f464dc 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -31,6 +31,8 @@
 #include <asm/iommu.h>
 #include <asm/tce.h>
 #include <asm/firmware.h>
+#include <asm/eeh_event.h>
+#include <asm/eeh.h>
 
 #include "powernv.h"
 #include "pci.h"
@@ -287,6 +289,10 @@ static int pnv_pci_read_config(struct pci_bus *bus,
 {
 	struct pci_controller *hose = pci_bus_to_host(bus);
 	struct pnv_phb *phb = hose->private_data;
+#ifdef CONFIG_EEH
+	struct device_node *busdn, *dn;
+	struct eeh_pe *phb_pe = NULL;
+#endif
 	u32 bdfn = (((uint64_t)bus->number) << 8) | devfn;
 	s64 rc;
 
@@ -319,8 +325,35 @@ static int pnv_pci_read_config(struct pci_bus *bus,
 	cfg_dbg("pnv_pci_read_config bus: %x devfn: %x +%x/%x -> %08x\n",
 		bus->number, devfn, where, size, *val);
 
-	/* Check if the PHB got frozen due to an error (no response) */
+	/*
+	 * Check if the specified PE has been put into frozen
+	 * state. On the other hand, we needn't do that while
+	 * the PHB has been put into frozen state because of
+	 * PHB-fatal errors.
+	 */
+#ifdef CONFIG_EEH
+	phb_pe = eeh_phb_pe_get(hose);
+	if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED))
+		return PCIBIOS_SUCCESSFUL;
+
+	if (phb->eeh_enabled) {
+		if (*val == EEH_IO_ERROR_VALUE(size)) {
+			busdn = pci_bus_to_OF_node(bus);
+			for (dn = busdn->child; dn; dn = dn->sibling) {
+				struct pci_dn *pdn = PCI_DN(dn);
+
+				if (pdn && pdn->devfn == devfn &&
+					eeh_dev_check_failure(of_node_to_eeh_dev(dn),
+							EEH_EVENT_NORMAL))
+					return PCIBIOS_DEVICE_NOT_FOUND;
+			}
+		}
+	} else {
+		pnv_pci_config_check_eeh(phb, bus, bdfn);
+	}
+#else
 	pnv_pci_config_check_eeh(phb, bus, bdfn);
+#endif
 
 	return PCIBIOS_SUCCESSFUL;
 }
@@ -351,8 +384,14 @@ static int pnv_pci_write_config(struct pci_bus *bus,
 	default:
 		return PCIBIOS_FUNC_NOT_SUPPORTED;
 	}
+
 	/* Check if the PHB got frozen due to an error (no response) */
+#ifdef CONFIG_EEH
+	if (!phb->eeh_enabled)
+		pnv_pci_config_check_eeh(phb, bus, bdfn);
+#else
 	pnv_pci_config_check_eeh(phb, bus, bdfn);
+#endif
 
 	return PCIBIOS_SUCCESSFUL;
 }
@@ -362,7 +401,6 @@ struct pci_ops pnv_pci_ops = {
 	.write = pnv_pci_write_config,
 };
 
-
 static void pnv_tce_invalidate(struct iommu_table *tbl,
 			       u64 *startp, u64 *endp)
 {
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 18/22] powerpc/eeh: PowerNV EEH backends
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch adds EEH backends for PowerNV platform. It's notable that
part of those EEH backends call to the I/O chip dependent backends.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/Makefile      |    2 +-
 arch/powerpc/platforms/powernv/eeh-powernv.c |  373 ++++++++++++++++++++++++++
 2 files changed, 374 insertions(+), 1 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/eeh-powernv.c

diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 09bd0cb..7fe5951 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -3,4 +3,4 @@ obj-y			+= opal-rtc.o opal-nvram.o
 
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
-obj-$(CONFIG_EEH)	+= eeh-ioda.o
+obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
new file mode 100644
index 0000000..7544a48
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -0,0 +1,373 @@
+/*
+ * The file intends to implement the platform dependent EEH operations on
+ * powernv platform. Actually, the powernv was created in order to fully
+ * hypervisor support.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/ppc-pci.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/**
+ * powernv_eeh_init - EEH platform dependent initialization
+ *
+ * EEH platform dependent initialization on powernv
+ */
+static int powernv_eeh_init(void)
+{
+	/* Set EEH probe mode */
+	eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
+
+	return 0;
+}
+
+/**
+ * powernv_eeh_post_init - EEH platform dependent post initialization
+ *
+ * EEH platform dependent post initialization on powernv. When
+ * the function is called, the EEH PEs and devices should have
+ * been built. If the I/O cache staff has been built, EEH is
+ * ready to supply service.
+ */
+static int powernv_eeh_post_init(void)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	int ret = 0;
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		phb = hose->private_data;
+
+		if (phb->eeh_ops && phb->eeh_ops->post_init) {
+			ret = phb->eeh_ops->post_init(hose);
+			if (ret)
+				break;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * powernv_eeh_dev_probe - Do probe on PCI device
+ * @dev: PCI device
+ * @flag: unused
+ *
+ * When EEH module is installed during system boot, all PCI devices
+ * are checked one by one to see if it supports EEH. The function
+ * is introduced for the purpose. By default, EEH has been enabled
+ * on all PCI devices. That's to say, we only need do necessary
+ * initialization on the corresponding eeh device and create PE
+ * accordingly.
+ *
+ * It's notable that's unsafe to retrieve the EEH device through
+ * the corresponding PCI device. During the PCI device hotplug, which
+ * was possiblly triggered by EEH core, the binding between EEH device
+ * and the PCI device isn't built yet.
+ */
+static void *powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct device_node *dn = pci_device_to_OF_node(dev);
+	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+
+	/*
+	 * When probing the root bridge, which doesn't have any
+	 * subordinate PCI devices. We don't have OF node for
+	 * the root bridge. So it's not reasonable to continue
+	 * the probing.
+	 */
+	if (!dn || !edev)
+		return NULL;
+
+	/* Skip for PCI-ISA bridge */
+	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
+		return NULL;
+
+	/* Initialize eeh device */
+	edev->class_code	= dev->class;
+	edev->mode		= 0;
+	edev->config_addr	= ((dev->bus->number << 8) | dev->devfn);
+	edev->pe_config_addr	= phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);
+
+	/* Create PE */
+	eeh_add_to_parent_pe(edev);
+
+	/* Enable EEH explicitly */
+	eeh_subsystem_enabled = 1;
+
+	/* Save memory bars */
+	eeh_save_bars(edev);
+
+	return NULL;
+}
+
+/**
+ * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
+ * @pe: EEH PE
+ * @option: operation to be issued
+ *
+ * The function is used to control the EEH functionality globally.
+ * Currently, following options are support according to PAPR:
+ * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
+ */
+static int powernv_eeh_set_option(struct eeh_pe *pe, int option)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int ret = -EEXIST;
+
+	/*
+	 * What we need do is pass it down for hardware
+	 * implementation to handle it.
+	 */
+	if (phb->eeh_ops && phb->eeh_ops->set_option)
+		ret = phb->eeh_ops->set_option(pe, option);
+
+	return ret;
+}
+
+/**
+ * powernv_eeh_get_pe_addr - Retrieve PE address
+ * @pe: EEH PE
+ *
+ * Retrieve the PE address according to the given tranditional
+ * PCI BDF (Bus/Device/Function) address.
+ */
+static int powernv_eeh_get_pe_addr(struct eeh_pe *pe)
+{
+	return pe->addr;
+}
+
+/**
+ * powernv_eeh_get_state - Retrieve PE state
+ * @pe: EEH PE
+ * @state: return value
+ *
+ * Retrieve the state of the specified PE. For IODA-compitable
+ * platform, it should be retrieved from IODA table. Therefore,
+ * we prefer passing down to hardware implementation to handle
+ * it.
+ */
+static int powernv_eeh_get_state(struct eeh_pe *pe, int *state)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int ret = EEH_STATE_NOT_SUPPORT;
+
+	if (phb->eeh_ops && phb->eeh_ops->get_state)
+		ret = phb->eeh_ops->get_state(pe, state);
+
+	return ret;
+}
+
+/**
+ * powernv_eeh_reset - Reset the specified PE
+ * @pe: EEH PE
+ * @option: reset option
+ *
+ * Reset the specified PE
+ */
+static int powernv_eeh_reset(struct eeh_pe *pe, int option)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int ret = -EEXIST;
+
+	if (phb->eeh_ops && phb->eeh_ops->reset)
+		ret = phb->eeh_ops->reset(pe, option);
+
+	return ret;
+}
+
+/**
+ * powernv_eeh_wait_state - Wait for PE state
+ * @pe: EEH PE
+ * @max_wait: maximal period in microsecond
+ *
+ * Wait for the state of associated PE. It might take some time
+ * to retrieve the PE's state.
+ */
+static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
+{
+	int ret;
+	int mwait;
+
+	while (1) {
+		ret = powernv_eeh_get_state(pe, &mwait);
+
+		/*
+		 * If the PE's state is temporarily unavailable,
+		 * we have to wait for the specified time. Otherwise,
+		 * the PE's state will be returned immediately.
+		 */
+		if (ret != EEH_STATE_UNAVAILABLE)
+			return ret;
+
+		max_wait -= mwait;
+		msleep(mwait);
+	}
+
+	return EEH_STATE_NOT_SUPPORT;
+}
+
+/**
+ * powernv_eeh_get_log - Retrieve error log
+ * @pe: EEH PE
+ * @severity: temporary or permanent error log
+ * @drv_log: driver log to be combined with retrieved error log
+ * @len: length of driver log
+ *
+ * Retrieve the temporary or permanent error from the PE.
+ */
+static int powernv_eeh_get_log(struct eeh_pe *pe, int severity,
+			char *drv_log, unsigned long len)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int ret = -EEXIST;
+
+	if (phb->eeh_ops && phb->eeh_ops->get_log)
+		ret = phb->eeh_ops->get_log(pe, severity, drv_log, len);
+
+	return ret;
+}
+
+/**
+ * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
+ * @pe: EEH PE
+ *
+ * The function will be called to reconfigure the bridges included
+ * in the specified PE so that the mulfunctional PE would be recovered
+ * again.
+ */
+static int powernv_eeh_configure_bridge(struct eeh_pe *pe)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int ret = 0;
+
+	if (phb->eeh_ops && phb->eeh_ops->configure_bridge)
+		ret = phb->eeh_ops->configure_bridge(pe);
+
+	return ret;
+}
+
+/**
+ * powernv_eeh_read_config - Read PCI config space
+ * @dn: device node
+ * @where: PCI address
+ * @size: size to read
+ * @val: return value
+ *
+ * Read config space from the speicifed device
+ */
+static int powernv_eeh_read_config(struct device_node *dn, int where,
+				   int size, u32 *val)
+{
+	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+	struct pci_controller *hose = edev->phb;
+
+	return hose->ops->read(dev->bus, dev->devfn, where, size, val);
+}
+
+/**
+ * powernv_eeh_write_config - Write PCI config space
+ * @dn: device node
+ * @where: PCI address
+ * @size: size to write
+ * @val: value to be written
+ *
+ * Write config space to the specified device
+ */
+static int powernv_eeh_write_config(struct device_node *dn, int where,
+				    int size, u32 val)
+{
+	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+	struct pci_controller *hose = edev->phb;
+
+	hose = pci_bus_to_host(dev->bus);
+
+	return hose->ops->write(dev->bus, dev->devfn, where, size, val);
+}
+
+static struct eeh_ops powernv_eeh_ops = {
+	.name                   = "powernv",
+	.init                   = powernv_eeh_init,
+	.post_init              = powernv_eeh_post_init,
+	.of_probe               = NULL,
+	.dev_probe              = powernv_eeh_dev_probe,
+	.set_option             = powernv_eeh_set_option,
+	.get_pe_addr            = powernv_eeh_get_pe_addr,
+	.get_state              = powernv_eeh_get_state,
+	.reset                  = powernv_eeh_reset,
+	.wait_state             = powernv_eeh_wait_state,
+	.get_log                = powernv_eeh_get_log,
+	.configure_bridge       = powernv_eeh_configure_bridge,
+	.read_config            = powernv_eeh_read_config,
+	.write_config           = powernv_eeh_write_config
+};
+
+/**
+ * eeh_powernv_init - Register platform dependent EEH operations
+ *
+ * EEH initialization on powernv platform. This function should be
+ * called before any EEH related functions.
+ */
+static int __init eeh_powernv_init(void)
+{
+	int ret = -EINVAL;
+
+	if (!machine_is(powernv))
+		return ret;
+
+	ret = eeh_ops_register(&powernv_eeh_ops);
+	if (!ret)
+		pr_info("EEH: PowerNV platform initialized\n");
+	else
+		pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);
+
+	return ret;
+}
+
+early_initcall(eeh_powernv_init);
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 19/22] powerpc/eeh: Initialization for PowerNV
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch initializes EEH for PowerNV platform. Because the OPAL
APIs requires HUB ID, we need trace that through struct pnv_phb.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c   |   13 +++++++++++--
 arch/powerpc/platforms/powernv/pci-p5ioc2.c |    6 ++++--
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 8e90e89..4af5fdf 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -782,6 +782,11 @@ static void pnv_pci_ioda_fixup(void)
 	pnv_pci_ioda_setup_PEs();
 	pnv_pci_ioda_setup_seg();
 	pnv_pci_ioda_setup_DMA();
+
+#ifdef CONFIG_EEH
+	eeh_addr_cache_build();
+	eeh_init();
+#endif
 }
 
 /*
@@ -852,7 +857,7 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
 	return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
 }
 
-void __init pnv_pci_init_ioda1_phb(struct device_node *np)
+void __init pnv_pci_init_ioda1_phb(struct device_node *np, u64 hub_id)
 {
 	struct pci_controller *hose;
 	static int primary = 1;
@@ -889,6 +894,7 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
 	hose->first_busno = 0;
 	hose->last_busno = 0xff;
 	hose->private_data = phb;
+	phb->hub_id = hub_id;
 	phb->opal_id = phb_id;
 	phb->type = PNV_PHB_IODA1;
 
@@ -985,6 +991,9 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
 		pr_devel(" M32_SAR  = 0x%016llx\n", in_be64(phb->regs + 0x1a0));
 	}
 	phb->hose->ops = &pnv_pci_ops;
+#ifdef CONFIG_EEH
+	phb->eeh_ops = &ioda_eeh_ops;
+#endif
 
 	/* Setup RID -> PE mapping function */
 	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
@@ -1034,6 +1043,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np)
 	for_each_child_of_node(np, phbn) {
 		/* Look for IODA1 PHBs */
 		if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
-			pnv_pci_init_ioda1_phb(phbn);
+			pnv_pci_init_ioda1_phb(phbn, hub_id);
 	}
 }
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index 7db8771..deab179 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -93,7 +93,7 @@ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
 	set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table);
 }
 
-static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
+static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
 					   void *tce_mem, u64 tce_size)
 {
 	struct pnv_phb *phb;
@@ -134,6 +134,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
 	phb->hose->first_busno = 0;
 	phb->hose->last_busno = 0xff;
 	phb->hose->private_data = phb;
+	phb->hub_id = hub_id;
 	phb->opal_id = phb_id;
 	phb->type = PNV_PHB_P5IOC2;
 	phb->model = PNV_PHB_MODEL_P5IOC2;
@@ -227,7 +228,8 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
 	for_each_child_of_node(np, phbn) {
 		if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
 		    of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) {
-			pnv_pci_init_p5ioc2_phb(phbn, tce_mem, tce_per_phb);
+			pnv_pci_init_p5ioc2_phb(phbn, hub_id,
+					tce_mem, tce_per_phb);
 			tce_mem += tce_per_phb;
 		}
 	}
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 17/22] powerpc/eeh: I/O chip PE log and bridge setup
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch adds backends to retrieve error log and configure p2p
bridges for the indicated PE.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c |   57 ++++++++++++++++++++++++++++-
 1 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 8e29113..0551ffa 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -453,11 +453,64 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
 	return ret;
 }
 
+/**
+ * ioda_eeh_get_log - Retrieve error log
+ * @pe: EEH PE
+ * @severity: Severity level of the log
+ * @drv_log: buffer to store the log
+ * @len: space of the log buffer
+ *
+ * The function is used to retrieve error log from P7IOC.
+ */
+static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
+		char *drv_log, unsigned long len)
+{
+	s64 ret;
+	unsigned long flags;
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+
+	spin_lock_irqsave(&phb->lock, flags);
+
+	ret = opal_pci_get_phb_diag_data(phb->opal_id,
+			phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
+	if (ret) {
+		spin_unlock_irqrestore(&phb->lock, flags);
+		pr_warning("%s: Failed to retrieve log for PHB#%x-PE#%x\n",
+			__func__, hose->global_number, pe->addr);
+		return -EIO;
+	}
+
+	/*
+	 * FIXME: We probably need log the error in somewhere.
+	 * Lets make it up in future.
+	 */
+	/* pr_info("%s", phb->diag.blob); */
+
+	spin_unlock_irqrestore(&phb->lock, flags);
+
+	return 0;
+}
+
+/**
+ * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
+ * @pe: EEH PE
+ *
+ * For particular PE, it might have included PCI bridges. In order
+ * to make the PE work properly, those PCI bridges should be configured
+ * correctly. However, we need do nothing on P7IOC since the reset
+ * function will do everything that should be covered by the function.
+ */
+static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
+{
+	return 0;
+}
+
 struct pnv_eeh_ops ioda_eeh_ops = {
 	.post_init		= ioda_eeh_post_init,
 	.set_option		= ioda_eeh_set_option,
 	.get_state		= ioda_eeh_get_state,
 	.reset			= ioda_eeh_reset,
-	.get_log		= NULL,
-	.configure_bridge	= NULL
+	.get_log		= ioda_eeh_get_log,
+	.configure_bridge	= ioda_eeh_configure_bridge
 };
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 15/22] powerpc/eeh: I/O chip EEH state retrieval
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch adds I/O chip backend to retrieve the state for the
indicated PE. While the PE state is temperarily unavailable,
we return the default wait time (1000ms).

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c |  104 ++++++++++++++++++++++++++++-
 1 files changed, 103 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 1b5ddf4..cf7bb3d 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -122,10 +122,112 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
 	return ret;
 }
 
+/**
+ * ioda_eeh_get_state - Retrieve the state of PE
+ * @pe: EEH PE
+ * @state: return value
+ *
+ * The PE's state should be retrieved from the PEEV, PEST
+ * IODA tables. Since the OPAL has exported the function
+ * to do it, it'd better to use that.
+ */
+static int ioda_eeh_get_state(struct eeh_pe *pe, int *state)
+{
+	s64 ret = 0;
+	u8 fstate;
+	u16 pcierr;
+	u32 pe_no;
+	int result;
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+
+	/*
+	 * Sanity check on PE address. The PHB PE address should
+	 * be zero.
+	 */
+	if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
+		pr_err("%s: PE address %x out of range [0, %x] "
+		       "on PHB#%x\n",
+			__func__, pe->addr, phb->ioda.total_pe,
+			hose->global_number);
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	/*
+	 * When the PHB has fatal-error, the EEH core will retrieve
+	 * the state of the associated PE, which isn't existing.
+	 * Actually, the EEH core doesn't care the state. So we
+	 * just return normal state to keep EEH core moving forward.
+	 */
+	if (pe->type & EEH_PE_PHB) {
+		result = 0;
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		result |= EEH_STATE_MMIO_ACTIVE;
+		result |= EEH_STATE_DMA_ACTIVE;
+		result |= EEH_STATE_MMIO_ENABLED;
+		result |= EEH_STATE_DMA_ENABLED;
+
+		return result;
+	}
+
+	/* Retrieve PE status through OPAL */
+	pe_no = pe->addr;
+	ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+			&fstate, &pcierr, NULL);
+	if (ret) {
+		pr_err("%s: Failed to get EEH status on "
+		       "PHB#%x-PE#%x\n, err=%lld\n",
+			__func__, hose->global_number, pe_no, ret);
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	/* Parse result out */
+	result = 0;
+	switch (fstate) {
+	case OPAL_EEH_STOPPED_NOT_FROZEN:
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		result |= EEH_STATE_MMIO_ACTIVE;
+		result |= EEH_STATE_DMA_ACTIVE;
+		result |= EEH_STATE_MMIO_ENABLED;
+		result |= EEH_STATE_DMA_ENABLED;
+		break;
+	case OPAL_EEH_STOPPED_MMIO_FREEZE:
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		result |= EEH_STATE_DMA_ACTIVE;
+		result |= EEH_STATE_DMA_ENABLED;
+		break;
+	case OPAL_EEH_STOPPED_DMA_FREEZE:
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		result |= EEH_STATE_MMIO_ACTIVE;
+		result |= EEH_STATE_MMIO_ENABLED;
+		break;
+	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		break;
+	case OPAL_EEH_STOPPED_RESET:
+		result |= EEH_STATE_RESET_ACTIVE;
+		break;
+	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
+		result |= EEH_STATE_UNAVAILABLE;
+		if (state)
+			*state = 1000;
+		break;
+	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
+		result |= EEH_STATE_NOT_SUPPORT;
+		break;
+	default:
+		pr_warning("%s: Unexpected EEH status 0x%x "
+			   "on PHB#%x-PE#%x\n",
+			__func__, fstate, hose->global_number, pe_no);
+	}
+
+	return result;
+}
+
 struct pnv_eeh_ops ioda_eeh_ops = {
 	.post_init		= ioda_eeh_post_init,
 	.set_option		= ioda_eeh_set_option,
-	.get_state		= NULL,
+	.get_state		= ioda_eeh_get_state,
 	.reset			= NULL,
 	.get_log		= NULL,
 	.configure_bridge	= NULL
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 11/22] powerpc/eeh: Sync OPAL API with firmware
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch synchronizes OPAL APIs between kernel and firmware.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal.h                |  110 +++++++++++++++++++----
 arch/powerpc/platforms/powernv/opal-wrappers.S |    2 +
 2 files changed, 93 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index a4b28f1..3deb7b4 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -117,6 +117,11 @@ extern int opal_enter_rtas(struct rtas_args *args,
 #define OPAL_SET_SLOT_LED_STATUS		55
 #define OPAL_GET_EPOW_STATUS			56
 #define OPAL_SET_SYSTEM_ATTENTION_LED		57
+#define OPAL_RESERVED1				58
+#define OPAL_RESERVED2				59
+#define OPAL_PCI_NEXT_ERROR			60
+#define OPAL_PCI_EEH_FREEZE_STATUS2		61
+#define OPAL_PCI_POLL				62
 
 #ifndef __ASSEMBLY__
 
@@ -124,6 +129,7 @@ extern int opal_enter_rtas(struct rtas_args *args,
 enum OpalVendorApiTokens {
 	OPAL_START_VENDOR_API_RANGE = 1000, OPAL_END_VENDOR_API_RANGE = 1999
 };
+
 enum OpalFreezeState {
 	OPAL_EEH_STOPPED_NOT_FROZEN = 0,
 	OPAL_EEH_STOPPED_MMIO_FREEZE = 1,
@@ -133,55 +139,69 @@ enum OpalFreezeState {
 	OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5,
 	OPAL_EEH_STOPPED_PERM_UNAVAIL = 6
 };
+
 enum OpalEehFreezeActionToken {
 	OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1,
 	OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2,
 	OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3
 };
+
 enum OpalPciStatusToken {
-	OPAL_EEH_PHB_NO_ERROR = 0,
-	OPAL_EEH_PHB_FATAL = 1,
-	OPAL_EEH_PHB_RECOVERABLE = 2,
-	OPAL_EEH_PHB_BUS_ERROR = 3,
-	OPAL_EEH_PCI_NO_DEVSEL = 4,
-	OPAL_EEH_PCI_TA = 5,
-	OPAL_EEH_PCIEX_UR = 6,
-	OPAL_EEH_PCIEX_CA = 7,
-	OPAL_EEH_PCI_MMIO_ERROR = 8,
-	OPAL_EEH_PCI_DMA_ERROR = 9
+	OPAL_EEH_NO_ERROR	= 0,
+	OPAL_EEH_IOC_ERROR	= 1,
+	OPAL_EEH_PHB_ERROR	= 2,
+	OPAL_EEH_PE_ERROR	= 3,
+	OPAL_EEH_PE_MMIO_ERROR	= 4,
+	OPAL_EEH_PE_DMA_ERROR	= 5
 };
+
+enum OpalPciErrorSeverity {
+	OPAL_EEH_SEV_NO_ERROR	= 0,
+	OPAL_EEH_SEV_IOC_DEAD	= 1,
+	OPAL_EEH_SEV_PHB_DEAD	= 2,
+	OPAL_EEH_SEV_PHB_FENCED	= 3,
+	OPAL_EEH_SEV_PE_ER	= 4,
+	OPAL_EEH_SEV_INF	= 5
+};
+
 enum OpalShpcAction {
 	OPAL_SHPC_GET_LINK_STATE = 0,
 	OPAL_SHPC_GET_SLOT_STATE = 1
 };
+
 enum OpalShpcLinkState {
 	OPAL_SHPC_LINK_DOWN = 0,
 	OPAL_SHPC_LINK_UP = 1
 };
+
 enum OpalMmioWindowType {
 	OPAL_M32_WINDOW_TYPE = 1,
 	OPAL_M64_WINDOW_TYPE = 2,
 	OPAL_IO_WINDOW_TYPE = 3
 };
+
 enum OpalShpcSlotState {
 	OPAL_SHPC_DEV_NOT_PRESENT = 0,
 	OPAL_SHPC_DEV_PRESENT = 1
 };
+
 enum OpalExceptionHandler {
 	OPAL_MACHINE_CHECK_HANDLER = 1,
 	OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2,
 	OPAL_SOFTPATCH_HANDLER = 3
 };
+
 enum OpalPendingState {
-	OPAL_EVENT_OPAL_INTERNAL = 0x1,
-	OPAL_EVENT_NVRAM = 0x2,
-	OPAL_EVENT_RTC = 0x4,
-	OPAL_EVENT_CONSOLE_OUTPUT = 0x8,
-	OPAL_EVENT_CONSOLE_INPUT = 0x10,
-	OPAL_EVENT_ERROR_LOG_AVAIL = 0x20,
-	OPAL_EVENT_ERROR_LOG = 0x40,
-	OPAL_EVENT_EPOW = 0x80,
-	OPAL_EVENT_LED_STATUS = 0x100
+	OPAL_EVENT_OPAL_INTERNAL	= 0x1,
+	OPAL_EVENT_NVRAM		= 0x2,
+	OPAL_EVENT_RTC			= 0x4,
+	OPAL_EVENT_CONSOLE_OUTPUT	= 0x8,
+	OPAL_EVENT_CONSOLE_INPUT	= 0x10,
+	OPAL_EVENT_ERROR_LOG_AVAIL	= 0x20,
+	OPAL_EVENT_ERROR_LOG		= 0x40,
+	OPAL_EVENT_EPOW			= 0x80,
+	OPAL_EVENT_LED_STATUS		= 0x100,
+	OPAL_EVENT_PCI_ERROR		= 0x200
 };
 
 /* Machine check related definitions */
@@ -362,6 +382,55 @@ struct opal_machine_check_event {
 	} u;
 };
 
+enum {
+	OPAL_P7IOC_DIAG_TYPE_NONE	= 0,
+	OPAL_P7IOC_DIAG_TYPE_RGC	= 1,
+	OPAL_P7IOC_DIAG_TYPE_BI		= 2,
+	OPAL_P7IOC_DIAG_TYPE_CI		= 3,
+	OPAL_P7IOC_DIAG_TYPE_MISC	= 4,
+	OPAL_P7IOC_DIAG_TYPE_I2C	= 5,
+	OPAL_P7IOC_DIAG_TYPE_LAST	= 6
+};
+
+struct OpalIoP7IOCErrorData {
+	uint16_t type;
+
+	/* GEM */
+	uint64_t gemXfir;
+	uint64_t gemRfir;
+	uint64_t gemRirqfir;
+	uint64_t gemMask;
+	uint64_t gemRwof;
+
+	/* LEM */
+	uint64_t lemFir;
+	uint64_t lemErrMask;
+	uint64_t lemAction0;
+	uint64_t lemAction1;
+	uint64_t lemWof;
+
+	union {
+		struct OpalIoP7IOCRgcErrorData {
+			uint64_t rgcStatus;		/* 3E1C10 */
+			uint64_t rgcLdcp;		/* 3E1C18 */
+		}rgc;
+		struct OpalIoP7IOCBiErrorData {
+			uint64_t biLdcp0;		/* 3C0100, 3C0118 */
+			uint64_t biLdcp1;		/* 3C0108, 3C0120 */
+			uint64_t biLdcp2;		/* 3C0110, 3C0128 */
+			uint64_t biFenceStatus;		/* 3C0130, 3C0130 */
+
+			uint8_t  biDownbound;		/* BI Downbound or Upbound */
+		}bi;
+		struct OpalIoP7IOCCiErrorData {
+			uint64_t ciPortStatus;		/* 3Dn008 */
+			uint64_t ciPortLdcp;		/* 3Dn010 */
+
+			uint8_t	 ciPort;		/* Index of CI port: 0/1 */
+		}ci;
+	};
+};
+
 /**
  * This structure defines the overlay which will be used to store PHB error
  * data upon request.
@@ -535,6 +604,9 @@ int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t erro
 int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action);
 int64_t opal_get_epow_status(uint64_t *status);
 int64_t opal_set_system_attention_led(uint8_t led_action);
+int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe,
+			    uint16_t *pci_error_type, uint16_t *severity);
+int64_t opal_pci_poll(uint64_t phb_id);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3bb07e5..57e6fb4 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -107,3 +107,5 @@ OPAL_CALL(opal_pci_mask_pe_error,		OPAL_PCI_MASK_PE_ERROR);
 OPAL_CALL(opal_set_slot_led_status,		OPAL_SET_SLOT_LED_STATUS);
 OPAL_CALL(opal_get_epow_status,			OPAL_GET_EPOW_STATUS);
 OPAL_CALL(opal_set_system_attention_led,	OPAL_SET_SYSTEM_ATTENTION_LED);
+OPAL_CALL(opal_pci_next_error,			OPAL_PCI_NEXT_ERROR);
+OPAL_CALL(opal_pci_poll,			OPAL_PCI_POLL);
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 10/22] powerpc/eeh: Differentiate EEH event
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

The EEH event is usually produced because of 0xFF's returned from
PCI config or I/O registers. PowerNV platform also can produce EEH
event through interrupts. The patch differentiates the EEH events
produced for different cases in order to process them differently
in future.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h             |    4 ++--
 arch/powerpc/include/asm/eeh_event.h       |    6 +++++-
 arch/powerpc/kernel/rtas_pci.c             |    3 ++-
 arch/powerpc/platforms/pseries/eeh.c       |    7 ++++---
 arch/powerpc/platforms/pseries/eeh_event.c |    4 +++-
 5 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 09ea298..05b70dc 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -205,7 +205,7 @@ int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
 unsigned long eeh_check_failure(const volatile void __iomem *token,
 				unsigned long val);
-int eeh_dev_check_failure(struct eeh_dev *edev);
+int eeh_dev_check_failure(struct eeh_dev *edev, int flag);
 void __init eeh_addr_cache_build(void);
 void eeh_add_device_tree_early(struct device_node *);
 void eeh_add_device_tree_late(struct pci_bus *);
@@ -246,7 +246,7 @@ static inline unsigned long eeh_check_failure(const volatile void __iomem *token
 	return val;
 }
 
-#define eeh_dev_check_failure(x) (0)
+#define eeh_dev_check_failure(x, f) (0)
 
 static inline void eeh_addr_cache_build(void) { }
 
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index de67d83..7e00f23 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -26,12 +26,16 @@
  * to this struct is passed as the data pointer in a notify
  * callback.
  */
+#define EEH_EVENT_NORMAL	(1 << 0)
+#define EEH_EVENT_INT		(1 << 1)
+
 struct eeh_event {
+	int			flag;	/* Event flag		*/
 	struct list_head	list;	/* to form event queue	*/
 	struct eeh_pe		*pe;	/* EEH PE		*/
 };
 
-int eeh_send_failure_event(struct eeh_pe *pe);
+int eeh_send_failure_event(struct eeh_pe *pe, int flag);
 void eeh_handle_event(struct eeh_pe *pe);
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 71cb20d..81c1abb 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -39,6 +39,7 @@
 #include <asm/mpic.h>
 #include <asm/ppc-pci.h>
 #include <asm/eeh.h>
+#include <asm/eeh_event.h>
 
 /* RTAS tokens */
 static int read_pci_config;
@@ -81,7 +82,7 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	if (returnval == EEH_IO_ERROR_VALUE(size) &&
-	    eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node)))
+	    eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node), EEH_EVENT_NORMAL))
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	return PCIBIOS_SUCCESSFUL;
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index c65e14c..5c486e9 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -272,6 +272,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 /**
  * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
  * @edev: eeh device
+ * @flag: EEH event flag
  *
  * Check for an EEH failure for the given device node.  Call this
  * routine if the result of a read was all 0xff's and you want to
@@ -283,7 +284,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
  *
  * It is safe to call this routine in an interrupt context.
  */
-int eeh_dev_check_failure(struct eeh_dev *edev)
+int eeh_dev_check_failure(struct eeh_dev *edev, int flag)
 {
 	int ret;
 	unsigned long flags;
@@ -376,7 +377,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
 
-	eeh_send_failure_event(pe);
+	eeh_send_failure_event(pe, flag);
 
 	/* Most EEH events are due to device driver bugs.  Having
 	 * a stack trace will help the device-driver authors figure
@@ -417,7 +418,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
 		return val;
 	}
 
-	eeh_dev_check_failure(edev);
+	eeh_dev_check_failure(edev, EEH_EVENT_NORMAL);
 
 	pci_dev_put(eeh_dev_to_pci_dev(edev));
 	return val;
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index 185bedd..1f86b80 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -114,12 +114,13 @@ static void eeh_thread_launcher(struct work_struct *dummy)
 /**
  * eeh_send_failure_event - Generate a PCI error event
  * @pe: EEH PE
+ * @flag: EEH event flag
  *
  * This routine can be called within an interrupt context;
  * the actual event will be delivered in a normal context
  * (from a workqueue).
  */
-int eeh_send_failure_event(struct eeh_pe *pe)
+int eeh_send_failure_event(struct eeh_pe *pe, int flag)
 {
 	unsigned long flags;
 	struct eeh_event *event;
@@ -129,6 +130,7 @@ int eeh_send_failure_event(struct eeh_pe *pe)
 		pr_err("EEH: out of memory, event not handled\n");
 		return -ENOMEM;
 	}
+	event->flag = flag;
 	event->pe = pe;
 
 	/* We may or may not be called in an interrupt context */
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 09/22] powerpc/eeh: Delay EEH probe during hotplug
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

While doing EEH recovery, the PCI devices of the problematic PE
should be removed and then added to the system again. During the
so-called hotplug event, the PCI devices of the problematic PE
will be probed through early/late phase. We would delay EEH probe
on late point for PowerNV platform since the PCI device isn't
available in early phase.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/eeh.c |   16 +++++++++++++++-
 1 files changed, 15 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index ffe34c4..c65e14c 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -758,6 +758,14 @@ static void eeh_add_device_early(struct device_node *dn)
 {
 	struct pci_controller *phb;
 
+	/*
+	 * If we're doing EEH probe based on PCI device, we
+	 * would delay the probe until late stage because
+	 * the PCI device isn't available this moment.
+	 */
+	if (!eeh_probe_mode_devtree())
+		return;
+
 	if (!of_node_to_eeh_dev(dn))
 		return;
 	phb = of_node_to_eeh_dev(dn)->phb;
@@ -766,7 +774,6 @@ static void eeh_add_device_early(struct device_node *dn)
 	if (NULL == phb || 0 == phb->buid)
 		return;
 
-	/* FIXME: hotplug support on POWERNV */
 	eeh_ops->of_probe(dn, NULL);
 }
 
@@ -817,6 +824,13 @@ static void eeh_add_device_late(struct pci_dev *dev)
 	edev->pdev = dev;
 	dev->dev.archdata.edev = edev;
 
+	/*
+	 * We have to do the EEH probe here because the PCI device
+	 * hasn't been created yet in the early stage.
+	 */
+	if (eeh_probe_mode_dev())
+		eeh_ops->dev_probe(dev, NULL);
+
 	eeh_addr_cache_insert_dev(dev);
 }
 
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 08/22] powerpc/eeh: Refactor eeh_reset_pe_once()
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch changes the criteria used to judge if the PE has been
resetted successfully. We needn't the PE status is equal to the
combo: (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE).

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/eeh.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 39d2ea6..ffe34c4 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -527,7 +527,6 @@ static void eeh_reset_pe_once(struct eeh_pe *pe)
 	 * Partitionable Endpoint trumps hot-reset.
   	 */
 	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
-
 	if (freset)
 		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
 	else
@@ -565,6 +564,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe)
  */
 int eeh_reset_pe(struct eeh_pe *pe)
 {
+	int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 	int i, rc;
 
 	/* Take three shots at resetting the bus */
@@ -572,7 +572,7 @@ int eeh_reset_pe(struct eeh_pe *pe)
 		eeh_reset_pe_once(pe);
 
 		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
-		if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
+		if ((rc & flags) == flags)
 			return 0;
 
 		if (rc < 0) {
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 01/22] powerpc/eeh: Enhance converting EEH dev
From: Gavin Shan @ 2013-03-01 14:16 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

Under some special circumstances, the EEH device doesn't have the
associated device tree node or PCI device. It is caused by the
PCI device corresponding to the device tree node can't support
EEH function.

The patch enhances converting EEH device to device tree node or
PCI device accordingly to avoid system crash.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index a80e32b..e32c3c5 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -95,12 +95,12 @@ struct eeh_dev {
 
 static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
 {
-	return edev->dn;
+	return edev ? edev->dn : NULL;
 }
 
 static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
 {
-	return edev->pdev;
+	return edev ? edev->pdev : NULL;
 }
 
 /*
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH v1 0/22] EEH support for PowerNV platform
From: Gavin Shan @ 2013-03-01 14:16 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

Initially, the series of patches is built based on 3.8+

The series of patches intends to support EEH for PowerNV platform. The EEH
core already supports multiple probe methods: device tree nodes and PCI
devices. For EEH on PowerNV, we're using PCI devices to do EEH probe, which
is different from the probe type used on pSeries platform. Another point I
should mention is that the overall EEH would be split up to 3 layers: EEH
core, platform layer and I/O chip layer. It would make the EEH on PowerNV
platform can achieve more flexibility and support more I/O chips in future.
Besides, the EEH event can be produced by detecting 0xFF's from reading
PCI config or I/O registers, or from interrupts dedicated for EEH error
reporting. So we have to handle the EEH error interrupts. On the other hand,
the EEH events will be processed by EEH core like pSeries platform does.

We don't have existing utility (e.g. errinjct) to test the patchset. In order
to conduct the test, you need copy over the eeh-debug.c to PowerNV platform
directory and change the makefile accordingly. Please contact me to get the
eeh-debug.c if you want run the test case. After that, you need write P7IOC
registers explicitly to trigger frozen PE or fenced PHB explicitly as the
following example shows. The patchset has been verified on Firebird-L machine
where I have 2 Emulex ethernet card on PHB#6. I keep pinging to one of the
ethernet cards from external and then use following commands to produce frozen
PE or fenced PHB errors. Eventually, the errors can be recovered and the ethernet
card is reachable after temporary connection lost.

Trigger frozen PE:

	echo "0xD10 0x0000000002000000" > /proc/IODA/PHB6/REG
	sleep 1
	echo "0xD10 0x0000000000000000" > /proc/IODA/PHB6/REG

Trigger fenced PHB:

	echo "0xD10 0x8000000000000000" > /proc/IODA/PHB6/REG

---

arch/powerpc/include/asm/eeh.h                 |   29 ++-
arch/powerpc/include/asm/eeh_event.h           |    6 +-
arch/powerpc/include/asm/opal.h                |  110 +++++-
arch/powerpc/kernel/rtas_pci.c                 |    3 +-
arch/powerpc/platforms/powernv/Makefile        |    1 +
arch/powerpc/platforms/powernv/eeh-ioda.c      |  516 ++++++++++++++++++++++++
arch/powerpc/platforms/powernv/eeh-powernv.c   |  373 +++++++++++++++++
arch/powerpc/platforms/powernv/opal-wrappers.S |    2 +
arch/powerpc/platforms/powernv/opal.c          |    6 +
arch/powerpc/platforms/powernv/pci-err.c       |  474 ++++++++++++++++++++++
arch/powerpc/platforms/powernv/pci-ioda.c      |   13 +-
arch/powerpc/platforms/powernv/pci-p5ioc2.c    |    6 +-
arch/powerpc/platforms/powernv/pci.c           |   42 ++-
arch/powerpc/platforms/powernv/pci.h           |   22 +-
arch/powerpc/platforms/pseries/eeh.c           |   61 +++-
arch/powerpc/platforms/pseries/eeh_dev.c       |   35 ++
arch/powerpc/platforms/pseries/eeh_event.c     |   12 +-
arch/powerpc/platforms/pseries/eeh_pe.c        |   31 ++-
18 files changed, 1697 insertions(+), 45 deletions(-)
create mode 100644 arch/powerpc/platforms/powernv/eeh-ioda.c
create mode 100644 arch/powerpc/platforms/powernv/eeh-powernv.c
create mode 100644 arch/powerpc/platforms/powernv/pci-err.c

Thanks,
Gavin

^ permalink raw reply

* [PATCH 07/22] powerpc/eeh: EEH post initialization operation
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch adds new EEH operation post_init. It's used to notify
the platform that EEH core has completed the EEH probe. Through
that, PowerNV will enable EEH eventually.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h       |    1 +
 arch/powerpc/platforms/pseries/eeh.c |   11 +++++++++++
 2 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 1ff2aa9..09ea298 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -131,6 +131,7 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
 struct eeh_ops {
 	char *name;
 	int (*init)(void);
+	int (*post_init)(void);
 	void* (*of_probe)(struct device_node *dn, void *flag);
 	void* (*dev_probe)(struct pci_dev *dev, void *flag);
 	int (*set_option)(struct eeh_pe *pe, int option);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index abe26f8..39d2ea6 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -721,6 +721,17 @@ int __init eeh_init(void)
 		return -EINVAL;
 	}
 
+	/*
+	 * Call platform post-initialization. Actually, It's good chance
+	 * to inform platform that EEH is ready to supply service if the
+	 * I/O cache stuff has been built up.
+	 */
+	if (eeh_ops->post_init) {
+		ret = eeh_ops->post_init();
+		if (ret)
+			return ret;
+	}
+
 	if (eeh_subsystem_enabled)
 		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
 	else
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 04/22] powerpc/eeh: Make eeh_pe_get public
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

While processing EEH event caused by interrupt sent from P7IOC,
we need function to retrieve the PE according to the indicated
EEH device.

The patch makes function eeh_pe_get() public so that other source
files can call it. Also, the patch fixes referring to wrong BDF
(Bus/Device/Function) address while searching PE in function
__eeh_pe_get().

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h          |    1 +
 arch/powerpc/platforms/pseries/eeh_pe.c |    4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 4b48178..9230aa4 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -186,6 +186,7 @@ typedef void *(*eeh_traverse_func)(void *data, void *flag);
 typedef void *(*eeh_pci_traverse_func)(struct pci_dev *dev, void *flag);
 int eeh_phb_pe_create(struct pci_controller *phb);
 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
+struct eeh_pe *eeh_pe_get(struct eeh_dev *edev);
 int eeh_add_to_parent_pe(struct eeh_dev *edev);
 int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe);
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c
index 6e3eb43..93ed9cb 100644
--- a/arch/powerpc/platforms/pseries/eeh_pe.c
+++ b/arch/powerpc/platforms/pseries/eeh_pe.c
@@ -228,7 +228,7 @@ static void *__eeh_pe_get(void *data, void *flag)
 		return pe;
 
 	/* Try BDF address */
-	if (edev->pe_config_addr &&
+	if (edev->config_addr &&
 	   (edev->config_addr == pe->config_addr))
 		return pe;
 
@@ -246,7 +246,7 @@ static void *__eeh_pe_get(void *data, void *flag)
  * which is composed of PCI bus/device/function number, or unified
  * PE address.
  */
-static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
+struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
 {
 	struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
 	struct eeh_pe *pe;
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 02/22] powerpc/eeh: Function to tranverse PCI devices
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

For EEH on PowerNV platform, the PCI devices will be probed to
check if they support EEH functionality. Different from the case
of EEH for pSeries platform, we will probe real PCI device instead
of device tree node for EEH capability on PowerNV platform.

The patch introduces function eeh_pci_dev_traverse() to traverse
PCI devices for the indicated PCI bus from top to bottom.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h           |    3 ++
 arch/powerpc/platforms/pseries/eeh_dev.c |   35 ++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index e32c3c5..eeaeab6 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -183,6 +183,7 @@ static inline void eeh_unlock(void)
 #define EEH_MAX_ALLOWED_FREEZES 5
 
 typedef void *(*eeh_traverse_func)(void *data, void *flag);
+typedef void *(*eeh_pci_traverse_func)(struct pci_dev *dev, void *flag);
 int eeh_phb_pe_create(struct pci_controller *phb);
 int eeh_add_to_parent_pe(struct eeh_dev *edev);
 int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe);
@@ -191,6 +192,8 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
 void eeh_pe_restore_bars(struct eeh_pe *pe);
 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 
+void eeh_pci_dev_traverse(struct pci_bus *bus,
+		eeh_pci_traverse_func fn, void *flag);
 void *eeh_dev_init(struct device_node *dn, void *data);
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
 int __init eeh_ops_register(struct eeh_ops *ops);
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
index 1efa28f..12c445d 100644
--- a/arch/powerpc/platforms/pseries/eeh_dev.c
+++ b/arch/powerpc/platforms/pseries/eeh_dev.c
@@ -41,6 +41,41 @@
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
 
+
+/**
+ * eeh_pci_dev_traverse - Traverse PCI devices for the indicated bus
+ * @bus: PCI bus
+ * @fn: callback function
+ * @flag: extra flag
+ *
+ * The function traverses the PCI devices for the indicated PCI bus
+ * from top to bottom fashion until the supplied callback function
+ * returns non-zero value on the specific PCI device.
+ */
+void eeh_pci_dev_traverse(struct pci_bus *bus,
+		eeh_pci_traverse_func fn, void *flag)
+{
+	struct pci_dev *dev;
+	void *ret;
+
+	if (!bus)
+		return;
+
+	/*
+	 * We should make sure the parent devices are scanned
+	 * prior to the child devices so that the parent PE
+	 * could be created before the child PEs.
+	 */
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		ret = fn(dev, flag);
+		if (ret)
+			return;
+
+		if (dev->subordinate)
+			eeh_pci_dev_traverse(dev->subordinate, fn, flag);
+	}
+}
+
 /**
  * eeh_dev_init - Create EEH device according to OF node
  * @dn: device node
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 05/22] powerpc/eeh: Trace PCI bus from PE
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

There're several types of PEs can be supported for now: PHB, Bus
Device. For PCI bus dependent PE, tracing the corresponding PCI
bus from PE (struct eeh_pe) would make the code more efficient.

The patch also enables the retrieval of PCI bus based on the
PCI device dependent PE.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h          |    1 +
 arch/powerpc/platforms/pseries/eeh_pe.c |   25 ++++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 9230aa4..557d82a 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -62,6 +62,7 @@ struct eeh_pe {
 	int check_count;		/* Times of ignored error	*/
 	int freeze_count;		/* Times of froze up		*/
 	int false_positives;		/* Times of reported #ff's	*/
+	struct pci_bus *bus;		/* Top PCI bus for bus PE	*/
 	struct eeh_pe *parent;		/* Parent PE			*/
 	struct list_head child_list;	/* Link PE to the child list	*/
 	struct list_head edevs;		/* Link list of EEH devices	*/
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c
index 93ed9cb..03f8223 100644
--- a/arch/powerpc/platforms/pseries/eeh_pe.c
+++ b/arch/powerpc/platforms/pseries/eeh_pe.c
@@ -304,6 +304,7 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
 int eeh_add_to_parent_pe(struct eeh_dev *edev)
 {
 	struct eeh_pe *pe, *parent;
+	struct eeh_dev *first_edev;
 
 	eeh_lock();
 
@@ -326,6 +327,21 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 		pe->type = EEH_PE_BUS;
 		edev->pe = pe;
 
+		/*
+		 * For PCI bus sensitive PE, we can reset the parent
+		 * bridge in order for hot-reset. However, the PCI
+		 * devices including the associated EEH devices might
+		 * be removed when EEH core is doing recovery. So that
+		 * won't safe to retrieve the bridge through downstream
+		 * EEH device. We have to trace the parent PCI bus, then
+		 * the parent bridge explicitly.
+		 */
+		if (eeh_probe_mode_dev() && !pe->bus) {
+			first_edev = list_first_entry(&pe->edevs,
+						struct eeh_dev, list);
+			pe->bus = eeh_dev_to_pci_dev(first_edev)->bus;
+		}
+
 		/* Put the edev to PE */
 		list_add_tail(&edev->list, &pe->edevs);
 		eeh_unlock();
@@ -639,13 +655,20 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
 
 	if (pe->type & EEH_PE_PHB) {
 		bus = pe->phb->bus;
-	} else if (pe->type & EEH_PE_BUS) {
+	} else if (pe->type & EEH_PE_BUS ||
+		   pe->type & EEH_PE_DEVICE) {
+		if (pe->bus) {
+			bus = pe->bus;
+			goto out;
+		}
+
 		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
 		pdev = eeh_dev_to_pci_dev(edev);
 		if (pdev)
 			bus = pdev->bus;
 	}
 
+out:
 	eeh_unlock();
 
 	return bus;
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 06/22] powerpc/eeh: Make eeh_init public
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

For EEH on PowerNV platform, we will do EEH probe based on the
real PCI devices. The PCI devices are available after PCI probe.
So we have to call eeh_init() explicitly on PowerNV platform
after PCI probe.

The patch also enables eeh_init() to do EEH probe for PowerNV
platform.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h       |    6 ++++++
 arch/powerpc/platforms/pseries/eeh.c |   23 +++++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 557d82a..1ff2aa9 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -199,6 +199,7 @@ void eeh_pci_dev_traverse(struct pci_bus *bus,
 		eeh_pci_traverse_func fn, void *flag);
 void *eeh_dev_init(struct device_node *dn, void *data);
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
+int __init eeh_init(void);
 int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
 unsigned long eeh_check_failure(const volatile void __iomem *token,
@@ -227,6 +228,11 @@ void eeh_remove_bus_device(struct pci_dev *, int);
 
 #else /* !CONFIG_EEH */
 
+static inline int eeh_init(void)
+{
+	return 0;
+}
+
 static inline void *eeh_dev_init(struct device_node *dn, void *data)
 {
 	return NULL;
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 6b73d6c..abe26f8 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -674,11 +674,21 @@ int __exit eeh_ops_unregister(const char *name)
  * Even if force-off is set, the EEH hardware is still enabled, so that
  * newer systems can boot.
  */
-static int __init eeh_init(void)
+int __init eeh_init(void)
 {
 	struct pci_controller *hose, *tmp;
 	struct device_node *phb;
-	int ret;
+	static int cnt = 0;
+	int ret = 0;
+
+	/*
+	 * We have to delay the initialization on PowerNV after
+	 * the PCI hierarchy tree has been built because the PEs
+	 * are figured out based on PCI devices instead of device
+	 * tree nodes
+	 */
+	if (machine_is(powernv) && cnt++ <= 0)
+		return ret;
 
 	/* call platform initialization function */
 	if (!eeh_ops) {
@@ -700,6 +710,15 @@ static int __init eeh_init(void)
 			phb = hose->dn;
 			traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
 		}
+	} else if (eeh_probe_mode_dev()) {
+		list_for_each_entry_safe(hose, tmp,
+			&hose_list, list_node) {
+			eeh_pci_dev_traverse(hose->bus, eeh_ops->dev_probe, NULL);
+		}
+	} else {
+		pr_warning("%s: Invalid probe mode %d\n",
+			__func__, eeh_probe_mode);
+		return -EINVAL;
 	}
 
 	if (eeh_subsystem_enabled)
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 03/22] powerpc/eeh: Make eeh_phb_pe_get public
From: Gavin Shan @ 2013-03-01 14:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1362147440-14096-1-git-send-email-shangw@linux.vnet.ibm.com>

While processing EEH event caused by interrupt sent from P7IOC,
we need function to retrieve the PE according to the indicated
PCI host controller (struct pci_controller).

The patch makes function eeh_phb_pe_get() public so that other
source files can call it.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h          |    1 +
 arch/powerpc/platforms/pseries/eeh_pe.c |    2 +-
 2 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index eeaeab6..4b48178 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -185,6 +185,7 @@ static inline void eeh_unlock(void)
 typedef void *(*eeh_traverse_func)(void *data, void *flag);
 typedef void *(*eeh_pci_traverse_func)(struct pci_dev *dev, void *flag);
 int eeh_phb_pe_create(struct pci_controller *phb);
+struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
 int eeh_add_to_parent_pe(struct eeh_dev *edev);
 int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe);
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c
index fe43d1a..6e3eb43 100644
--- a/arch/powerpc/platforms/pseries/eeh_pe.c
+++ b/arch/powerpc/platforms/pseries/eeh_pe.c
@@ -95,7 +95,7 @@ int eeh_phb_pe_create(struct pci_controller *phb)
  * hierarchy tree is composed of PHB PEs. The function is used
  * to retrieve the corresponding PHB PE according to the given PHB.
  */
-static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
+struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
 {
 	struct eeh_pe *pe;
 
-- 
1.7.5.4

^ permalink raw reply related

* Re: [PATCH v6 00/46] CPU hotplug: stop_machine()-free CPU hotplug
From: Vincent Guittot @ 2013-03-01 12:05 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: linux-doc, peterz, fweisbec, linux-kernel, walken, mingo,
	linux-arch, linux, xiaoguangrong, wangyun, paulmck, nikunj,
	linux-pm, rusty, rostedt, rjw, namhyung, tglx, linux-arm-kernel,
	netdev, oleg, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Hi Srivatsa,

I have run some tests with genload on my ARM platform but even with
the mainline the cpu_down is quite short and stable ( around  4ms )
with 5 or 2 online cores. The duration is similar with your patches

I have maybe not used the right option for genload ? I have used
genload -m 10 which seems to generate the most system time. Which
command have you used for your tests ?

Vincent

On 18 February 2013 13:38, Srivatsa S. Bhat
<srivatsa.bhat@linux.vnet.ibm.com> wrote:
> Hi,
>
> This patchset removes CPU hotplug's dependence on stop_machine() from the CPU
> offline path and provides an alternative (set of APIs) to preempt_disable() to
> prevent CPUs from going offline, which can be invoked from atomic context.
> The motivation behind the removal of stop_machine() is to avoid its ill-effects
> and thus improve the design of CPU hotplug. (More description regarding this
> is available in the patches).
>
> All the users of preempt_disable()/local_irq_disable() who used to use it to
> prevent CPU offline, have been converted to the new primitives introduced in the
> patchset. Also, the CPU_DYING notifiers have been audited to check whether
> they can cope up with the removal of stop_machine() or whether they need to
> use new locks for synchronization (all CPU_DYING notifiers looked OK, without
> the need for any new locks).
>
> Applies on current mainline (v3.8-rc7+).
>
> This patchset is available in the following git branch:
>
> git://github.com/srivatsabhat/linux.git  stop-machine-free-cpu-hotplug-v6
>
>
> Overview of the patches:
> -----------------------
>
> Patches 1 to 7 introduce a generic, flexible Per-CPU Reader-Writer Locking
> scheme.
>
> Patch 8 uses this synchronization mechanism to build the
> get/put_online_cpus_atomic() APIs which can be used from atomic context, to
> prevent CPUs from going offline.
>
> Patch 9 is a cleanup; it converts preprocessor macros to static inline
> functions.
>
> Patches 10 to 43 convert various call-sites to use the new APIs.
>
> Patch 44 is the one which actually removes stop_machine() from the CPU
> offline path.
>
> Patch 45 decouples stop_machine() and CPU hotplug from Kconfig.
>
> Patch 46 updates the documentation to reflect the new APIs.
>
>
> Changes in v6:
> --------------
>
> * Fixed issues related to memory barriers, as pointed out by Paul and Oleg.
> * Fixed the locking issue related to clockevents_lock, which was being
>   triggered when cpu idle was enabled.
> * Some code restructuring to improve readability and to enhance some fastpath
>   optimizations.
> * Randconfig build-fixes, reported by Fengguang Wu.
>
>
> Changes in v5:
> --------------
>   Exposed a new generic locking scheme: Flexible Per-CPU Reader-Writer locks,
>   based on the synchronization schemes already discussed in the previous
>   versions, and used it in CPU hotplug, to implement the new APIs.
>
>   Audited the CPU_DYING notifiers in the kernel source tree and replaced
>   usages of preempt_disable() with the new get/put_online_cpus_atomic() APIs
>   where necessary.
>
>
> Changes in v4:
> --------------
>   The synchronization scheme has been simplified quite a bit, which makes it
>   look a lot less complex than before. Some highlights:
>
> * Implicit ACKs:
>
>   The earlier design required the readers to explicitly ACK the writer's
>   signal. The new design uses implicit ACKs instead. The reader switching
>   over to rwlock implicitly tells the writer to stop waiting for that reader.
>
> * No atomic operations:
>
>   Since we got rid of explicit ACKs, we no longer have the need for a reader
>   and a writer to update the same counter. So we can get rid of atomic ops
>   too.
>
> Changes in v3:
> --------------
> * Dropped the _light() and _full() variants of the APIs. Provided a single
>   interface: get/put_online_cpus_atomic().
>
> * Completely redesigned the synchronization mechanism again, to make it
>   fast and scalable at the reader-side in the fast-path (when no hotplug
>   writers are active). This new scheme also ensures that there is no
>   possibility of deadlocks due to circular locking dependency.
>   In summary, this provides the scalability and speed of per-cpu rwlocks
>   (without actually using them), while avoiding the downside (deadlock
>   possibilities) which is inherent in any per-cpu locking scheme that is
>   meant to compete with preempt_disable()/enable() in terms of flexibility.
>
>   The problem with using per-cpu locking to replace preempt_disable()/enable
>   was explained here:
>   https://lkml.org/lkml/2012/12/6/290
>
>   Basically we use per-cpu counters (for scalability) when no writers are
>   active, and then switch to global rwlocks (for lock-safety) when a writer
>   becomes active. It is a slightly complex scheme, but it is based on
>   standard principles of distributed algorithms.
>
> Changes in v2:
> -------------
> * Completely redesigned the synchronization scheme to avoid using any extra
>   cpumasks.
>
> * Provided APIs for 2 types of atomic hotplug readers: "light" (for
>   light-weight) and "full". We wish to have more "light" readers than
>   the "full" ones, to avoid indirectly inducing the "stop_machine effect"
>   without even actually using stop_machine().
>
>   And the patches show that it _is_ generally true: 5 patches deal with
>   "light" readers, whereas only 1 patch deals with a "full" reader.
>
>   Also, the "light" readers happen to be in very hot paths. So it makes a
>   lot of sense to have such a distinction and a corresponding light-weight
>   API.
>
> Links to previous versions:
> v5: http://lwn.net/Articles/533553/
> v4: https://lkml.org/lkml/2012/12/11/209
> v3: https://lkml.org/lkml/2012/12/7/287
> v2: https://lkml.org/lkml/2012/12/5/322
> v1: https://lkml.org/lkml/2012/12/4/88
>
> --
>  Paul E. McKenney (1):
>       cpu: No more __stop_machine() in _cpu_down()
>
> Srivatsa S. Bhat (45):
>       percpu_rwlock: Introduce the global reader-writer lock backend
>       percpu_rwlock: Introduce per-CPU variables for the reader and the writer
>       percpu_rwlock: Provide a way to define and init percpu-rwlocks at compile time
>       percpu_rwlock: Implement the core design of Per-CPU Reader-Writer Locks
>       percpu_rwlock: Make percpu-rwlocks IRQ-safe, optimally
>       percpu_rwlock: Rearrange the read-lock code to fastpath nested percpu readers
>       percpu_rwlock: Allow writers to be readers, and add lockdep annotations
>       CPU hotplug: Provide APIs to prevent CPU offline from atomic context
>       CPU hotplug: Convert preprocessor macros to static inline functions
>       smp, cpu hotplug: Fix smp_call_function_*() to prevent CPU offline properly
>       smp, cpu hotplug: Fix on_each_cpu_*() to prevent CPU offline properly
>       sched/timer: Use get/put_online_cpus_atomic() to prevent CPU offline
>       sched/migration: Use raw_spin_lock/unlock since interrupts are already disabled
>       sched/rt: Use get/put_online_cpus_atomic() to prevent CPU offline
>       tick: Use get/put_online_cpus_atomic() to prevent CPU offline
>       time/clocksource: Use get/put_online_cpus_atomic() to prevent CPU offline
>       clockevents: Use get/put_online_cpus_atomic() in clockevents_notify()
>       softirq: Use get/put_online_cpus_atomic() to prevent CPU offline
>       irq: Use get/put_online_cpus_atomic() to prevent CPU offline
>       net: Use get/put_online_cpus_atomic() to prevent CPU offline
>       block: Use get/put_online_cpus_atomic() to prevent CPU offline
>       crypto: pcrypt - Protect access to cpu_online_mask with get/put_online_cpus()
>       infiniband: ehca: Use get/put_online_cpus_atomic() to prevent CPU offline
>       [SCSI] fcoe: Use get/put_online_cpus_atomic() to prevent CPU offline
>       staging: octeon: Use get/put_online_cpus_atomic() to prevent CPU offline
>       x86: Use get/put_online_cpus_atomic() to prevent CPU offline
>       perf/x86: Use get/put_online_cpus_atomic() to prevent CPU offline
>       KVM: Use get/put_online_cpus_atomic() to prevent CPU offline from atomic context
>       kvm/vmx: Use get/put_online_cpus_atomic() to prevent CPU offline
>       x86/xen: Use get/put_online_cpus_atomic() to prevent CPU offline
>       alpha/smp: Use get/put_online_cpus_atomic() to prevent CPU offline
>       blackfin/smp: Use get/put_online_cpus_atomic() to prevent CPU offline
>       cris/smp: Use get/put_online_cpus_atomic() to prevent CPU offline
>       hexagon/smp: Use get/put_online_cpus_atomic() to prevent CPU offline
>       ia64: Use get/put_online_cpus_atomic() to prevent CPU offline
>       m32r: Use get/put_online_cpus_atomic() to prevent CPU offline
>       MIPS: Use get/put_online_cpus_atomic() to prevent CPU offline
>       mn10300: Use get/put_online_cpus_atomic() to prevent CPU offline
>       parisc: Use get/put_online_cpus_atomic() to prevent CPU offline
>       powerpc: Use get/put_online_cpus_atomic() to prevent CPU offline
>       sh: Use get/put_online_cpus_atomic() to prevent CPU offline
>       sparc: Use get/put_online_cpus_atomic() to prevent CPU offline
>       tile: Use get/put_online_cpus_atomic() to prevent CPU offline
>       CPU hotplug, stop_machine: Decouple CPU hotplug from stop_machine() in Kconfig
>       Documentation/cpu-hotplug: Remove references to stop_machine()
>
>   Documentation/cpu-hotplug.txt                 |   17 +-
>  arch/alpha/kernel/smp.c                       |   19 +-
>  arch/arm/Kconfig                              |    1
>  arch/blackfin/Kconfig                         |    1
>  arch/blackfin/mach-common/smp.c               |    6 -
>  arch/cris/arch-v32/kernel/smp.c               |    8 +
>  arch/hexagon/kernel/smp.c                     |    5
>  arch/ia64/Kconfig                             |    1
>  arch/ia64/kernel/irq_ia64.c                   |   13 +
>  arch/ia64/kernel/perfmon.c                    |    6 +
>  arch/ia64/kernel/smp.c                        |   23 ++
>  arch/ia64/mm/tlb.c                            |    6 -
>  arch/m32r/kernel/smp.c                        |   12 +
>  arch/mips/Kconfig                             |    1
>  arch/mips/kernel/cevt-smtc.c                  |    8 +
>  arch/mips/kernel/smp.c                        |   16 +-
>  arch/mips/kernel/smtc.c                       |    3
>  arch/mips/mm/c-octeon.c                       |    4
>  arch/mn10300/Kconfig                          |    1
>  arch/mn10300/kernel/smp.c                     |    2
>  arch/mn10300/mm/cache-smp.c                   |    5
>  arch/mn10300/mm/tlb-smp.c                     |   15 +
>  arch/parisc/Kconfig                           |    1
>  arch/parisc/kernel/smp.c                      |    4
>  arch/powerpc/Kconfig                          |    1
>  arch/powerpc/mm/mmu_context_nohash.c          |    2
>  arch/s390/Kconfig                             |    1
>  arch/sh/Kconfig                               |    1
>  arch/sh/kernel/smp.c                          |   12 +
>  arch/sparc/Kconfig                            |    1
>  arch/sparc/kernel/leon_smp.c                  |    2
>  arch/sparc/kernel/smp_64.c                    |    9 -
>  arch/sparc/kernel/sun4d_smp.c                 |    2
>  arch/sparc/kernel/sun4m_smp.c                 |    3
>  arch/tile/kernel/smp.c                        |    4
>  arch/x86/Kconfig                              |    1
>  arch/x86/include/asm/ipi.h                    |    5
>  arch/x86/kernel/apic/apic_flat_64.c           |   10 +
>  arch/x86/kernel/apic/apic_numachip.c          |    5
>  arch/x86/kernel/apic/es7000_32.c              |    5
>  arch/x86/kernel/apic/io_apic.c                |    7 -
>  arch/x86/kernel/apic/ipi.c                    |   10 +
>  arch/x86/kernel/apic/x2apic_cluster.c         |    4
>  arch/x86/kernel/apic/x2apic_uv_x.c            |    4
>  arch/x86/kernel/cpu/mcheck/therm_throt.c      |    4
>  arch/x86/kernel/cpu/perf_event_intel_uncore.c |    5
>  arch/x86/kvm/vmx.c                            |    8 +
>  arch/x86/mm/tlb.c                             |   14 +
>  arch/x86/xen/mmu.c                            |   11 +
>  arch/x86/xen/smp.c                            |    9 +
>  block/blk-softirq.c                           |    4
>  crypto/pcrypt.c                               |    4
>  drivers/infiniband/hw/ehca/ehca_irq.c         |    8 +
>  drivers/scsi/fcoe/fcoe.c                      |    7 +
>  drivers/staging/octeon/ethernet-rx.c          |    3
>  include/linux/cpu.h                           |    8 +
>  include/linux/percpu-rwlock.h                 |   74 +++++++
>  include/linux/stop_machine.h                  |    2
>  init/Kconfig                                  |    2
>  kernel/cpu.c                                  |   59 +++++-
>  kernel/irq/manage.c                           |    7 +
>  kernel/sched/core.c                           |   36 +++-
>  kernel/sched/fair.c                           |    5
>  kernel/sched/rt.c                             |    3
>  kernel/smp.c                                  |   65 ++++--
>  kernel/softirq.c                              |    3
>  kernel/time/clockevents.c                     |    3
>  kernel/time/clocksource.c                     |    5
>  kernel/time/tick-broadcast.c                  |    2
>  kernel/timer.c                                |    2
>  lib/Kconfig                                   |    3
>  lib/Makefile                                  |    1
>  lib/percpu-rwlock.c                           |  256 +++++++++++++++++++++++++
>  net/core/dev.c                                |    9 +
>  virt/kvm/kvm_main.c                           |   10 +
>  75 files changed, 776 insertions(+), 123 deletions(-)
>  create mode 100644 include/linux/percpu-rwlock.h
>  create mode 100644 lib/percpu-rwlock.c
>
>
>
> Regards,
> Srivatsa S. Bhat
> IBM Linux Technology Center
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 1/6 v8] iommu/fsl: Store iommu domain information pointer in archdata.
From: Alexey Kardashevskiy @ 2013-03-01 10:07 UTC (permalink / raw)
  To: Sethi Varun-B16395
  Cc: Wood Scott-B07421, Alex Williamson, Joerg Roedel,
	linux-kernel@vger.kernel.org list, Yoder Stuart-B08248,
	iommu@lists.linux-foundation.org, Paul Mackerras,
	linuxppc-dev@lists.ozlabs.org list, David Gibson
In-Reply-To: <C5ECD7A89D1DC44195F34B25E172658D3C802F@039-SN2MPN1-013.039d.mgd.msft.net>

btw the device struct already has a pointer to its iommu_group, and the 
iommu_group struct itself has a pointer void *iommu_data which you could 
use for anything you want (iommu_group_get_iommudata(), 
iommu_group_set_iommudata()).

By design you are expected to add iommu groups to a domain but not devices 
so I am not so sure that you really need a pointer to domain in the device 
struct.


On 01/03/13 19:55, Sethi Varun-B16395 wrote:
> Thanks for the clarification Alexey.
>
> Kumar,
> We are using this new field (for PAMU) to store the iommu domain (for iommu API) information for a device.
>
> Regards
> Varun
>
>> -----Original Message-----
>> From: Alexey Kardashevskiy [mailto:aik@ozlabs.ru]
>> Sent: Friday, March 01, 2013 6:55 AM
>> To: Kumar Gala
>> Cc: Sethi Varun-B16395; Benjamin Herrenschmidt; iommu@lists.linux-
>> foundation.org; linuxppc-dev@lists.ozlabs.org list; linux-
>> kernel@vger.kernel.org list; Wood Scott-B07421; Yoder Stuart-B08248;
>> Joerg Roedel; Paul Mackerras; David Gibson; Alex Williamson
>> Subject: Re: [PATCH 1/6 v8] iommu/fsl: Store iommu domain information
>> pointer in archdata.
>>
>> Hi!
>>
>> On POWERNV we use only the part of IOMMU API which handles devices and
>> groups. We do not use IOMMU domains as VFIO containers do everything we
>> need for VFIO and we do not implement iommu_ops as it is not very
>> relevant to our architecture (does not give dma window properties, etc).
>>
>> So your work does not overlap with my work :)
>>
>>
>> On 01/03/13 02:51, Kumar Gala wrote:
>>>
>>> On Feb 27, 2013, at 6:04 AM, Sethi Varun-B16395 wrote:
>>>
>>>> Hi Kumar,Ben,
>>>> I am implementing the Freescale PAMU (IOMMU) driver using the Linux
>> IOMMU API. In this particular patch, I have added a new field to
>> dev_archdata structure to store the dma domain information.
>>>> This field is updated whenever we attach a device to an iommu domain.
>>>>
>>>> Regards
>>>> Varun
>>>
>>> Would be good to see if this overlaps with Alexey's work for IOMMU
>> driver for powernv.
>>>
>>> - k
>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: Joerg Roedel [mailto:joro@8bytes.org]
>>>>> Sent: Wednesday, February 27, 2013 5:01 PM
>>>>> To: Sethi Varun-B16395
>>>>> Cc: iommu@lists.linux-foundation.org; linuxppc-dev@lists.ozlabs.org;
>>>>> linux-kernel@vger.kernel.org; Wood Scott-B07421; Yoder Stuart-B08248
>>>>> Subject: Re: [PATCH 1/6 v8] iommu/fsl: Store iommu domain
>>>>> information pointer in archdata.
>>>>>
>>>>> On Mon, Feb 18, 2013 at 06:22:14PM +0530, Varun Sethi wrote:
>>>>>> Add a new field in the device (powerpc) archdata structure for
>>>>>> storing iommu domain information pointer. This pointer is stored
>>>>>> when the device is attached to a particular domain.
>>>>>>
>>>>>>
>>>>>> Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
>>>>>> ---
>>>>>> - no change.
>>>>>> arch/powerpc/include/asm/device.h |    4 ++++
>>>>>> 1 files changed, 4 insertions(+), 0 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/powerpc/include/asm/device.h
>>>>>> b/arch/powerpc/include/asm/device.h
>>>>>> index 77e97dd..6dc79fe 100644
>>>>>> --- a/arch/powerpc/include/asm/device.h
>>>>>> +++ b/arch/powerpc/include/asm/device.h
>>>>>> @@ -28,6 +28,10 @@ struct dev_archdata {
>>>>>> 		void		*iommu_table_base;
>>>>>> 	} dma_data;
>>>>>>
>>>>>> +	/* IOMMU domain information pointer. This would be set
>>>>>> +	 * when this device is attached to an iommu_domain.
>>>>>> +	 */
>>>>>> +	void			*iommu_domain;
>>>>>
>>>>> Please Cc the PowerPC Maintainers on this, so that they can have a
>>>>> look at it. This also must be put this into an #ifdef
>> CONFIG_IOMMU_API.


-- 
Alexey

^ permalink raw reply

* RE: [PATCH 1/6 v8] iommu/fsl: Store iommu domain information pointer in archdata.
From: Sethi Varun-B16395 @ 2013-03-01  8:55 UTC (permalink / raw)
  To: Alexey Kardashevskiy, Kumar Gala
  Cc: Wood Scott-B07421, Alex Williamson, Joerg Roedel,
	linux-kernel@vger.kernel.org list, Yoder Stuart-B08248,
	iommu@lists.linux-foundation.org, Paul Mackerras,
	linuxppc-dev@lists.ozlabs.org list, David Gibson
In-Reply-To: <51300367.6080300@ozlabs.ru>

Thanks for the clarification Alexey.

Kumar,
We are using this new field (for PAMU) to store the iommu domain (for iommu=
 API) information for a device.

Regards
Varun

> -----Original Message-----
> From: Alexey Kardashevskiy [mailto:aik@ozlabs.ru]
> Sent: Friday, March 01, 2013 6:55 AM
> To: Kumar Gala
> Cc: Sethi Varun-B16395; Benjamin Herrenschmidt; iommu@lists.linux-
> foundation.org; linuxppc-dev@lists.ozlabs.org list; linux-
> kernel@vger.kernel.org list; Wood Scott-B07421; Yoder Stuart-B08248;
> Joerg Roedel; Paul Mackerras; David Gibson; Alex Williamson
> Subject: Re: [PATCH 1/6 v8] iommu/fsl: Store iommu domain information
> pointer in archdata.
>=20
> Hi!
>=20
> On POWERNV we use only the part of IOMMU API which handles devices and
> groups. We do not use IOMMU domains as VFIO containers do everything we
> need for VFIO and we do not implement iommu_ops as it is not very
> relevant to our architecture (does not give dma window properties, etc).
>=20
> So your work does not overlap with my work :)
>=20
>=20
> On 01/03/13 02:51, Kumar Gala wrote:
> >
> > On Feb 27, 2013, at 6:04 AM, Sethi Varun-B16395 wrote:
> >
> >> Hi Kumar,Ben,
> >> I am implementing the Freescale PAMU (IOMMU) driver using the Linux
> IOMMU API. In this particular patch, I have added a new field to
> dev_archdata structure to store the dma domain information.
> >> This field is updated whenever we attach a device to an iommu domain.
> >>
> >> Regards
> >> Varun
> >
> > Would be good to see if this overlaps with Alexey's work for IOMMU
> driver for powernv.
> >
> > - k
> >
> >>
> >>> -----Original Message-----
> >>> From: Joerg Roedel [mailto:joro@8bytes.org]
> >>> Sent: Wednesday, February 27, 2013 5:01 PM
> >>> To: Sethi Varun-B16395
> >>> Cc: iommu@lists.linux-foundation.org; linuxppc-dev@lists.ozlabs.org;
> >>> linux-kernel@vger.kernel.org; Wood Scott-B07421; Yoder Stuart-B08248
> >>> Subject: Re: [PATCH 1/6 v8] iommu/fsl: Store iommu domain
> >>> information pointer in archdata.
> >>>
> >>> On Mon, Feb 18, 2013 at 06:22:14PM +0530, Varun Sethi wrote:
> >>>> Add a new field in the device (powerpc) archdata structure for
> >>>> storing iommu domain information pointer. This pointer is stored
> >>>> when the device is attached to a particular domain.
> >>>>
> >>>>
> >>>> Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
> >>>> ---
> >>>> - no change.
> >>>> arch/powerpc/include/asm/device.h |    4 ++++
> >>>> 1 files changed, 4 insertions(+), 0 deletions(-)
> >>>>
> >>>> diff --git a/arch/powerpc/include/asm/device.h
> >>>> b/arch/powerpc/include/asm/device.h
> >>>> index 77e97dd..6dc79fe 100644
> >>>> --- a/arch/powerpc/include/asm/device.h
> >>>> +++ b/arch/powerpc/include/asm/device.h
> >>>> @@ -28,6 +28,10 @@ struct dev_archdata {
> >>>> 		void		*iommu_table_base;
> >>>> 	} dma_data;
> >>>>
> >>>> +	/* IOMMU domain information pointer. This would be set
> >>>> +	 * when this device is attached to an iommu_domain.
> >>>> +	 */
> >>>> +	void			*iommu_domain;
> >>>
> >>> Please Cc the PowerPC Maintainers on this, so that they can have a
> >>> look at it. This also must be put this into an #ifdef
> CONFIG_IOMMU_API.
>=20
>=20
> --
> Alexey

^ permalink raw reply

* Re: [PATCH 0/3] Enable multiple MSI feature in pSeries
From: Michael Ellerman @ 2013-03-01  3:54 UTC (permalink / raw)
  To: Mike; +Cc: tglx, linuxppc-dev, linux-kernel
In-Reply-To: <1362107325.2712.2.camel@localhost>

On Fri, Mar 01, 2013 at 11:08:45AM +0800, Mike wrote:
> Hi all
> 
> Any comments? or any questions about my patchset?

You were going to get some performance numbers that show a definite
benefit for using more than one MSI.

cheers

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox