* [PATCH 07/21] pSeries platform PE state retrieval
2012-02-24 9:37 [PATCH v4 " Gavin Shan
@ 2012-02-24 9:38 ` Gavin Shan
0 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-24 9:38 UTC (permalink / raw)
To: linuxppc-dev; +Cc: kernel.crashing.org, shangw
On pSeries platform, there're 2 dedicated RTAS calls introduced to
retrieve the corresponding PE's state: ibm,read-slot-reset-state and
ibm,read-slot-reset-state2.
The patch implements the retrieval of PE's state according to the
given PE address. Besides, the implementation has been abstracted by
struct eeh_ops::get_state so that EEH core components could support
multiple platforms in future.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh.h | 8 ++
arch/powerpc/platforms/pseries/eeh.c | 96 ++++---------------------
arch/powerpc/platforms/pseries/eeh_driver.c | 2 +-
arch/powerpc/platforms/pseries/eeh_pseries.c | 70 ++++++++++++++++++-
4 files changed, 94 insertions(+), 82 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 76f7b3f..1d3c9e5 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -42,6 +42,14 @@ struct device_node;
#define EEH_OPT_ENABLE 1 /* EEH enable */
#define EEH_OPT_THAW_MMIO 2 /* MMIO enable */
#define EEH_OPT_THAW_DMA 3 /* DMA enable */
+#define EEH_STATE_UNAVAILABLE (1 << 0) /* State unavailable */
+#define EEH_STATE_NOT_SUPPORT (1 << 1) /* EEH not supported */
+#define EEH_STATE_RESET_ACTIVE (1 << 2) /* Active reset */
+#define EEH_STATE_MMIO_ACTIVE (1 << 3) /* Active MMIO */
+#define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */
+#define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */
+#define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */
+
struct eeh_ops {
char *name;
int (*init)(void);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 00797e0..8d11f1f 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -88,8 +88,6 @@
/* RTAS tokens */
static int ibm_set_slot_reset;
-static int ibm_read_slot_reset_state;
-static int ibm_read_slot_reset_state2;
static int ibm_slot_error_detail;
static int ibm_configure_bridge;
static int ibm_configure_pe;
@@ -289,37 +287,6 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
}
/**
- * eeh_read_slot_reset_state - Read the reset state of a device node's slot
- * @dn: device node to read
- * @rets: array to return results in
- *
- * Read the reset state of a device node's slot through platform dependent
- * function call.
- */
-static int eeh_read_slot_reset_state(struct pci_dn *pdn, int rets[])
-{
- int token, outputs;
- int config_addr;
-
- if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
- token = ibm_read_slot_reset_state2;
- outputs = 4;
- } else {
- token = ibm_read_slot_reset_state;
- rets[2] = 0; /* fake PE Unavailable info */
- outputs = 3;
- }
-
- /* Use PE configuration address, if present */
- config_addr = pdn->eeh_config_addr;
- if (pdn->eeh_pe_config_addr)
- config_addr = pdn->eeh_pe_config_addr;
-
- return rtas_call(token, 3, outputs, rets, config_addr,
- BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
-}
-
-/**
* eeh_wait_for_slot_status - Returns error status of slot
* @pdn: pci device node
* @max_wait_msecs: maximum number to millisecs to wait
@@ -335,21 +302,15 @@ static int eeh_read_slot_reset_state(struct pci_dn *pdn, int rets[])
int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
{
int rc;
- int rets[3];
int mwait;
while (1) {
- rc = eeh_read_slot_reset_state(pdn, rets);
- if (rc) return rc;
- if (rets[1] == 0) return -1; /* EEH is not supported */
-
- if (rets[0] != 5) return rets[0]; /* return actual status */
-
- if (rets[2] == 0) return -1; /* permanently unavailable */
+ rc = eeh_ops->get_state(pdn->node, &mwait);
+ if (rc != EEH_STATE_UNAVAILABLE)
+ return rc;
if (max_wait_msecs <= 0) break;
- mwait = rets[2];
if (mwait <= 0) {
printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n",
mwait);
@@ -522,7 +483,6 @@ void eeh_clear_slot(struct device_node *dn, int mode_flag)
int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
{
int ret;
- int rets[3];
unsigned long flags;
struct pci_dn *pdn;
int rc = 0;
@@ -584,40 +544,18 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
* function zero of a multi-function device.
* In any case they must share a common PHB.
*/
- ret = eeh_read_slot_reset_state(pdn, rets);
-
- /* If the call to firmware failed, punt */
- if (ret != 0) {
- printk(KERN_WARNING "EEH: eeh_read_slot_reset_state() failed; rc=%d dn=%s\n",
- ret, dn->full_name);
- false_positives++;
- pdn->eeh_false_positives ++;
- rc = 0;
- goto dn_unlock;
- }
+ ret = eeh_ops->get_state(pdn->node, NULL);
/* Note that config-io to empty slots may fail;
* they are empty when they don't have children.
+ * We will punt with the following conditions: Failure to get
+ * PE's state, EEH not support and Permanently unavailable
+ * state, PE is in good state.
*/
- if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) {
- false_positives++;
- pdn->eeh_false_positives ++;
- rc = 0;
- goto dn_unlock;
- }
-
- /* If EEH is not supported on this device, punt. */
- if (rets[1] != 1) {
- printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
- ret, dn->full_name);
- false_positives++;
- pdn->eeh_false_positives ++;
- rc = 0;
- goto dn_unlock;
- }
-
- /* If not the kind of error we know about, punt. */
- if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
+ if ((ret < 0) ||
+ (ret == EEH_STATE_NOT_SUPPORT) ||
+ (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
+ (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
false_positives++;
pdn->eeh_false_positives ++;
rc = 0;
@@ -703,7 +641,8 @@ int eeh_pci_enable(struct pci_dn *pdn, int function)
function, rc, pdn->node->full_name);
rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
- if ((rc == 4) && (function == EEH_OPT_THAW_MMIO))
+ if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
+ (function == EEH_OPT_THAW_MMIO))
return 0;
return rc;
@@ -900,7 +839,7 @@ int eeh_reset_pe(struct pci_dn *pdn)
eeh_reset_pe_once(pdn);
rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
- if (rc == 0)
+ if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
return 0;
if (rc < 0) {
@@ -1057,7 +996,6 @@ void eeh_configure_bridge(struct pci_dn *pdn)
*/
static void *eeh_early_enable(struct device_node *dn, void *data)
{
- unsigned int rets[3];
int ret;
const u32 *class_code = of_get_property(dn, "class-code", NULL);
const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
@@ -1109,8 +1047,8 @@ static void *eeh_early_enable(struct device_node *dn, void *data)
* where EEH is not supported. Verify support
* explicitly.
*/
- ret = eeh_read_slot_reset_state(pdn, rets);
- if ((ret == 0) && (rets[1] == 1))
+ ret = eeh_ops->get_state(pdn->node, NULL);
+ if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
enable = 1;
}
@@ -1232,8 +1170,6 @@ void __init eeh_init(void)
return;
ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
- ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
- ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
ibm_configure_bridge = rtas_token("ibm,configure-bridge");
ibm_configure_pe = rtas_token("ibm,configure-pe");
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 02eab3b..4c6e0c1c 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -397,7 +397,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
/* Get the current PCI slot state. This can take a long time,
* sometimes over 3 seconds for certain systems. */
rc = eeh_wait_for_slot_status (frozen_pdn, MAX_WAIT_FOR_RECOVERY*1000);
- if (rc < 0) {
+ if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
printk(KERN_WARNING "EEH: Permanent failure\n");
goto hard_fail;
}
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 2b9543a..39567b2 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -238,7 +238,75 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
*/
static int pseries_eeh_get_state(struct device_node *dn, int *state)
{
- return 0;
+ struct pci_dn *pdn;
+ int config_addr;
+ int ret;
+ int rets[4];
+ int result;
+
+ /* Prefer the PE config address over the device's own, if it is set */
+ pdn = PCI_DN(dn);
+ config_addr = pdn->eeh_config_addr;
+ if (pdn->eeh_pe_config_addr)
+ config_addr = pdn->eeh_pe_config_addr;
+
+ if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
+ ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
+ config_addr, BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid));
+ } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
+ /* Older call yields no wait-time output (rets[2]); fake it as 0 */
+ rets[2] = 0;
+ ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
+ config_addr, BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid));
+ } else {
+ return EEH_STATE_NOT_SUPPORT;
+ }
+
+ if (ret)
+ return ret;
+
+ /* Translate the firmware state in rets[0] into EEH_STATE_* flags */
+ result = 0;
+ if (rets[1]) {
+ switch(rets[0]) {
+ case 0:
+ result &= ~EEH_STATE_RESET_ACTIVE;
+ result |= EEH_STATE_MMIO_ACTIVE;
+ result |= EEH_STATE_DMA_ACTIVE;
+ break;
+ case 1:
+ result |= EEH_STATE_RESET_ACTIVE;
+ result |= EEH_STATE_MMIO_ACTIVE;
+ result |= EEH_STATE_DMA_ACTIVE;
+ break;
+ case 2:
+ result &= ~EEH_STATE_RESET_ACTIVE;
+ result &= ~EEH_STATE_MMIO_ACTIVE;
+ result &= ~EEH_STATE_DMA_ACTIVE;
+ break;
+ case 4:
+ result &= ~EEH_STATE_RESET_ACTIVE;
+ result &= ~EEH_STATE_MMIO_ACTIVE;
+ result &= ~EEH_STATE_DMA_ACTIVE;
+ result |= EEH_STATE_MMIO_ENABLED;
+ break;
+ case 5:
+ if (rets[2]) {
+ if (state) *state = rets[2];
+ result = EEH_STATE_UNAVAILABLE;
+ break;
+ }
+ /* No wait time given: permanently unavailable, fall through */
+ default:
+ result = EEH_STATE_NOT_SUPPORT;
+ }
+ } else {
+ result = EEH_STATE_NOT_SUPPORT;
+ }
+
+ return result;
}
/**
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v5 00/21] EEH reorganization
@ 2012-02-28 6:03 Gavin Shan
2012-02-28 6:03 ` [PATCH 01/21] Cleanup on comments of EEH core Gavin Shan
` (22 more replies)
0 siblings, 23 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:03 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
This series of patches is going to reorganize EEH so that it could support
multiple platforms in future. The requirements were raised from the following aspects.
* The original EEH implementation only supports the pSeries platform, which
would be regarded as guest system. Platform powernv is coming and EEH
needs to be supported on powernv as well.
* Different platforms might be running on different firmware. Furthermore,
the firmware would supply different EEH interfaces to the kernel.
Therefore, we have to do necessary abstraction on current EEH implementation.
In order to accommodate the requirements, the series of patches have reorganized
current EEH implementation.
* The original implementation looks not clean enough. Necessary cleanup
will be done in some of the patches.
* struct eeh_ops has been introduced so that EEH core components and platform
dependent implementation could be split up. That makes it possible for EEH
to be supported on multiple platforms.
* struct eeh_dev has been introduced to replace struct pci_dn so that EEH module
works independently as much as possible.
* EEH global statistics will be maintained in a collective fashion.
v1 -> v2:
* If possible, to add "eeh_" prefix for function names.
* The format of leading function comments won't be changed in order not to
break kernel document automatic generation (e.g. by "make pdfdocs").
* The name of local variables won't be changed if there're no explicit reasons.
* Represent the PE's state in bitmap fashion.
* Some function names have been adjusted so that they look shorter and
meaningful.
* Platform operation name has been changed to "pseries".
* Merge those patches for cleanup if possible.
* The line length is kept as appropriately short if possible.
* Fixup on alignment & spacing issues.
v2 -> v3:
* Split cleanup patch into 2: one for comment cleanup and another one for
renaming function names.
* Try to use pr_warning/pr_info/pr_debug instead of printk() function call.
* Function names are adjusted a little bit so that they look more meaningful
according to comments from Michael/Ben.
* Useful comment has been kept according to Michael's comments.
* struct eeh_ops::set_eeh has been changed to eeh_ops::set_option.
* struct eeh_ops::name has been changed to "char *".
* Remove file name from the source file.
* Copyright (C) format has been changed since "(C)" isn't encouraged to use.
* The header files included in the source file have been sorted alphabetically.
* eeh_platform_init() has been replaced by eeh_pseries_init() to avoid duplicate
functions when kernel supports multiple platforms.
* "F/W" has been changed to "Firmware".
* The maximal wait time to retrieve PE's state has been covered by macro.
* It also includes changes according to the minor comments from Michael.
v3 -> v4:
* Fix some typo included in the commit messages.
* Reduce code nesting according to Ram's suggestions.
* Additional pr_warning on failure of configuring bridges.
v4 -> v5:
* OF node and PCI device are tracing the corresponding eeh device.
That has been changed to "struct eeh_dev *" instead of the original
"void *".
* The conversion between OF node, PCI device, eeh device is changed
to inline functions instead of the original macros.
* The "struct eeh_stats" has been moved from eeh.h to eeh.c. Besides,
the individual members of the struct have been changed to fixed-type
"unsigned int".
The series of patches (v5) has been verified on Firebird-L machine. In order to carry out
the test, you have to install IBM Power Tools from IBM internal yum source. Following
command is used to force EEH check on ethernet interface, which could be recovered eventually
by EEH and device driver successfully. You could keep pinging to the blade before issuing
the following command to force EEH. You should see the network interface can't be reached for
a moment and everything will be recovered couple of seconds after the forced EEH error. At the
same time, you should see EEH error log out of system console.
* errinjct eeh -v -f 0 -p U78AE.001.WZS00M9-P1-C18-L1-T2 -a 0x0 -m 0x0
-----
arch/powerpc/include/asm/device.h | 3 +
arch/powerpc/include/asm/eeh.h | 134 +++-
arch/powerpc/include/asm/eeh_event.h | 33 +-
arch/powerpc/include/asm/ppc-pci.h | 89 +--
arch/powerpc/kernel/of_platform.c | 3 +
arch/powerpc/kernel/rtas_pci.c | 3 +
arch/powerpc/platforms/pseries/Makefile | 3 +-
arch/powerpc/platforms/pseries/eeh.c | 1044 ++++++++++++--------------
arch/powerpc/platforms/pseries/eeh_cache.c | 44 +-
arch/powerpc/platforms/pseries/eeh_dev.c | 102 +++
arch/powerpc/platforms/pseries/eeh_driver.c | 213 +++---
arch/powerpc/platforms/pseries/eeh_event.c | 55 +-
arch/powerpc/platforms/pseries/eeh_pseries.c | 565 ++++++++++++++
arch/powerpc/platforms/pseries/eeh_sysfs.c | 25 +-
arch/powerpc/platforms/pseries/msi.c | 2 +-
arch/powerpc/platforms/pseries/pci_dlpar.c | 3 +
arch/powerpc/platforms/pseries/setup.c | 7 +-
include/linux/of.h | 10 +
include/linux/pci.h | 7 +
19 files changed, 1477 insertions(+), 868 deletions(-)
Thanks,
Gavin
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH 01/21] Cleanup on comments of EEH core
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
@ 2012-02-28 6:03 ` Gavin Shan
2012-02-28 6:03 ` [PATCH 02/21] Cleanup on function names " Gavin Shan
` (21 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:03 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
The EEH has been implemented on pSeries platform. The original
code looks a little bit nasty. The patch does cleanup on the
current EEH implementation so that it looks more clean.
* Duplicated comments have been removed from the corresponding
header files.
* Comments have been reorganized so that it looks more clean.
* The leading comments of functions are adjusted for a little
bit so that the result of "make pdfdocs" would be more
unified.
* Function definitions and calls have unified format as "xxx()".
That means the format "xxx ()" has been replaced by "xxx()".
* There're multiple functions implemented for resetting PE. The
positions of those functions have been moved around so that they
are adjacent to each other to reflect their relationship.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh.h | 26 +--
arch/powerpc/include/asm/ppc-pci.h | 71 +------
arch/powerpc/platforms/pseries/eeh.c | 400 +++++++++++++++++++++++-----------
3 files changed, 276 insertions(+), 221 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 66ea9b8..2328877 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -1,6 +1,6 @@
/*
- * eeh.h
* Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation.
+ * Copyright 2001-2012 IBM Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -40,8 +40,10 @@ extern int eeh_subsystem_enabled;
#define EEH_MODE_RECOVERING (1<<3)
#define EEH_MODE_IRQ_DISABLED (1<<4)
-/* Max number of EEH freezes allowed before we consider the device
- * to be permanently disabled. */
+/*
+ * Max number of EEH freezes allowed before we consider the device
+ * to be permanently disabled.
+ */
#define EEH_MAX_ALLOWED_FREEZES 5
void __init eeh_init(void);
@@ -49,26 +51,8 @@ unsigned long eeh_check_failure(const volatile void __iomem *token,
unsigned long val);
int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev);
void __init pci_addr_cache_build(void);
-
-/**
- * eeh_add_device_early
- * eeh_add_device_late
- *
- * Perform eeh initialization for devices added after boot.
- * Call eeh_add_device_early before doing any i/o to the
- * device (including config space i/o). Call eeh_add_device_late
- * to finish the eeh setup for this device.
- */
void eeh_add_device_tree_early(struct device_node *);
void eeh_add_device_tree_late(struct pci_bus *);
-
-/**
- * eeh_remove_device_recursive - undo EEH for device & children.
- * @dev: pci device to be removed
- *
- * As above, this removes the device; it also removes child
- * pci devices as well.
- */
void eeh_remove_bus_device(struct pci_dev *);
/**
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 6d42297..221d82f 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -47,92 +47,27 @@ extern int rtas_setup_phb(struct pci_controller *phb);
extern unsigned long pci_probe_only;
-/* ---- EEH internal-use-only related routines ---- */
#ifdef CONFIG_EEH
void pci_addr_cache_insert_device(struct pci_dev *dev);
void pci_addr_cache_remove_device(struct pci_dev *dev);
void pci_addr_cache_build(void);
struct pci_dev *pci_get_device_by_addr(unsigned long addr);
-
-/**
- * eeh_slot_error_detail -- record and EEH error condition to the log
- * @pdn: pci device node
- * @severity: EEH_LOG_TEMP_FAILURE or EEH_LOG_PERM_FAILURE
- *
- * Obtains the EEH error details from the RTAS subsystem,
- * and then logs these details with the RTAS error log system.
- */
#define EEH_LOG_TEMP_FAILURE 1
#define EEH_LOG_PERM_FAILURE 2
void eeh_slot_error_detail (struct pci_dn *pdn, int severity);
-
-/**
- * rtas_pci_enable - enable IO transfers for this slot
- * @pdn: pci device node
- * @function: either EEH_THAW_MMIO or EEH_THAW_DMA
- *
- * Enable I/O transfers to this slot
- */
#define EEH_THAW_MMIO 2
#define EEH_THAW_DMA 3
int rtas_pci_enable(struct pci_dn *pdn, int function);
-
-/**
- * rtas_set_slot_reset -- unfreeze a frozen slot
- * @pdn: pci device node
- *
- * Clear the EEH-frozen condition on a slot. This routine
- * does this by asserting the PCI #RST line for 1/8th of
- * a second; this routine will sleep while the adapter is
- * being reset.
- *
- * Returns a non-zero value if the reset failed.
- */
int rtas_set_slot_reset (struct pci_dn *);
int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs);
-
-/**
- * eeh_restore_bars - Restore device configuration info.
- * @pdn: pci device node
- *
- * A reset of a PCI device will clear out its config space.
- * This routines will restore the config space for this
- * device, and is children, to values previously obtained
- * from the firmware.
- */
void eeh_restore_bars(struct pci_dn *);
-
-/**
- * rtas_configure_bridge -- firmware initialization of pci bridge
- * @pdn: pci device node
- *
- * Ask the firmware to configure all PCI bridges devices
- * located behind the indicated node. Required after a
- * pci device reset. Does essentially the same hing as
- * eeh_restore_bars, but for brdges, and lets firmware
- * do the work.
- */
void rtas_configure_bridge(struct pci_dn *);
-
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
-
-/**
- * eeh_mark_slot -- set mode flags for pertition endpoint
- * @pdn: pci device node
- *
- * mark and clear slots: find "partition endpoint" PE and set or
- * clear the flags for each subnode of the PE.
- */
-void eeh_mark_slot (struct device_node *dn, int mode_flag);
-void eeh_clear_slot (struct device_node *dn, int mode_flag);
-
-/**
- * find_device_pe -- Find the associated "Partiationable Endpoint" PE
- * @pdn: pci device node
- */
-struct device_node * find_device_pe(struct device_node *dn);
+void eeh_mark_slot(struct device_node *dn, int mode_flag);
+void eeh_clear_slot(struct device_node *dn, int mode_flag);
+struct device_node *find_device_pe(struct device_node *dn);
void eeh_sysfs_add_device(struct pci_dev *pdev);
void eeh_sysfs_remove_device(struct pci_dev *pdev);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index c0b40af..5f6d37b 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -1,8 +1,8 @@
/*
- * eeh.c
* Copyright IBM Corporation 2001, 2005, 2006
* Copyright Dave Engebretsen & Todd Inglett 2001
* Copyright Linas Vepstas 2005, 2006
+ * Copyright 2001-2012 IBM Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -22,7 +22,7 @@
*/
#include <linux/delay.h>
-#include <linux/sched.h> /* for init_mm */
+#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
@@ -129,9 +129,16 @@ static unsigned long slot_resets;
#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
-/* --------------------------------------------------------------- */
-/* Below lies the EEH event infrastructure */
-
+/**
+ * rtas_slot_error_detail - Retrieve error log through RTAS call
+ * @pdn: device node
+ * @severity: temporary or permanent error log
+ * @driver_log: driver log to be combined with the retrieved error log
+ * @loglen: length of driver log
+ *
+ * This routine should be called to retrieve error log through the dedicated
+ * RTAS call.
+ */
static void rtas_slot_error_detail(struct pci_dn *pdn, int severity,
char *driver_log, size_t loglen)
{
@@ -163,7 +170,7 @@ static void rtas_slot_error_detail(struct pci_dn *pdn, int severity,
}
/**
- * gather_pci_data - copy assorted PCI config space registers to buff
+ * gather_pci_data - Copy assorted PCI config space registers to buff
* @pdn: device to report data for
* @buf: point to buffer in which to log
* @len: amount of room in buffer
@@ -258,6 +265,16 @@ static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
return n;
}
+/**
+ * eeh_slot_error_detail - Generate combined log including driver log and error log
+ * @pdn: device node
+ * @severity: temporary or permanent error log
+ *
+ * This routine should be called to generate the combined log, which
+ * is comprised of driver log and error log. The driver log is figured
+ * out from the config space of the corresponding PCI device, while
+ * the error log is fetched through platform dependent function call.
+ */
void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
{
size_t loglen = 0;
@@ -275,6 +292,9 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
* read_slot_reset_state - Read the reset state of a device node's slot
* @dn: device node to read
* @rets: array to return results in
+ *
+ * Read the reset state of a device node's slot through platform dependent
+ * function call.
*/
static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
{
@@ -300,9 +320,9 @@ static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
}
/**
- * eeh_wait_for_slot_status - returns error status of slot
- * @pdn pci device node
- * @max_wait_msecs maximum number to millisecs to wait
+ * eeh_wait_for_slot_status - Returns error status of slot
+ * @pdn: pci device node
+ * @max_wait_msecs: maximum number to millisecs to wait
*
* Return negative value if a permanent error, else return
* Partition Endpoint (PE) status value.
@@ -332,16 +352,16 @@ eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
mwait = rets[2];
if (mwait <= 0) {
- printk (KERN_WARNING
- "EEH: Firmware returned bad wait value=%d\n", mwait);
+ printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n",
+ mwait);
mwait = 1000;
} else if (mwait > 300*1000) {
- printk (KERN_WARNING
- "EEH: Firmware is taking too long, time=%d\n", mwait);
+ printk(KERN_WARNING "EEH: Firmware is taking too long, time=%d\n",
+ mwait);
mwait = 300*1000;
}
max_wait_msecs -= mwait;
- msleep (mwait);
+ msleep(mwait);
}
printk(KERN_WARNING "EEH: Timed out waiting for slot status\n");
@@ -349,8 +369,11 @@ eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
}
/**
- * eeh_token_to_phys - convert EEH address token to phys address
- * @token i/o token, should be address in the form 0xA....
+ * eeh_token_to_phys - Convert EEH address token to phys address
+ * @token: I/O token, should be address in the form 0xA....
+ *
+ * This routine should be called to convert virtual I/O address
+ * to physical one.
*/
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
@@ -365,8 +388,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
return pa | (token & (PAGE_SIZE-1));
}
-/**
- * Return the "partitionable endpoint" (pe) under which this device lies
+/**
+ * find_device_pe - Retrieve the PE for the given device
+ * @dn: device node
+ *
+ * Return the PE under which this device lies
*/
struct device_node * find_device_pe(struct device_node *dn)
{
@@ -377,14 +403,18 @@ struct device_node * find_device_pe(struct device_node *dn)
return dn;
}
-/** Mark all devices that are children of this device as failed.
- * Mark the device driver too, so that it can see the failure
- * immediately; this is critical, since some drivers poll
- * status registers in interrupts ... If a driver is polling,
- * and the slot is frozen, then the driver can deadlock in
- * an interrupt context, which is bad.
+/**
+ * __eeh_mark_slot - Mark all child devices as failed
+ * @parent: parent device
+ * @mode_flag: failure flag
+ *
+ * Mark all devices that are children of this device as failed.
+ * Mark the device driver too, so that it can see the failure
+ * immediately; this is critical, since some drivers poll
+ * status registers in interrupts ... If a driver is polling,
+ * and the slot is frozen, then the driver can deadlock in
+ * an interrupt context, which is bad.
*/
-
static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
{
struct device_node *dn;
@@ -404,10 +434,18 @@ static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
}
}
-void eeh_mark_slot (struct device_node *dn, int mode_flag)
+/**
+ * eeh_mark_slot - Mark the indicated device and its children as failed
+ * @dn: parent device
+ * @mode_flag: failure flag
+ *
+ * Mark the indicated device and its child devices as failed.
+ * The device drivers are marked as failed as well.
+ */
+void eeh_mark_slot(struct device_node *dn, int mode_flag)
{
struct pci_dev *dev;
- dn = find_device_pe (dn);
+ dn = find_device_pe(dn);
/* Back up one, since config addrs might be shared */
if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
@@ -423,6 +461,13 @@ void eeh_mark_slot (struct device_node *dn, int mode_flag)
__eeh_mark_slot(dn, mode_flag);
}
+/**
+ * __eeh_clear_slot - Clear failure flag for the child devices
+ * @parent: parent device
+ * @mode_flag: flag to be cleared
+ *
+ * Clear failure flag for the child devices.
+ */
static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
{
struct device_node *dn;
@@ -436,12 +481,19 @@ static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
}
}
-void eeh_clear_slot (struct device_node *dn, int mode_flag)
+/**
+ * eeh_clear_slot - Clear failure flag for the indicated device and its children
+ * @dn: parent device
+ * @mode_flag: flag to be cleared
+ *
+ * Clear failure flag for the indicated device and its children.
+ */
+void eeh_clear_slot(struct device_node *dn, int mode_flag)
{
unsigned long flags;
raw_spin_lock_irqsave(&confirm_error_lock, flags);
- dn = find_device_pe (dn);
+ dn = find_device_pe(dn);
/* Back up one, since config addrs might be shared */
if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
@@ -453,43 +505,10 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag)
raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
}
-void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
-{
- struct device_node *dn;
-
- for_each_child_of_node(parent, dn) {
- if (PCI_DN(dn)) {
-
- struct pci_dev *dev = PCI_DN(dn)->pcidev;
-
- if (dev && dev->driver)
- *freset |= dev->needs_freset;
-
- __eeh_set_pe_freset(dn, freset);
- }
- }
-}
-
-void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
-{
- struct pci_dev *dev;
- dn = find_device_pe(dn);
-
- /* Back up one, since config addrs might be shared */
- if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
- dn = dn->parent;
-
- dev = PCI_DN(dn)->pcidev;
- if (dev)
- *freset |= dev->needs_freset;
-
- __eeh_set_pe_freset(dn, freset);
-}
-
/**
- * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
- * @dn device node
- * @dev pci device, if known
+ * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @dn: device node
+ * @dev: pci device, if known
*
* Check for an EEH failure for the given device node. Call this
* routine if the result of a read was all 0xff's and you want to
@@ -548,11 +567,11 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
pdn->eeh_check_count ++;
if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) {
location = of_get_property(dn, "ibm,loc-code", NULL);
- printk (KERN_ERR "EEH: %d reads ignored for recovering device at "
+ printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
"location=%s driver=%s pci addr=%s\n",
pdn->eeh_check_count, location,
eeh_driver_name(dev), eeh_pci_name(dev));
- printk (KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+ printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
dump_stack();
}
@@ -579,7 +598,8 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
}
/* Note that config-io to empty slots may fail;
- * they are empty when they don't have children. */
+ * they are empty when they don't have children.
+ */
if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) {
false_positives++;
pdn->eeh_false_positives ++;
@@ -609,15 +629,17 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
/* Avoid repeated reports of this failure, including problems
* with other functions on this device, and functions under
- * bridges. */
- eeh_mark_slot (dn, EEH_MODE_ISOLATED);
+ * bridges.
+ */
+ eeh_mark_slot(dn, EEH_MODE_ISOLATED);
raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
- eeh_send_failure_event (dn, dev);
+ eeh_send_failure_event(dn, dev);
/* Most EEH events are due to device driver bugs. Having
* a stack trace will help the device-driver authors figure
- * out what happened. So print that out. */
+ * out what happened. So print that out.
+ */
dump_stack();
return 1;
@@ -629,9 +651,9 @@ dn_unlock:
EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
/**
- * eeh_check_failure - check if all 1's data is due to EEH slot freeze
- * @token i/o token, should be address in the form 0xA....
- * @val value, should be all 1's (XXX why do we need this arg??)
+ * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @token: I/O token, should be address in the form 0xA....
+ * @val: value, should be all 1's (XXX why do we need this arg??)
*
* Check for an EEH failure at the given token address. Call this
* routine if the result of a read was all 0xff's and you want to
@@ -655,7 +677,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
}
dn = pci_device_to_OF_node(dev);
- eeh_dn_check_failure (dn, dev);
+ eeh_dn_check_failure(dn, dev);
pci_dev_put(dev);
return val;
@@ -663,14 +685,15 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
EXPORT_SYMBOL(eeh_check_failure);
-/* ------------------------------------------------------------- */
-/* The code below deals with error recovery */
/**
- * rtas_pci_enable - enable MMIO or DMA transfers for this slot
+ * rtas_pci_enable - Enable MMIO or DMA transfers for this slot
* @pdn pci device node
+ *
+ * This routine should be called to reenable frozen MMIO or DMA
+ * so that it would work correctly again. It's useful while doing
+ * recovery or log collection on the indicated device.
*/
-
int
rtas_pci_enable(struct pci_dn *pdn, int function)
{
@@ -692,7 +715,7 @@ rtas_pci_enable(struct pci_dn *pdn, int function)
printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
function, rc, pdn->node->full_name);
- rc = eeh_wait_for_slot_status (pdn, PCI_BUS_RESET_WAIT_MSEC);
+ rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
if ((rc == 4) && (function == EEH_THAW_MMIO))
return 0;
@@ -700,27 +723,25 @@ rtas_pci_enable(struct pci_dn *pdn, int function)
}
/**
- * rtas_pci_slot_reset - raises/lowers the pci #RST line
- * @pdn pci device node
+ * rtas_pci_slot_reset - Raises/Lowers the pci #RST line
+ * @pdn: pci device node
* @state: 1/0 to raise/lower the #RST
*
* Clear the EEH-frozen condition on a slot. This routine
* asserts the PCI #RST line if the 'state' argument is '1',
* and drops the #RST line if 'state is '0'. This routine is
* safe to call in an interrupt context.
- *
*/
-
static void
rtas_pci_slot_reset(struct pci_dn *pdn, int state)
{
int config_addr;
int rc;
- BUG_ON (pdn==NULL);
+ BUG_ON(pdn==NULL);
if (!pdn->phb) {
- printk (KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
+ printk(KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
pdn->node->full_name);
return;
}
@@ -752,12 +773,12 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)
/**
* pcibios_set_pcie_slot_reset - Set PCI-E reset state
- * @dev: pci device struct
- * @state: reset state to enter
+ * @dev: pci device struct
+ * @state: reset state to enter
*
* Return value:
* 0 if success
- **/
+ */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
struct device_node *dn = pci_device_to_OF_node(dev);
@@ -781,10 +802,62 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
}
/**
- * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
- * @pdn: pci device node to be reset.
+ * __eeh_set_pe_freset - Check the required reset for child devices
+ * @parent: parent device
+ * @freset: return value
+ *
+ * Each device might have its preferred reset type: fundamental or
+ * hot reset. The routine is used to collect the information from
+ * the child devices so that they could be reset accordingly.
*/
+void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
+{
+ struct device_node *dn;
+
+ for_each_child_of_node(parent, dn) {
+ if (PCI_DN(dn)) {
+ struct pci_dev *dev = PCI_DN(dn)->pcidev;
+
+ if (dev && dev->driver)
+ *freset |= dev->needs_freset;
+
+ __eeh_set_pe_freset(dn, freset);
+ }
+ }
+}
+
+/**
+ * eeh_set_pe_freset - Check the required reset for the indicated device and its children
+ * @dn: parent device
+ * @freset: return value
+ *
+ * Each device might have its preferred reset type: fundamental or
+ * hot reset. The routine is used to collect the information for
+ * the indicated device and its children so that the bunch of the
+ * devices could be reset properly.
+ */
+void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
+{
+ struct pci_dev *dev;
+ dn = find_device_pe(dn);
+
+ /* Back up one, since config addrs might be shared */
+ if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+ dn = dn->parent;
+ dev = PCI_DN(dn)->pcidev;
+ if (dev)
+ *freset |= dev->needs_freset;
+
+ __eeh_set_pe_freset(dn, freset);
+}
+
+/**
+ * __rtas_set_slot_reset - Assert the pci #RST line for 1/4 second
+ * @pdn: pci device node to be reset.
+ *
+ * Assert the PCI #RST line for 1/4 second.
+ */
static void __rtas_set_slot_reset(struct pci_dn *pdn)
{
unsigned int freset = 0;
@@ -803,25 +876,35 @@ static void __rtas_set_slot_reset(struct pci_dn *pdn)
rtas_pci_slot_reset(pdn, 1);
/* The PCI bus requires that the reset be held high for at least
- * a 100 milliseconds. We wait a bit longer 'just in case'. */
-
+ * a 100 milliseconds. We wait a bit longer 'just in case'.
+ */
#define PCI_BUS_RST_HOLD_TIME_MSEC 250
- msleep (PCI_BUS_RST_HOLD_TIME_MSEC);
+ msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
/* We might get hit with another EEH freeze as soon as the
* pci slot reset line is dropped. Make sure we don't miss
- * these, and clear the flag now. */
- eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED);
+ * these, and clear the flag now.
+ */
+ eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED);
- rtas_pci_slot_reset (pdn, 0);
+ rtas_pci_slot_reset(pdn, 0);
/* After a PCI slot has been reset, the PCI Express spec requires
* a 1.5 second idle time for the bus to stabilize, before starting
- * up traffic. */
+ * up traffic.
+ */
#define PCI_BUS_SETTLE_TIME_MSEC 1800
- msleep (PCI_BUS_SETTLE_TIME_MSEC);
+ msleep(PCI_BUS_SETTLE_TIME_MSEC);
}
+/**
+ * rtas_set_slot_reset - Reset the indicated PE
+ * @pdn: PCI device node
+ *
+ * This routine should be called to reset indicated device, including
+ * PE. A PE might include multiple PCI devices and sometimes PCI bridges
+ * might be involved as well.
+ */
int rtas_set_slot_reset(struct pci_dn *pdn)
{
int i, rc;
@@ -846,7 +929,6 @@ int rtas_set_slot_reset(struct pci_dn *pdn)
return -1;
}
-/* ------------------------------------------------------- */
/** Save and restore of PCI BARs
*
* Although firmware will set up BARs during boot, it doesn't
@@ -863,7 +945,7 @@ int rtas_set_slot_reset(struct pci_dn *pdn)
* the expansion ROM base address, the latency timer, and etc.
* from the saved values in the device node.
*/
-static inline void __restore_bars (struct pci_dn *pdn)
+static inline void __restore_bars(struct pci_dn *pdn)
{
int i;
u32 cmd;
@@ -879,17 +961,18 @@ static inline void __restore_bars (struct pci_dn *pdn)
#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
#define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)])
- rtas_write_config (pdn, PCI_CACHE_LINE_SIZE, 1,
+ rtas_write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
SAVED_BYTE(PCI_CACHE_LINE_SIZE));
- rtas_write_config (pdn, PCI_LATENCY_TIMER, 1,
+ rtas_write_config(pdn, PCI_LATENCY_TIMER, 1,
SAVED_BYTE(PCI_LATENCY_TIMER));
/* max latency, min grant, interrupt pin and line */
rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);
/* Restore PERR & SERR bits, some devices require it,
- don't touch the other command bits */
+ * don't touch the other command bits
+ */
rtas_read_config(pdn, PCI_COMMAND, 4, &cmd);
if (pdn->config_space[1] & PCI_COMMAND_PARITY)
cmd |= PCI_COMMAND_PARITY;
@@ -903,7 +986,8 @@ static inline void __restore_bars (struct pci_dn *pdn)
}
/**
- * eeh_restore_bars - restore the PCI config space info
+ * eeh_restore_bars - Restore the PCI config space info
+ * @pdn: PCI device node
*
* This routine performs a recursive walk to the children
* of this device as well.
@@ -915,14 +999,15 @@ void eeh_restore_bars(struct pci_dn *pdn)
return;
if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
- __restore_bars (pdn);
+ __restore_bars(pdn);
for_each_child_of_node(pdn->node, dn)
- eeh_restore_bars (PCI_DN(dn));
+ eeh_restore_bars(PCI_DN(dn));
}
/**
- * eeh_save_bars - save device bars
+ * eeh_save_bars - Save device bars
+ * @pdn: PCI device node
*
* Save the values of the device bars. Unlike the restore
* routine, this routine is *not* recursive. This is because
@@ -940,6 +1025,14 @@ static void eeh_save_bars(struct pci_dn *pdn)
rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]);
}
+/**
+ * rtas_configure_bridge - Configure PCI bridges for the indicated PE
+ * @pdn: PCI device node
+ *
+ * PCI bridges might be included in PE. In order to make the PE work
+ * again. The included PCI bridges should be recovered after the PE
+ * encounters frozen state.
+ */
void
rtas_configure_bridge(struct pci_dn *pdn)
{
@@ -963,17 +1056,11 @@ rtas_configure_bridge(struct pci_dn *pdn)
BUID_HI(pdn->phb->buid),
BUID_LO(pdn->phb->buid));
if (rc) {
- printk (KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
+ printk(KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
rc, pdn->node->full_name);
}
}
-/* ------------------------------------------------------------- */
-/* The code below deals with enabling EEH for devices during the
- * early boot sequence. EEH must be enabled before any PCI probing
- * can be done.
- */
-
#define EEH_ENABLE 1
struct eeh_early_enable_info {
@@ -981,7 +1068,18 @@ struct eeh_early_enable_info {
unsigned int buid_lo;
};
-static int get_pe_addr (int config_addr,
+/**
+ * get_pe_addr - Retrieve PE address with given BDF address
+ * @config_addr: BDF address
+ * @info: BUID of the associated PHB
+ *
+ * There're 2 kinds of addresses existing in EEH core components:
+ * BDF address and PE address. Besides, there has dedicated platform
+ * dependent function call to retrieve the PE address according to
+ * the given BDF address. Further more, we prefer PE address on BDF
+ * address in EEH core components.
+ */
+static int get_pe_addr(int config_addr,
struct eeh_early_enable_info *info)
{
unsigned int rets[3];
@@ -990,12 +1088,12 @@ static int get_pe_addr (int config_addr,
/* Use latest config-addr token on power6 */
if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
/* Make sure we have a PE in hand */
- ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets,
+ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
config_addr, info->buid_hi, info->buid_lo, 1);
if (ret || (rets[0]==0))
return 0;
- ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets,
+ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
config_addr, info->buid_hi, info->buid_lo, 0);
if (ret)
return 0;
@@ -1004,7 +1102,7 @@ static int get_pe_addr (int config_addr,
/* Use older config-addr token on power5 */
if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
- ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets,
+ ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
config_addr, info->buid_hi, info->buid_lo, 0);
if (ret)
return 0;
@@ -1013,7 +1111,15 @@ static int get_pe_addr (int config_addr,
return 0;
}
-/* Enable eeh for the given device node. */
+/**
+ * early_enable_eeh - Early enable EEH on the indicated device
+ * @dn: device node
+ * @data: BUID
+ *
+ * Enable EEH functionality on the specified PCI device. The function
+ * is expected to be called before real PCI probing is done. However,
+ * the PHBs have been initialized at this point.
+ */
static void *early_enable_eeh(struct device_node *dn, void *data)
{
unsigned int rets[3];
@@ -1047,7 +1153,8 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
pdn->class_code = *class_code;
/* Ok... see if this device supports EEH. Some do, some don't,
- * and the only way to find out is to check each and every one. */
+ * and the only way to find out is to check each and every one.
+ */
regs = of_get_property(dn, "reg", NULL);
if (regs) {
/* First register entry is addr (00BBSS00) */
@@ -1061,13 +1168,15 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
pdn->eeh_config_addr = regs[0];
/* If the newer, better, ibm,get-config-addr-info is supported,
- * then use that instead. */
+ * then use that instead.
+ */
pdn->eeh_pe_config_addr = get_pe_addr(pdn->eeh_config_addr, info);
/* Some older systems (Power4) allow the
* ibm,set-eeh-option call to succeed even on nodes
* where EEH is not supported. Verify support
- * explicitly. */
+ * explicitly.
+ */
ret = read_slot_reset_state(pdn, rets);
if ((ret == 0) && (rets[1] == 1))
enable = 1;
@@ -1083,7 +1192,8 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
} else {
/* This device doesn't support EEH, but it may have an
- * EEH parent, in which case we mark it as supported. */
+ * EEH parent, in which case we mark it as supported.
+ */
if (dn->parent && PCI_DN(dn->parent)
&& (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
/* Parent supports EEH. */
@@ -1101,7 +1211,9 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
return NULL;
}
-/*
+/**
+ * eeh_init - EEH initialization
+ *
* Initialize EEH by trying to enable it for all of the adapters in the system.
* As a side effect we can determine here if eeh is supported at all.
* Note that we leave EEH on so failed config cycles won't cause a machine
@@ -1133,7 +1245,7 @@ void __init eeh_init(void)
ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
- ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
+ ibm_configure_bridge = rtas_token("ibm,configure-bridge");
ibm_configure_pe = rtas_token("ibm,configure-pe");
if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
@@ -1170,7 +1282,7 @@ void __init eeh_init(void)
}
/**
- * eeh_add_device_early - enable EEH for the indicated device_node
+ * eeh_add_device_early - Enable EEH for the indicated device_node
* @dn: device node for which to set up EEH
*
* This routine must be used to perform EEH initialization for PCI
@@ -1199,6 +1311,14 @@ static void eeh_add_device_early(struct device_node *dn)
early_enable_eeh(dn, &info);
}
+/**
+ * eeh_add_device_tree_early - Enable EEH for the indicated device
+ * @dn: device node
+ *
+ * This routine must be used to perform EEH initialization for the
+ * indicated PCI device that was added after system boot (e.g.
+ * hotplug, dlpar).
+ */
void eeh_add_device_tree_early(struct device_node *dn)
{
struct device_node *sib;
@@ -1210,7 +1330,7 @@ void eeh_add_device_tree_early(struct device_node *dn)
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
/**
- * eeh_add_device_late - perform EEH initialization for the indicated pci device
+ * eeh_add_device_late - Perform EEH initialization for the indicated pci device
* @dev: pci device for which to set up EEH
*
* This routine must be used to complete EEH initialization for PCI
@@ -1234,13 +1354,21 @@ static void eeh_add_device_late(struct pci_dev *dev)
}
WARN_ON(pdn->pcidev);
- pci_dev_get (dev);
+ pci_dev_get(dev);
pdn->pcidev = dev;
pci_addr_cache_insert_device(dev);
eeh_sysfs_add_device(dev);
}
+/**
+ * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
+ * @bus: PCI bus
+ *
+ * This routine must be used to perform EEH initialization for PCI
+ * devices which are attached to the indicated PCI bus. The PCI bus
+ * is added after system boot through hotplug or dlpar.
+ */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
struct pci_dev *dev;
@@ -1257,7 +1385,7 @@ void eeh_add_device_tree_late(struct pci_bus *bus)
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
/**
- * eeh_remove_device - undo EEH setup for the indicated pci device
+ * eeh_remove_device - Undo EEH setup for the indicated pci device
* @dev: pci device to be removed
*
* This routine should be called when a device is removed from
@@ -1281,12 +1409,20 @@ static void eeh_remove_device(struct pci_dev *dev)
return;
}
PCI_DN(dn)->pcidev = NULL;
- pci_dev_put (dev);
+ pci_dev_put(dev);
pci_addr_cache_remove_device(dev);
eeh_sysfs_remove_device(dev);
}
+/**
+ * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
+ * @dev: PCI device
+ *
+ * This routine must be called when a device is removed from the
+ * running system through hotplug or dlpar. The corresponding
+ * PCI address cache will be removed.
+ */
void eeh_remove_bus_device(struct pci_dev *dev)
{
struct pci_bus *bus = dev->subordinate;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 02/21] Cleanup on function names of EEH core
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
2012-02-28 6:03 ` [PATCH 01/21] Cleanup on comments of EEH core Gavin Shan
@ 2012-02-28 6:03 ` Gavin Shan
2012-02-28 6:03 ` [PATCH 03/21] Platform dependent EEH operations Gavin Shan
` (20 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:03 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
EEH has been implemented on the pSeries platform, but the original
code is somewhat untidy. This patch cleans up the function names in
the current EEH implementation so that the code reads more cleanly.
* Add the prefix "eeh" to function names where possible.
* Adjust some function names so that they are shorter and more
meaningful.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/ppc-pci.h | 8 +-
arch/powerpc/platforms/pseries/eeh.c | 102 +++++++++++++--------------
arch/powerpc/platforms/pseries/eeh_driver.c | 10 ++--
arch/powerpc/platforms/pseries/msi.c | 2 +-
4 files changed, 59 insertions(+), 63 deletions(-)
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 221d82f..605a970 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -58,16 +58,16 @@ struct pci_dev *pci_get_device_by_addr(unsigned long addr);
void eeh_slot_error_detail (struct pci_dn *pdn, int severity);
#define EEH_THAW_MMIO 2
#define EEH_THAW_DMA 3
-int rtas_pci_enable(struct pci_dn *pdn, int function);
-int rtas_set_slot_reset (struct pci_dn *);
+int eeh_pci_enable(struct pci_dn *pdn, int function);
+int eeh_reset_pe(struct pci_dn *);
int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs);
void eeh_restore_bars(struct pci_dn *);
-void rtas_configure_bridge(struct pci_dn *);
+void eeh_configure_bridge(struct pci_dn *);
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
void eeh_mark_slot(struct device_node *dn, int mode_flag);
void eeh_clear_slot(struct device_node *dn, int mode_flag);
-struct device_node *find_device_pe(struct device_node *dn);
+struct device_node *eeh_find_device_pe(struct device_node *dn);
void eeh_sysfs_add_device(struct pci_dev *pdev);
void eeh_sysfs_remove_device(struct pci_dev *pdev);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 5f6d37b..fa88589 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -130,7 +130,7 @@ static unsigned long slot_resets;
#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
/**
- * rtas_slot_error_detail - Retrieve error log through RTAS call
+ * eeh_rtas_slot_error_detail - Retrieve error log through RTAS call
* @pdn: device node
* @severity: temporary or permanent error log
* @driver_log: driver log to be combined with the retrieved error log
@@ -139,7 +139,7 @@ static unsigned long slot_resets;
* This routine should be called to retrieve error log through the dedicated
* RTAS call.
*/
-static void rtas_slot_error_detail(struct pci_dn *pdn, int severity,
+static void eeh_rtas_slot_error_detail(struct pci_dn *pdn, int severity,
char *driver_log, size_t loglen)
{
int config_addr;
@@ -170,7 +170,7 @@ static void rtas_slot_error_detail(struct pci_dn *pdn, int severity,
}
/**
- * gather_pci_data - Copy assorted PCI config space registers to buff
+ * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
* @pdn: device to report data for
* @buf: point to buffer in which to log
* @len: amount of room in buffer
@@ -178,7 +178,7 @@ static void rtas_slot_error_detail(struct pci_dn *pdn, int severity,
* This routine captures assorted PCI configuration space data,
* and puts them into a buffer for RTAS error logging.
*/
-static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
+static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
{
struct pci_dev *dev = pdn->pcidev;
u32 cfg;
@@ -258,7 +258,7 @@ static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
for_each_child_of_node(pdn->node, dn) {
pdn = PCI_DN(dn);
if (pdn)
- n += gather_pci_data(pdn, buf+n, len-n);
+ n += eeh_gather_pci_data(pdn, buf+n, len-n);
}
}
@@ -280,23 +280,23 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
size_t loglen = 0;
pci_regs_buf[0] = 0;
- rtas_pci_enable(pdn, EEH_THAW_MMIO);
- rtas_configure_bridge(pdn);
+ eeh_pci_enable(pdn, EEH_THAW_MMIO);
+ eeh_configure_bridge(pdn);
eeh_restore_bars(pdn);
- loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
+ loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
- rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
+ eeh_rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
}
/**
- * read_slot_reset_state - Read the reset state of a device node's slot
+ * eeh_read_slot_reset_state - Read the reset state of a device node's slot
* @dn: device node to read
* @rets: array to return results in
*
* Read the reset state of a device node's slot through platform dependent
* function call.
*/
-static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
+static int eeh_read_slot_reset_state(struct pci_dn *pdn, int rets[])
{
int token, outputs;
int config_addr;
@@ -332,15 +332,14 @@ static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
* the max allowed wait time is exceeded, in which case
* a -2 is returned.
*/
-int
-eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
+int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
{
int rc;
int rets[3];
int mwait;
while (1) {
- rc = read_slot_reset_state(pdn, rets);
+ rc = eeh_read_slot_reset_state(pdn, rets);
if (rc) return rc;
if (rets[1] == 0) return -1; /* EEH is not supported */
@@ -389,12 +388,12 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
}
/**
- * find_device_pe - Retrieve the PE for the given device
+ * eeh_find_device_pe - Retrieve the PE for the given device
* @dn: device node
*
* Return the PE under which this device lies
*/
-struct device_node * find_device_pe(struct device_node *dn)
+struct device_node *eeh_find_device_pe(struct device_node *dn)
{
while ((dn->parent) && PCI_DN(dn->parent) &&
(PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
@@ -445,7 +444,7 @@ static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
void eeh_mark_slot(struct device_node *dn, int mode_flag)
{
struct pci_dev *dev;
- dn = find_device_pe(dn);
+ dn = eeh_find_device_pe(dn);
/* Back up one, since config addrs might be shared */
if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
@@ -493,7 +492,7 @@ void eeh_clear_slot(struct device_node *dn, int mode_flag)
unsigned long flags;
raw_spin_lock_irqsave(&confirm_error_lock, flags);
- dn = find_device_pe(dn);
+ dn = eeh_find_device_pe(dn);
/* Back up one, since config addrs might be shared */
if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
@@ -538,7 +537,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
no_dn++;
return 0;
}
- dn = find_device_pe(dn);
+ dn = eeh_find_device_pe(dn);
pdn = PCI_DN(dn);
/* Access to IO BARs might get this far and still not want checking. */
@@ -585,11 +584,11 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
* function zero of a multi-function device.
* In any case they must share a common PHB.
*/
- ret = read_slot_reset_state(pdn, rets);
+ ret = eeh_read_slot_reset_state(pdn, rets);
/* If the call to firmware failed, punt */
if (ret != 0) {
- printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
+ printk(KERN_WARNING "EEH: eeh_read_slot_reset_state() failed; rc=%d dn=%s\n",
ret, dn->full_name);
false_positives++;
pdn->eeh_false_positives ++;
@@ -687,15 +686,14 @@ EXPORT_SYMBOL(eeh_check_failure);
/**
- * rtas_pci_enable - Enable MMIO or DMA transfers for this slot
+ * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
* @pdn pci device node
*
* This routine should be called to reenable frozen MMIO or DMA
* so that it would work correctly again. It's useful while doing
* recovery or log collection on the indicated device.
*/
-int
-rtas_pci_enable(struct pci_dn *pdn, int function)
+int eeh_pci_enable(struct pci_dn *pdn, int function)
{
int config_addr;
int rc;
@@ -723,7 +721,7 @@ rtas_pci_enable(struct pci_dn *pdn, int function)
}
/**
- * rtas_pci_slot_reset - Raises/Lowers the pci #RST line
+ * eeh_slot_reset - Raises/Lowers the pci #RST line
* @pdn: pci device node
* @state: 1/0 to raise/lower the #RST
*
@@ -732,8 +730,7 @@ rtas_pci_enable(struct pci_dn *pdn, int function)
* and drops the #RST line if 'state is '0'. This routine is
* safe to call in an interrupt context.
*/
-static void
-rtas_pci_slot_reset(struct pci_dn *pdn, int state)
+static void eeh_slot_reset(struct pci_dn *pdn, int state)
{
int config_addr;
int rc;
@@ -786,13 +783,13 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
switch (state) {
case pcie_deassert_reset:
- rtas_pci_slot_reset(pdn, 0);
+ eeh_slot_reset(pdn, 0);
break;
case pcie_hot_reset:
- rtas_pci_slot_reset(pdn, 1);
+ eeh_slot_reset(pdn, 1);
break;
case pcie_warm_reset:
- rtas_pci_slot_reset(pdn, 3);
+ eeh_slot_reset(pdn, 3);
break;
default:
return -EINVAL;
@@ -839,7 +836,7 @@ void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
{
struct pci_dev *dev;
- dn = find_device_pe(dn);
+ dn = eeh_find_device_pe(dn);
/* Back up one, since config addrs might be shared */
if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
@@ -853,12 +850,12 @@ void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
}
/**
- * __rtas_set_slot_reset - Assert the pci #RST line for 1/4 second
+ * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
* @pdn: pci device node to be reset.
*
* Assert the PCI #RST line for 1/4 second.
*/
-static void __rtas_set_slot_reset(struct pci_dn *pdn)
+static void eeh_reset_pe_once(struct pci_dn *pdn)
{
unsigned int freset = 0;
@@ -871,9 +868,9 @@ static void __rtas_set_slot_reset(struct pci_dn *pdn)
eeh_set_pe_freset(pdn->node, &freset);
if (freset)
- rtas_pci_slot_reset(pdn, 3);
+ eeh_slot_reset(pdn, 3);
else
- rtas_pci_slot_reset(pdn, 1);
+ eeh_slot_reset(pdn, 1);
/* The PCI bus requires that the reset be held high for at least
* a 100 milliseconds. We wait a bit longer 'just in case'.
@@ -887,7 +884,7 @@ static void __rtas_set_slot_reset(struct pci_dn *pdn)
*/
eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED);
- rtas_pci_slot_reset(pdn, 0);
+ eeh_slot_reset(pdn, 0);
/* After a PCI slot has been reset, the PCI Express spec requires
* a 1.5 second idle time for the bus to stabilize, before starting
@@ -898,20 +895,20 @@ static void __rtas_set_slot_reset(struct pci_dn *pdn)
}
/**
- * rtas_set_slot_reset - Reset the indicated PE
+ * eeh_reset_pe - Reset the indicated PE
* @pdn: PCI device node
*
* This routine should be called to reset indicated device, including
* PE. A PE might include multiple PCI devices and sometimes PCI bridges
* might be involved as well.
*/
-int rtas_set_slot_reset(struct pci_dn *pdn)
+int eeh_reset_pe(struct pci_dn *pdn)
{
int i, rc;
/* Take three shots at resetting the bus */
for (i=0; i<3; i++) {
- __rtas_set_slot_reset(pdn);
+ eeh_reset_pe_once(pdn);
rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
if (rc == 0)
@@ -938,14 +935,14 @@ int rtas_set_slot_reset(struct pci_dn *pdn)
*/
/**
- * __restore_bars - Restore the Base Address Registers
+ * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
* @pdn: pci device node
*
* Loads the PCI configuration space base address registers,
* the expansion ROM base address, the latency timer, and etc.
* from the saved values in the device node.
*/
-static inline void __restore_bars(struct pci_dn *pdn)
+static inline void eeh_restore_one_device_bars(struct pci_dn *pdn)
{
int i;
u32 cmd;
@@ -999,7 +996,7 @@ void eeh_restore_bars(struct pci_dn *pdn)
return;
if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
- __restore_bars(pdn);
+ eeh_restore_one_device_bars(pdn);
for_each_child_of_node(pdn->node, dn)
eeh_restore_bars(PCI_DN(dn));
@@ -1026,15 +1023,14 @@ static void eeh_save_bars(struct pci_dn *pdn)
}
/**
- * rtas_configure_bridge - Configure PCI bridges for the indicated PE
+ * eeh_configure_bridge - Configure PCI bridges for the indicated PE
* @pdn: PCI device node
*
* PCI bridges might be included in PE. In order to make the PE work
* again. The included PCI bridges should be recovered after the PE
* encounters frozen state.
*/
-void
-rtas_configure_bridge(struct pci_dn *pdn)
+void eeh_configure_bridge(struct pci_dn *pdn)
{
int config_addr;
int rc;
@@ -1069,7 +1065,7 @@ struct eeh_early_enable_info {
};
/**
- * get_pe_addr - Retrieve PE address with given BDF address
+ * eeh_get_pe_addr - Retrieve PE address with given BDF address
* @config_addr: BDF address
* @info: BUID of the associated PHB
*
@@ -1079,7 +1075,7 @@ struct eeh_early_enable_info {
* the given BDF address. Further more, we prefer PE address on BDF
* address in EEH core components.
*/
-static int get_pe_addr(int config_addr,
+static int eeh_get_pe_addr(int config_addr,
struct eeh_early_enable_info *info)
{
unsigned int rets[3];
@@ -1112,7 +1108,7 @@ static int get_pe_addr(int config_addr,
}
/**
- * early_enable_eeh - Early enable EEH on the indicated device
+ * eeh_early_enable - Early enable EEH on the indicated device
* @dn: device node
* @data: BUID
*
@@ -1120,7 +1116,7 @@ static int get_pe_addr(int config_addr,
* is expected to be called before real PCI probing is done. However,
* the PHBs have been initialized at this point.
*/
-static void *early_enable_eeh(struct device_node *dn, void *data)
+static void *eeh_early_enable(struct device_node *dn, void *data)
{
unsigned int rets[3];
struct eeh_early_enable_info *info = data;
@@ -1170,14 +1166,14 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
/* If the newer, better, ibm,get-config-addr-info is supported,
* then use that instead.
*/
- pdn->eeh_pe_config_addr = get_pe_addr(pdn->eeh_config_addr, info);
+ pdn->eeh_pe_config_addr = eeh_get_pe_addr(pdn->eeh_config_addr, info);
/* Some older systems (Power4) allow the
* ibm,set-eeh-option call to succeed even on nodes
* where EEH is not supported. Verify support
* explicitly.
*/
- ret = read_slot_reset_state(pdn, rets);
+ ret = eeh_read_slot_reset_state(pdn, rets);
if ((ret == 0) && (rets[1] == 1))
enable = 1;
}
@@ -1272,7 +1268,7 @@ void __init eeh_init(void)
info.buid_lo = BUID_LO(buid);
info.buid_hi = BUID_HI(buid);
- traverse_pci_devices(phb, early_enable_eeh, &info);
+ traverse_pci_devices(phb, eeh_early_enable, &info);
}
if (eeh_subsystem_enabled)
@@ -1308,7 +1304,7 @@ static void eeh_add_device_early(struct device_node *dn)
info.buid_hi = BUID_HI(phb->buid);
info.buid_lo = BUID_LO(phb->buid);
- early_enable_eeh(dn, &info);
+ eeh_early_enable(dn, &info);
}
/**
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 1b6cb10..5315350 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -282,7 +282,7 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
/* Reset the pci controller. (Asserts RST#; resets config space).
* Reconfigure bridges and devices. Don't try to bring the system
* up if the reset failed for some reason. */
- rc = rtas_set_slot_reset(pe_dn);
+ rc = eeh_reset_pe(pe_dn);
if (rc)
return rc;
@@ -295,7 +295,7 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
struct pci_dn *ppe = PCI_DN(dn);
/* On Power4, always true because eeh_pe_config_addr=0 */
if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) {
- rtas_configure_bridge(ppe);
+ eeh_configure_bridge(ppe);
eeh_restore_bars(ppe);
}
dn = dn->sibling;
@@ -330,7 +330,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
- frozen_dn = find_device_pe(event->dn);
+ frozen_dn = eeh_find_device_pe(event->dn);
if (!frozen_dn) {
location = of_get_property(event->dn, "ibm,loc-code", NULL);
@@ -422,7 +422,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
/* If all devices reported they can proceed, then re-enable MMIO */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
- rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO);
+ rc = eeh_pci_enable(frozen_pdn, EEH_THAW_MMIO);
if (rc < 0)
goto hard_fail;
@@ -436,7 +436,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
/* If all devices reported they can proceed, then re-enable DMA */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
- rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA);
+ rc = eeh_pci_enable(frozen_pdn, EEH_THAW_DMA);
if (rc < 0)
goto hard_fail;
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 38d24e7..109fdb7 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -217,7 +217,7 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
if (!dn)
return NULL;
- dn = find_device_pe(dn);
+ dn = eeh_find_device_pe(dn);
if (!dn)
return NULL;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 03/21] Platform dependent EEH operations
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
2012-02-28 6:03 ` [PATCH 01/21] Cleanup on comments of EEH core Gavin Shan
2012-02-28 6:03 ` [PATCH 02/21] Cleanup on function names " Gavin Shan
@ 2012-02-28 6:03 ` Gavin Shan
2012-02-28 6:03 ` [PATCH 04/21] pSeries platform EEH initialization Gavin Shan
` (19 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:03 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
EEH has been implemented on the RTAS-compliant pSeries platform.
That is to say, the EEH operations are eventually implemented through
RTAS calls. This limits how far EEH can be extended.
In order to support EEH on multiple platforms like pseries and powernv
simultaneously, we have to split the platform dependent EEH operations
out of the current implementation.
The patch addresses supporting EEH on multiple platforms. The pseries
platform dependent EEH operations will be abstracted by struct eeh_ops.
EEH core components will be built based on the registered EEH operations.
With the mechanism, what the individual platform needs to do is implement
platform dependent EEH operations.
For now, only the pseries platform is covered by the mechanism. Other
platforms that want to support EEH, like powernv, still have to be
considered. Besides, this patch only provides the framework for the
mechanism; it will be implemented for the pseries platform later.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh.h | 32 +++++
arch/powerpc/platforms/pseries/Makefile | 2 +-
arch/powerpc/platforms/pseries/eeh.c | 53 ++++++++
arch/powerpc/platforms/pseries/eeh_pseries.c | 183 ++++++++++++++++++++++++++
arch/powerpc/platforms/pseries/setup.c | 1 +
5 files changed, 270 insertions(+), 1 deletions(-)
create mode 100644 arch/powerpc/platforms/pseries/eeh_pseries.c
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 2328877..0666c52 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -31,6 +31,26 @@ struct device_node;
#ifdef CONFIG_EEH
+/*
+ * The struct is used to trace the registered EEH operation
+ * callback functions. Actually, those operation callback
+ * functions are heavily platform dependent. That means the
+ * platform should register its own EEH operation callback
+ * functions before any EEH further operations.
+ */
+struct eeh_ops {
+ char *name;
+ int (*init)(void);
+ int (*set_option)(struct device_node *dn, int option);
+ int (*get_pe_addr)(struct device_node *dn);
+ int (*get_state)(struct device_node *dn, int *state);
+ int (*reset)(struct device_node *dn, int option);
+ int (*wait_state)(struct device_node *dn, int max_wait);
+ int (*get_log)(struct device_node *dn, int severity, char *drv_log, unsigned long len);
+ int (*configure_bridge)(struct device_node *dn);
+};
+
+extern struct eeh_ops *eeh_ops;
extern int eeh_subsystem_enabled;
/* Values for eeh_mode bits in device_node */
@@ -47,6 +67,11 @@ extern int eeh_subsystem_enabled;
#define EEH_MAX_ALLOWED_FREEZES 5
void __init eeh_init(void);
+#ifdef CONFIG_PPC_PSERIES
+int __init eeh_pseries_init(void);
+#endif
+int __init eeh_ops_register(struct eeh_ops *ops);
+int __exit eeh_ops_unregister(const char *name);
unsigned long eeh_check_failure(const volatile void __iomem *token,
unsigned long val);
int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev);
@@ -73,6 +98,13 @@ void eeh_remove_bus_device(struct pci_dev *);
#else /* !CONFIG_EEH */
static inline void eeh_init(void) { }
+#ifdef CONFIG_PPC_PSERIES
+static inline int eeh_pseries_init(void)
+{
+ return 0;
+}
+#endif /* CONFIG_PPC_PSERIES */
+
static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
{
return val;
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 236db46..9aa5581 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -6,7 +6,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
firmware.o power.o dlpar.o mobility.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SCANLOG) += scanlog.o
-obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
+obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o eeh_pseries.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
obj-$(CONFIG_PSERIES_MSI) += msi.o
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index fa88589..b0e3fb0 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -97,6 +97,9 @@ static int ibm_get_config_addr_info2;
static int ibm_configure_bridge;
static int ibm_configure_pe;
+/* Platform dependent EEH operations */
+struct eeh_ops *eeh_ops = NULL;
+
int eeh_subsystem_enabled;
EXPORT_SYMBOL(eeh_subsystem_enabled);
@@ -1208,6 +1211,56 @@ static void *eeh_early_enable(struct device_node *dn, void *data)
}
/**
+ * eeh_ops_register - Register platform dependent EEH operations
+ * @ops: platform dependent EEH operations
+ *
+ * Register the platform dependent EEH operation callback
+ * functions. The platform should call this function before
+ * any other EEH operations.
+ */
+int __init eeh_ops_register(struct eeh_ops *ops)
+{
+ if (!ops->name) {
+ pr_warning("%s: Invalid EEH ops name for %p\n",
+ __func__, ops);
+ return -EINVAL;
+ }
+
+ if (eeh_ops && eeh_ops != ops) {
+ pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
+ __func__, eeh_ops->name, ops->name);
+ return -EEXIST;
+ }
+
+ eeh_ops = ops;
+
+ return 0;
+}
+
+/**
+ * eeh_ops_unregister - Unreigster platform dependent EEH operations
+ * @name: name of EEH platform operations
+ *
+ * Unregister the platform dependent EEH operation callback
+ * functions.
+ */
+int __exit eeh_ops_unregister(const char *name)
+{
+ if (!name || !strlen(name)) {
+ pr_warning("%s: Invalid EEH ops name\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (eeh_ops && !strcmp(eeh_ops->name, name)) {
+ eeh_ops = NULL;
+ return 0;
+ }
+
+ return -EEXIST;
+}
+
+/**
* eeh_init - EEH initialization
*
* Initialize EEH by trying to enable it for all of the adapters in the system.
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
new file mode 100644
index 0000000..61a9050
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -0,0 +1,183 @@
+/*
+ * The file intends to implement the platform dependent EEH operations on pseries.
+ * Actually, the pseries platform is built based on RTAS heavily. That means the
+ * pseries platform dependent EEH operations will be built on RTAS calls. The functions
+ * are devired from arch/powerpc/platforms/pseries/eeh.c and necessary cleanup has
+ * been done.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2011.
+ * Copyright IBM Corporation 2001, 2005, 2006
+ * Copyright Dave Engebretsen & Todd Inglett 2001
+ * Copyright Linas Vepstas 2005, 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/rtas.h>
+
+/**
+ * pseries_eeh_init - EEH platform dependent initialization
+ *
+ * EEH platform dependent initialization on pseries.
+ */
+static int pseries_eeh_init(void)
+{
+ return 0;
+}
+
+/**
+ * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable
+ * @dn: device node
+ * @option: operation to be issued
+ *
+ * The function is used to control the EEH functionality globally.
+ * Currently, following options are support according to PAPR:
+ * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
+ */
+static int pseries_eeh_set_option(struct device_node *dn, int option)
+{
+ return 0;
+}
+
+/**
+ * pseries_eeh_get_pe_addr - Retrieve PE address
+ * @dn: device node
+ *
+ * Retrieve the assocated PE address. Actually, there're 2 RTAS
+ * function calls dedicated for the purpose. We need implement
+ * it through the new function and then the old one. Besides,
+ * you should make sure the config address is figured out from
+ * FDT node before calling the function.
+ *
+ * It's notable that zero'ed return value means invalid PE config
+ * address.
+ */
+static int pseries_eeh_get_pe_addr(struct device_node *dn)
+{
+ return 0;
+}
+
+/**
+ * pseries_eeh_get_state - Retrieve PE state
+ * @dn: PE associated device node
+ * @state: return value
+ *
+ * Retrieve the state of the specified PE. On RTAS compliant
+ * pseries platform, there already has one dedicated RTAS function
+ * for the purpose. It's notable that the associated PE config address
+ * might be ready when calling the function. Therefore, endeavour to
+ * use the PE config address if possible. Further more, there're 2
+ * RTAS calls for the purpose, we need to try the new one and back
+ * to the old one if the new one couldn't work properly.
+ */
+static int pseries_eeh_get_state(struct device_node *dn, int *state)
+{
+ return 0;
+}
+
+/**
+ * pseries_eeh_reset - Reset the specified PE
+ * @dn: PE associated device node
+ * @option: reset option
+ *
+ * Reset the specified PE
+ */
+static int pseries_eeh_reset(struct device_node *dn, int option)
+{
+ return 0;
+}
+
+/**
+ * pseries_eeh_wait_state - Wait for PE state
+ * @dn: PE associated device node
+ * @max_wait: maximal period in microsecond
+ *
+ * Wait for the state of associated PE. It might take some time
+ * to retrieve the PE's state.
+ */
+static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
+{
+ return 0;
+}
+
+/**
+ * pseries_eeh_get_log - Retrieve error log
+ * @dn: device node
+ * @severity: temporary or permanent error log
+ * @drv_log: driver log to be combined with retrieved error log
+ * @len: length of driver log
+ *
+ * Retrieve the temporary or permanent error from the PE.
+ * Actually, the error will be retrieved through the dedicated
+ * RTAS call.
+ */
+static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len)
+{
+ return 0;
+}
+
+/**
+ * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
+ * @dn: PE associated device node
+ *
+ * The function will be called to reconfigure the bridges included
+ * in the specified PE so that the mulfunctional PE would be recovered
+ * again.
+ */
+static int pseries_eeh_configure_bridge(struct device_node *dn)
+{
+ return 0;
+}
+
+static struct eeh_ops pseries_eeh_ops = {
+ .name = "pseries",
+ .init = pseries_eeh_init,
+ .set_option = pseries_eeh_set_option,
+ .get_pe_addr = pseries_eeh_get_pe_addr,
+ .get_state = pseries_eeh_get_state,
+ .reset = pseries_eeh_reset,
+ .wait_state = pseries_eeh_wait_state,
+ .get_log = pseries_eeh_get_log,
+ .configure_bridge = pseries_eeh_configure_bridge
+};
+
+/**
+ * eeh_pseries_init - Register platform dependent EEH operations
+ *
+ * EEH initialization on pseries platform. This function should be
+ * called before any EEH related functions.
+ */
+int __init eeh_pseries_init(void)
+{
+ return eeh_ops_register(&pseries_eeh_ops);
+}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f79f127..809d9d9 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -382,6 +382,7 @@ static void __init pSeries_setup_arch(void)
/* Find and initialize PCI host bridges */
init_pci_config_tokens();
+ eeh_pseries_init();
find_and_init_phbs();
pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
eeh_init();
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 04/21] pSeries platform EEH initialization
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (2 preceding siblings ...)
2012-02-28 6:03 ` [PATCH 03/21] Platform dependent EEH operations Gavin Shan
@ 2012-02-28 6:03 ` Gavin Shan
2012-02-28 6:03 ` [PATCH 05/21] pSeries platform EEH operation Gavin Shan
` (18 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:03 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
The platform specific EEH operations have been abstracted by
struct eeh_ops. The individual platforms, including pSeries, need to
do the necessary initialization before the platform dependent EEH
operations can work properly.
The patch addresses that and does the necessary platform initialization
for the pSeries platform. More specifically, it figures out the tokens
of the EEH related RTAS calls.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/eeh.c | 12 ++++++
arch/powerpc/platforms/pseries/eeh_pseries.c | 55 ++++++++++++++++++++++++++
2 files changed, 67 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index b0e3fb0..bb6de6c 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -1279,6 +1279,18 @@ void __init eeh_init(void)
{
struct device_node *phb, *np;
struct eeh_early_enable_info info;
+ int ret;
+
+ /* call platform initialization function */
+ if (!eeh_ops) {
+ pr_warning("%s: Platform EEH operation not found\n",
+ __func__);
+ return;
+ } else if ((ret = eeh_ops->init())) {
+ pr_warning("%s: Failed to call platform init function (%d)\n",
+ __func__, ret);
+ return;
+ }
raw_spin_lock_init(&confirm_error_lock);
spin_lock_init(&slot_errbuf_lock);
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 61a9050..1a9410a 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -45,6 +45,17 @@
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
+/* RTAS tokens */
+static int ibm_set_eeh_option;
+static int ibm_set_slot_reset;
+static int ibm_read_slot_reset_state;
+static int ibm_read_slot_reset_state2;
+static int ibm_slot_error_detail;
+static int ibm_get_config_addr_info;
+static int ibm_get_config_addr_info2;
+static int ibm_configure_bridge;
+static int ibm_configure_pe;
+
/**
* pseries_eeh_init - EEH platform dependent initialization
*
@@ -52,6 +63,50 @@
*/
static int pseries_eeh_init(void)
{
+ /* figure out EEH RTAS function call tokens */
+ ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
+ ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
+ ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
+ ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
+ ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
+ ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
+ ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
+ ibm_configure_pe = rtas_token("ibm,configure-pe");
+ ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
+
+ /* necessary sanity check */
+ if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) {
+ pr_warning("%s: RTAS service <ibm,set-eeh-option> invalid\n",
+ __func__);
+ return -EINVAL;
+ } else if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE) {
+ pr_warning("%s: RTAS service <ibm, set-slot-reset> invalid\n",
+ __func__);
+ return -EINVAL;
+ } else if (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
+ ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) {
+ pr_warning("%s: RTAS service <ibm,read-slot-reset-state2> and "
+ "<ibm,read-slot-reset-state> invalid\n",
+ __func__);
+ return -EINVAL;
+ } else if (ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE) {
+ pr_warning("%s: RTAS service <ibm,slot-error-detail> invalid\n",
+ __func__);
+ return -EINVAL;
+ } else if (ibm_get_config_addr_info2 == RTAS_UNKNOWN_SERVICE &&
+ ibm_get_config_addr_info == RTAS_UNKNOWN_SERVICE) {
+ pr_warning("%s: RTAS service <ibm,get-config-addr-info2> and "
+ "<ibm,get-config-addr-info> invalid\n",
+ __func__);
+ return -EINVAL;
+ } else if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE &&
+ ibm_configure_bridge == RTAS_UNKNOWN_SERVICE) {
+ pr_warning("%s: RTAS service <ibm,configure-pe> and "
+ "<ibm,configure-bridge> invalid\n",
+ __func__);
+ return -EINVAL;
+ }
+
return 0;
}
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 05/21] pSeries platform EEH operation
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (3 preceding siblings ...)
2012-02-28 6:03 ` [PATCH 04/21] pSeries platform EEH initialization Gavin Shan
@ 2012-02-28 6:03 ` Gavin Shan
2012-02-28 6:03 ` [PATCH 06/21] pSeries platform EEH PE address retrieval Gavin Shan
` (17 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:03 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
There are 4 EEH operations covered by the dedicated RTAS call
<ibm,set-eeh-option>: enable EEH, disable EEH, enable MMIO and
enable DMA. At an early stage of system boot, the kernel tries to
enable EEH on the PCI device related device nodes. MMIO and DMA for
a particular PE should be enabled when recovering from EEH errors
so that the PE can function properly again.
The patch implements that and abstracts it through struct
eeh_ops::set_option. This will help EEH support multiple
platforms in the future.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh.h | 4 ++
arch/powerpc/include/asm/ppc-pci.h | 2 -
arch/powerpc/platforms/pseries/eeh.c | 26 ++--------------
arch/powerpc/platforms/pseries/eeh_driver.c | 4 +-
arch/powerpc/platforms/pseries/eeh_pseries.c | 39 +++++++++++++++++++++++++-
5 files changed, 48 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 0666c52..76f7b3f 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -38,6 +38,10 @@ struct device_node;
* platform should register its own EEH operation callback
* functions before any EEH further operations.
*/
+#define EEH_OPT_DISABLE 0 /* EEH disable */
+#define EEH_OPT_ENABLE 1 /* EEH enable */
+#define EEH_OPT_THAW_MMIO 2 /* MMIO enable */
+#define EEH_OPT_THAW_DMA 3 /* DMA enable */
struct eeh_ops {
char *name;
int (*init)(void);
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 605a970..6150349 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -56,8 +56,6 @@ struct pci_dev *pci_get_device_by_addr(unsigned long addr);
#define EEH_LOG_TEMP_FAILURE 1
#define EEH_LOG_PERM_FAILURE 2
void eeh_slot_error_detail (struct pci_dn *pdn, int severity);
-#define EEH_THAW_MMIO 2
-#define EEH_THAW_DMA 3
int eeh_pci_enable(struct pci_dn *pdn, int function);
int eeh_reset_pe(struct pci_dn *);
int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index bb6de6c..70a9617 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -87,7 +87,6 @@
#define PCI_BUS_RESET_WAIT_MSEC (60*1000)
/* RTAS tokens */
-static int ibm_set_eeh_option;
static int ibm_set_slot_reset;
static int ibm_read_slot_reset_state;
static int ibm_read_slot_reset_state2;
@@ -283,7 +282,7 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
size_t loglen = 0;
pci_regs_buf[0] = 0;
- eeh_pci_enable(pdn, EEH_THAW_MMIO);
+ eeh_pci_enable(pdn, EEH_OPT_THAW_MMIO);
eeh_configure_bridge(pdn);
eeh_restore_bars(pdn);
loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
@@ -698,26 +697,15 @@ EXPORT_SYMBOL(eeh_check_failure);
*/
int eeh_pci_enable(struct pci_dn *pdn, int function)
{
- int config_addr;
int rc;
- /* Use PE configuration address, if present */
- config_addr = pdn->eeh_config_addr;
- if (pdn->eeh_pe_config_addr)
- config_addr = pdn->eeh_pe_config_addr;
-
- rc = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
- config_addr,
- BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid),
- function);
-
+ rc = eeh_ops->set_option(pdn->node, function);
if (rc)
printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
function, rc, pdn->node->full_name);
rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
- if ((rc == 4) && (function == EEH_THAW_MMIO))
+ if ((rc == 4) && (function == EEH_OPT_THAW_MMIO))
return 0;
return rc;
@@ -1158,9 +1146,7 @@ static void *eeh_early_enable(struct device_node *dn, void *data)
if (regs) {
/* First register entry is addr (00BBSS00) */
/* Try to enable eeh */
- ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
- regs[0], info->buid_hi, info->buid_lo,
- EEH_ENABLE);
+ ret = eeh_ops->set_option(dn, EEH_OPT_ENABLE);
enable = 0;
if (ret == 0) {
@@ -1299,7 +1285,6 @@ void __init eeh_init(void)
if (np == NULL)
return;
- ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
@@ -1309,9 +1294,6 @@ void __init eeh_init(void)
ibm_configure_bridge = rtas_token("ibm,configure-bridge");
ibm_configure_pe = rtas_token("ibm,configure-pe");
- if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
- return;
-
eeh_error_buf_size = rtas_token("rtas-error-log-max");
if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
eeh_error_buf_size = 1024;
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 5315350..02eab3b 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -422,7 +422,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
/* If all devices reported they can proceed, then re-enable MMIO */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
- rc = eeh_pci_enable(frozen_pdn, EEH_THAW_MMIO);
+ rc = eeh_pci_enable(frozen_pdn, EEH_OPT_THAW_MMIO);
if (rc < 0)
goto hard_fail;
@@ -436,7 +436,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
/* If all devices reported they can proceed, then re-enable DMA */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
- rc = eeh_pci_enable(frozen_pdn, EEH_THAW_DMA);
+ rc = eeh_pci_enable(frozen_pdn, EEH_OPT_THAW_DMA);
if (rc < 0)
goto hard_fail;
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 1a9410a..c48a9e6 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -121,7 +121,44 @@ static int pseries_eeh_init(void)
*/
static int pseries_eeh_set_option(struct device_node *dn, int option)
{
- return 0;
+ int ret = 0;
+ struct pci_dn *pdn;
+ const u32 *reg;
+ int config_addr;
+
+ pdn = PCI_DN(dn);
+
+ /*
+ * When we're enabling or disabling EEH functioality on
+ * the particular PE, the PE config address is possibly
+ * unavailable. Therefore, we have to figure it out from
+ * the FDT node.
+ */
+ switch (option) {
+ case EEH_OPT_DISABLE:
+ case EEH_OPT_ENABLE:
+ reg = of_get_property(dn, "reg", NULL);
+ config_addr = reg[0];
+ break;
+
+ case EEH_OPT_THAW_MMIO:
+ case EEH_OPT_THAW_DMA:
+ config_addr = pdn->eeh_config_addr;
+ if (pdn->eeh_pe_config_addr)
+ config_addr = pdn->eeh_pe_config_addr;
+ break;
+
+ default:
+ pr_err("%s: Invalid option %d\n",
+ __func__, option);
+ return -EINVAL;
+ }
+
+ ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
+ config_addr, BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid), option);
+
+ return ret;
}
/**
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 06/21] pSeries platform EEH PE address retrieval
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (4 preceding siblings ...)
2012-02-28 6:03 ` [PATCH 05/21] pSeries platform EEH operation Gavin Shan
@ 2012-02-28 6:03 ` Gavin Shan
2012-02-28 6:03 ` [PATCH 07/21] pSeries platform PE state retrieval Gavin Shan
` (16 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:03 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
There are 2 types of addresses used for EEH operations. The first
one is the BDF (Bus/Device/Function) address, which is retrieved
from the reg property of the corresponding FDT node. The other one
is the PE address, which should be queried from firmware through an
RTAS call on the pSeries platform. When issuing an EEH operation, the
PE address takes precedence over the BDF address.
The patch implements retrieving PE address according to the given
BDF address on pSeries platform. Also, the struct eeh_early_enable_info
has been removed since the information can be figured out from
dn->pdn->phb->buid directly and that simplifies the code.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/eeh.c | 67 +------------------------
arch/powerpc/platforms/pseries/eeh_pseries.c | 46 +++++++++++++++++-
2 files changed, 48 insertions(+), 65 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 70a9617..00797e0 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -91,8 +91,6 @@ static int ibm_set_slot_reset;
static int ibm_read_slot_reset_state;
static int ibm_read_slot_reset_state2;
static int ibm_slot_error_detail;
-static int ibm_get_config_addr_info;
-static int ibm_get_config_addr_info2;
static int ibm_configure_bridge;
static int ibm_configure_pe;
@@ -1048,56 +1046,6 @@ void eeh_configure_bridge(struct pci_dn *pdn)
}
}
-#define EEH_ENABLE 1
-
-struct eeh_early_enable_info {
- unsigned int buid_hi;
- unsigned int buid_lo;
-};
-
-/**
- * eeh_get_pe_addr - Retrieve PE address with given BDF address
- * @config_addr: BDF address
- * @info: BUID of the associated PHB
- *
- * There're 2 kinds of addresses existing in EEH core components:
- * BDF address and PE address. Besides, there has dedicated platform
- * dependent function call to retrieve the PE address according to
- * the given BDF address. Further more, we prefer PE address on BDF
- * address in EEH core components.
- */
-static int eeh_get_pe_addr(int config_addr,
- struct eeh_early_enable_info *info)
-{
- unsigned int rets[3];
- int ret;
-
- /* Use latest config-addr token on power6 */
- if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
- /* Make sure we have a PE in hand */
- ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
- config_addr, info->buid_hi, info->buid_lo, 1);
- if (ret || (rets[0]==0))
- return 0;
-
- ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
- config_addr, info->buid_hi, info->buid_lo, 0);
- if (ret)
- return 0;
- return rets[0];
- }
-
- /* Use older config-addr token on power5 */
- if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
- ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
- config_addr, info->buid_hi, info->buid_lo, 0);
- if (ret)
- return 0;
- return rets[0];
- }
- return 0;
-}
-
/**
* eeh_early_enable - Early enable EEH on the indicated device
* @dn: device node
@@ -1110,7 +1058,6 @@ static int eeh_get_pe_addr(int config_addr,
static void *eeh_early_enable(struct device_node *dn, void *data)
{
unsigned int rets[3];
- struct eeh_early_enable_info *info = data;
int ret;
const u32 *class_code = of_get_property(dn, "class-code", NULL);
const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
@@ -1155,7 +1102,7 @@ static void *eeh_early_enable(struct device_node *dn, void *data)
/* If the newer, better, ibm,get-config-addr-info is supported,
* then use that instead.
*/
- pdn->eeh_pe_config_addr = eeh_get_pe_addr(pdn->eeh_config_addr, info);
+ pdn->eeh_pe_config_addr = eeh_ops->get_pe_addr(dn);
/* Some older systems (Power4) allow the
* ibm,set-eeh-option call to succeed even on nodes
@@ -1264,7 +1211,6 @@ int __exit eeh_ops_unregister(const char *name)
void __init eeh_init(void)
{
struct device_node *phb, *np;
- struct eeh_early_enable_info info;
int ret;
/* call platform initialization function */
@@ -1289,8 +1235,6 @@ void __init eeh_init(void)
ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
- ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
- ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
ibm_configure_bridge = rtas_token("ibm,configure-bridge");
ibm_configure_pe = rtas_token("ibm,configure-pe");
@@ -1313,9 +1257,7 @@ void __init eeh_init(void)
if (buid == 0 || PCI_DN(phb) == NULL)
continue;
- info.buid_lo = BUID_LO(buid);
- info.buid_hi = BUID_HI(buid);
- traverse_pci_devices(phb, eeh_early_enable, &info);
+ traverse_pci_devices(phb, eeh_early_enable, NULL);
}
if (eeh_subsystem_enabled)
@@ -1339,7 +1281,6 @@ void __init eeh_init(void)
static void eeh_add_device_early(struct device_node *dn)
{
struct pci_controller *phb;
- struct eeh_early_enable_info info;
if (!dn || !PCI_DN(dn))
return;
@@ -1349,9 +1290,7 @@ static void eeh_add_device_early(struct device_node *dn)
if (NULL == phb || 0 == phb->buid)
return;
- info.buid_hi = BUID_HI(phb->buid);
- info.buid_lo = BUID_LO(phb->buid);
- eeh_early_enable(dn, &info);
+ eeh_early_enable(dn, NULL);
}
/**
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index c48a9e6..2b9543a 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -176,7 +176,51 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
*/
static int pseries_eeh_get_pe_addr(struct device_node *dn)
{
- return 0;
+ struct pci_dn *pdn;
+ int ret = 0;
+ int rets[3];
+
+ pdn = PCI_DN(dn);
+
+ if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
+ /*
+ * First of all, make sure there is a PE associated
+ * with the device. Otherwise, the PE address is
+ * meaningless.
+ */
+ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+ pdn->eeh_config_addr, BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid), 1);
+ if (ret || (rets[0] == 0))
+ return 0;
+
+ /* Retrieve the associated PE config address */
+ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+ pdn->eeh_config_addr, BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid), 0);
+ if (ret) {
+ pr_warning("%s: Failed to get PE address for %s\n",
+ __func__, dn->full_name);
+ return 0;
+ }
+
+ return rets[0];
+ }
+
+ if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
+ ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
+ pdn->eeh_config_addr, BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid), 0);
+ if (ret) {
+ pr_warning("%s: Failed to get PE address for %s\n",
+ __func__, dn->full_name);
+ return 0;
+ }
+
+ return rets[0];
+ }
+
+ return ret; /* neither RTAS token is available: ret is still 0 */
}
/**
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 07/21] pSeries platform PE state retrieval
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (5 preceding siblings ...)
2012-02-28 6:03 ` [PATCH 06/21] pSeries platform EEH PE address retrieval Gavin Shan
@ 2012-02-28 6:03 ` Gavin Shan
2012-02-28 6:03 ` [PATCH 08/21] pSeries platform EEH wait PE state Gavin Shan
` (15 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:03 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
On the pSeries platform, there are two dedicated RTAS calls to
retrieve the state of a PE: ibm,read-slot-reset-state and
ibm,read-slot-reset-state2.
This patch implements retrieval of the PE's state for the given
PE address. The implementation is abstracted behind
struct eeh_ops::get_state so that the EEH core components can support
multiple platforms in the future.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh.h | 8 ++
arch/powerpc/platforms/pseries/eeh.c | 96 ++++---------------------
arch/powerpc/platforms/pseries/eeh_driver.c | 2 +-
arch/powerpc/platforms/pseries/eeh_pseries.c | 70 ++++++++++++++++++-
4 files changed, 94 insertions(+), 82 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 76f7b3f..1d3c9e5 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -42,6 +42,14 @@ struct device_node;
#define EEH_OPT_ENABLE 1 /* EEH enable */
#define EEH_OPT_THAW_MMIO 2 /* MMIO enable */
#define EEH_OPT_THAW_DMA 3 /* DMA enable */
+#define EEH_STATE_UNAVAILABLE (1 << 0) /* State unavailable */
+#define EEH_STATE_NOT_SUPPORT (1 << 1) /* EEH not supported */
+#define EEH_STATE_RESET_ACTIVE (1 << 2) /* Active reset */
+#define EEH_STATE_MMIO_ACTIVE (1 << 3) /* Active MMIO */
+#define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */
+#define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */
+#define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */
+
struct eeh_ops {
char *name;
int (*init)(void);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 00797e0..8d11f1f 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -88,8 +88,6 @@
/* RTAS tokens */
static int ibm_set_slot_reset;
-static int ibm_read_slot_reset_state;
-static int ibm_read_slot_reset_state2;
static int ibm_slot_error_detail;
static int ibm_configure_bridge;
static int ibm_configure_pe;
@@ -289,37 +287,6 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
}
/**
- * eeh_read_slot_reset_state - Read the reset state of a device node's slot
- * @dn: device node to read
- * @rets: array to return results in
- *
- * Read the reset state of a device node's slot through platform dependent
- * function call.
- */
-static int eeh_read_slot_reset_state(struct pci_dn *pdn, int rets[])
-{
- int token, outputs;
- int config_addr;
-
- if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
- token = ibm_read_slot_reset_state2;
- outputs = 4;
- } else {
- token = ibm_read_slot_reset_state;
- rets[2] = 0; /* fake PE Unavailable info */
- outputs = 3;
- }
-
- /* Use PE configuration address, if present */
- config_addr = pdn->eeh_config_addr;
- if (pdn->eeh_pe_config_addr)
- config_addr = pdn->eeh_pe_config_addr;
-
- return rtas_call(token, 3, outputs, rets, config_addr,
- BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
-}
-
-/**
* eeh_wait_for_slot_status - Returns error status of slot
* @pdn: pci device node
* @max_wait_msecs: maximum number to millisecs to wait
@@ -335,21 +302,15 @@ static int eeh_read_slot_reset_state(struct pci_dn *pdn, int rets[])
int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
{
int rc;
- int rets[3];
int mwait;
while (1) {
- rc = eeh_read_slot_reset_state(pdn, rets);
- if (rc) return rc;
- if (rets[1] == 0) return -1; /* EEH is not supported */
-
- if (rets[0] != 5) return rets[0]; /* return actual status */
-
- if (rets[2] == 0) return -1; /* permanently unavailable */
+ rc = eeh_ops->get_state(pdn->node, &mwait);
+ if (rc != EEH_STATE_UNAVAILABLE)
+ return rc;
if (max_wait_msecs <= 0) break;
- mwait = rets[2];
if (mwait <= 0) {
printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n",
mwait);
@@ -522,7 +483,6 @@ void eeh_clear_slot(struct device_node *dn, int mode_flag)
int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
{
int ret;
- int rets[3];
unsigned long flags;
struct pci_dn *pdn;
int rc = 0;
@@ -584,40 +544,18 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
* function zero of a multi-function device.
* In any case they must share a common PHB.
*/
- ret = eeh_read_slot_reset_state(pdn, rets);
-
- /* If the call to firmware failed, punt */
- if (ret != 0) {
- printk(KERN_WARNING "EEH: eeh_read_slot_reset_state() failed; rc=%d dn=%s\n",
- ret, dn->full_name);
- false_positives++;
- pdn->eeh_false_positives ++;
- rc = 0;
- goto dn_unlock;
- }
+ ret = eeh_ops->get_state(pdn->node, NULL);
/* Note that config-io to empty slots may fail;
* they are empty when they don't have children.
+ * We will punt with the following conditions: Failure to get
+ * PE's state, EEH not support and Permanently unavailable
+ * state, PE is in good state.
*/
- if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) {
- false_positives++;
- pdn->eeh_false_positives ++;
- rc = 0;
- goto dn_unlock;
- }
-
- /* If EEH is not supported on this device, punt. */
- if (rets[1] != 1) {
- printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
- ret, dn->full_name);
- false_positives++;
- pdn->eeh_false_positives ++;
- rc = 0;
- goto dn_unlock;
- }
-
- /* If not the kind of error we know about, punt. */
- if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
+ if ((ret < 0) ||
+ (ret == EEH_STATE_NOT_SUPPORT) ||
+ (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
+ (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
false_positives++;
pdn->eeh_false_positives ++;
rc = 0;
@@ -703,7 +641,8 @@ int eeh_pci_enable(struct pci_dn *pdn, int function)
function, rc, pdn->node->full_name);
rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
- if ((rc == 4) && (function == EEH_OPT_THAW_MMIO))
+ if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
+ (function == EEH_OPT_THAW_MMIO))
return 0;
return rc;
@@ -900,7 +839,7 @@ int eeh_reset_pe(struct pci_dn *pdn)
eeh_reset_pe_once(pdn);
rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
- if (rc == 0)
+ if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
return 0;
if (rc < 0) {
@@ -1057,7 +996,6 @@ void eeh_configure_bridge(struct pci_dn *pdn)
*/
static void *eeh_early_enable(struct device_node *dn, void *data)
{
- unsigned int rets[3];
int ret;
const u32 *class_code = of_get_property(dn, "class-code", NULL);
const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
@@ -1109,8 +1047,8 @@ static void *eeh_early_enable(struct device_node *dn, void *data)
* where EEH is not supported. Verify support
* explicitly.
*/
- ret = eeh_read_slot_reset_state(pdn, rets);
- if ((ret == 0) && (rets[1] == 1))
+ ret = eeh_ops->get_state(pdn->node, NULL);
+ if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
enable = 1;
}
@@ -1232,8 +1170,6 @@ void __init eeh_init(void)
return;
ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
- ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
- ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
ibm_configure_bridge = rtas_token("ibm,configure-bridge");
ibm_configure_pe = rtas_token("ibm,configure-pe");
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 02eab3b..4c6e0c1c 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -397,7 +397,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
/* Get the current PCI slot state. This can take a long time,
* sometimes over 3 seconds for certain systems. */
rc = eeh_wait_for_slot_status (frozen_pdn, MAX_WAIT_FOR_RECOVERY*1000);
- if (rc < 0) {
+ if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
printk(KERN_WARNING "EEH: Permanent failure\n");
goto hard_fail;
}
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 2b9543a..39567b2 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -238,7 +238,81 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
*/
static int pseries_eeh_get_state(struct device_node *dn, int *state)
{
- return 0;
+ struct pci_dn *pdn;
+ int config_addr;
+ int ret;
+ int rets[4];
+ int result;
+
+ /* Figure out PE config address if possible */
+ pdn = PCI_DN(dn);
+ config_addr = pdn->eeh_config_addr;
+ if (pdn->eeh_pe_config_addr)
+ config_addr = pdn->eeh_pe_config_addr;
+
+ if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
+ ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
+ config_addr, BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid));
+ } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
+ /* Fake PE unavailable info */
+ rets[2] = 0;
+ ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
+ config_addr, BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid));
+ } else {
+ return EEH_STATE_NOT_SUPPORT;
+ }
+
+ if (ret)
+ return ret;
+
+ /* Parse the result out */
+ result = 0;
+ if (rets[1]) {
+ switch(rets[0]) {
+ case 0:
+ result &= ~EEH_STATE_RESET_ACTIVE;
+ result |= EEH_STATE_MMIO_ACTIVE;
+ result |= EEH_STATE_DMA_ACTIVE;
+ break;
+ case 1:
+ result |= EEH_STATE_RESET_ACTIVE;
+ result |= EEH_STATE_MMIO_ACTIVE;
+ result |= EEH_STATE_DMA_ACTIVE;
+ break;
+ case 2:
+ result &= ~EEH_STATE_RESET_ACTIVE;
+ result &= ~EEH_STATE_MMIO_ACTIVE;
+ result &= ~EEH_STATE_DMA_ACTIVE;
+ break;
+ case 4:
+ result &= ~EEH_STATE_RESET_ACTIVE;
+ result &= ~EEH_STATE_MMIO_ACTIVE;
+ result &= ~EEH_STATE_DMA_ACTIVE;
+ result |= EEH_STATE_MMIO_ENABLED;
+ break;
+ case 5:
+ /*
+ * A non-zero rets[2] is the wait time for a temporarily
+ * unavailable state; zero means permanently unavailable.
+ */
+ if (rets[2]) {
+ if (state) *state = rets[2];
+ result = EEH_STATE_UNAVAILABLE;
+ } else {
+ result = EEH_STATE_NOT_SUPPORT;
+ }
+ /* Must not fall into default, which would discard the result */
+ break;
+ default:
+ result = EEH_STATE_NOT_SUPPORT;
+ }
+ } else {
+ result = EEH_STATE_NOT_SUPPORT;
+ }
+
+ return result;
}
/**
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 08/21] pSeries platform EEH wait PE state
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (6 preceding siblings ...)
2012-02-28 6:03 ` [PATCH 07/21] pSeries platform PE state retrieval Gavin Shan
@ 2012-02-28 6:03 ` Gavin Shan
2012-02-28 6:03 ` [PATCH 09/21] pSeries platform EEH reset PE Gavin Shan
` (14 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:03 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
On the pSeries platform, the PE state might be temporarily unavailable.
In that case, the firmware returns the corresponding wait time, and
the kernel has to wait for that time before it can get the PE state.
This patch implements that wait. The function is abstracted behind
struct eeh_ops::wait_state so that the EEH core components can support
multiple platforms in the future.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/ppc-pci.h | 1 -
arch/powerpc/platforms/pseries/eeh.c | 46 +------------------------
arch/powerpc/platforms/pseries/eeh_driver.c | 2 +-
arch/powerpc/platforms/pseries/eeh_pseries.c | 47 +++++++++++++++++++++++++-
4 files changed, 49 insertions(+), 47 deletions(-)
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 6150349..1cfb2b0 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -58,7 +58,6 @@ struct pci_dev *pci_get_device_by_addr(unsigned long addr);
void eeh_slot_error_detail (struct pci_dn *pdn, int severity);
int eeh_pci_enable(struct pci_dn *pdn, int function);
int eeh_reset_pe(struct pci_dn *);
-int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs);
void eeh_restore_bars(struct pci_dn *);
void eeh_configure_bridge(struct pci_dn *);
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 8d11f1f..b5b03d4 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -287,48 +287,6 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
}
/**
- * eeh_wait_for_slot_status - Returns error status of slot
- * @pdn: pci device node
- * @max_wait_msecs: maximum number to millisecs to wait
- *
- * Return negative value if a permanent error, else return
- * Partition Endpoint (PE) status value.
- *
- * If @max_wait_msecs is positive, then this routine will
- * sleep until a valid status can be obtained, or until
- * the max allowed wait time is exceeded, in which case
- * a -2 is returned.
- */
-int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
-{
- int rc;
- int mwait;
-
- while (1) {
- rc = eeh_ops->get_state(pdn->node, &mwait);
- if (rc != EEH_STATE_UNAVAILABLE)
- return rc;
-
- if (max_wait_msecs <= 0) break;
-
- if (mwait <= 0) {
- printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n",
- mwait);
- mwait = 1000;
- } else if (mwait > 300*1000) {
- printk(KERN_WARNING "EEH: Firmware is taking too long, time=%d\n",
- mwait);
- mwait = 300*1000;
- }
- max_wait_msecs -= mwait;
- msleep(mwait);
- }
-
- printk(KERN_WARNING "EEH: Timed out waiting for slot status\n");
- return -2;
-}
-
-/**
* eeh_token_to_phys - Convert EEH address token to phys address
* @token: I/O token, should be address in the form 0xA....
*
@@ -640,7 +598,7 @@ int eeh_pci_enable(struct pci_dn *pdn, int function)
printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
function, rc, pdn->node->full_name);
- rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
+ rc = eeh_ops->wait_state(pdn->node, PCI_BUS_RESET_WAIT_MSEC);
if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
(function == EEH_OPT_THAW_MMIO))
return 0;
@@ -838,7 +796,7 @@ int eeh_reset_pe(struct pci_dn *pdn)
for (i=0; i<3; i++) {
eeh_reset_pe_once(pdn);
- rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
+ rc = eeh_ops->wait_state(pdn->node, PCI_BUS_RESET_WAIT_MSEC);
if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
return 0;
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 4c6e0c1c..584defe 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -396,7 +396,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
/* Get the current PCI slot state. This can take a long time,
* sometimes over 3 seconds for certain systems. */
- rc = eeh_wait_for_slot_status (frozen_pdn, MAX_WAIT_FOR_RECOVERY*1000);
+ rc = eeh_ops->wait_state(frozen_pdn->node, MAX_WAIT_FOR_RECOVERY*1000);
if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
printk(KERN_WARNING "EEH: Permanent failure\n");
goto hard_fail;
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 39567b2..7b60131 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -331,7 +331,52 @@ static int pseries_eeh_reset(struct device_node *dn, int option)
*/
static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
{
- return 0;
+ int ret;
+ int mwait;
+
+ /*
+ * According to PAPR, the state of a PE might be temporarily
+ * unavailable. In that case we have to wait for the time
+ * indicated by firmware. Each wait is capped at 5 minutes,
+ * and a non-positive wait value from firmware is replaced
+ * with 1 second; both values are carried over from the
+ * original EEH implementation.
+ */
+#define EEH_STATE_MIN_WAIT_TIME (1000)
+#define EEH_STATE_MAX_WAIT_TIME (300 * 1000)
+
+ while (1) {
+ ret = pseries_eeh_get_state(dn, &mwait);
+
+ /*
+ * If the PE's state is temporarily unavailable,
+ * we have to wait for the specified time. Otherwise,
+ * the PE's state will be returned immediately.
+ */
+ if (ret != EEH_STATE_UNAVAILABLE)
+ return ret;
+
+ if (max_wait <= 0) {
+ pr_warning("%s: Timeout when getting PE's state (%d)\n",
+ __func__, max_wait);
+ return EEH_STATE_NOT_SUPPORT;
+ }
+
+ if (mwait <= 0) {
+ pr_warning("%s: Firmware returned bad wait value %d\n",
+ __func__, mwait);
+ mwait = EEH_STATE_MIN_WAIT_TIME;
+ } else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
+ pr_warning("%s: Firmware returned too long wait value %d\n",
+ __func__, mwait);
+ mwait = EEH_STATE_MAX_WAIT_TIME;
+ }
+
+ max_wait -= mwait;
+ msleep(mwait);
+ }
+
+ return EEH_STATE_NOT_SUPPORT; /* not reached: the loop only exits via return */
}
/**
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 09/21] pSeries platform EEH reset PE
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (7 preceding siblings ...)
2012-02-28 6:03 ` [PATCH 08/21] pSeries platform EEH wait PE state Gavin Shan
@ 2012-02-28 6:03 ` Gavin Shan
2012-02-28 6:04 ` [PATCH 10/21] pSeries platform EEH error log retrieval Gavin Shan
` (13 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:03 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
On RTAS-compliant pSeries platforms, there is a dedicated RTAS call
(ibm,set-slot-reset) to reset the specified PE. Two types of reset
are supported: hot and fundamental. Which type is actually used
depends on the requirements of the PCI devices included in the PE.
This patch implements resetting a PE on the pSeries platform through
that RTAS call. It is abstracted behind struct eeh_ops::reset
so that the EEH core components can support multiple platforms in the future.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh.h | 3 +
arch/powerpc/platforms/pseries/eeh.c | 63 +++-----------------------
arch/powerpc/platforms/pseries/eeh_pseries.c | 25 ++++++++++-
3 files changed, 33 insertions(+), 58 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 1d3c9e5..894ea6c 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -49,6 +49,9 @@ struct device_node;
#define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */
#define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */
#define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */
+#define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */
+#define EEH_RESET_HOT 1 /* Hot reset */
+#define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */
struct eeh_ops {
char *name;
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index b5b03d4..4f329f5 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -87,7 +87,6 @@
#define PCI_BUS_RESET_WAIT_MSEC (60*1000)
/* RTAS tokens */
-static int ibm_set_slot_reset;
static int ibm_slot_error_detail;
static int ibm_configure_bridge;
static int ibm_configure_pe;
@@ -607,54 +606,6 @@ int eeh_pci_enable(struct pci_dn *pdn, int function)
}
/**
- * eeh_slot_reset - Raises/Lowers the pci #RST line
- * @pdn: pci device node
- * @state: 1/0 to raise/lower the #RST
- *
- * Clear the EEH-frozen condition on a slot. This routine
- * asserts the PCI #RST line if the 'state' argument is '1',
- * and drops the #RST line if 'state is '0'. This routine is
- * safe to call in an interrupt context.
- */
-static void eeh_slot_reset(struct pci_dn *pdn, int state)
-{
- int config_addr;
- int rc;
-
- BUG_ON(pdn==NULL);
-
- if (!pdn->phb) {
- printk(KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
- pdn->node->full_name);
- return;
- }
-
- /* Use PE configuration address, if present */
- config_addr = pdn->eeh_config_addr;
- if (pdn->eeh_pe_config_addr)
- config_addr = pdn->eeh_pe_config_addr;
-
- rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
- config_addr,
- BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid),
- state);
-
- /* Fundamental-reset not supported on this PE, try hot-reset */
- if (rc == -8 && state == 3) {
- rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
- config_addr,
- BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid), 1);
- if (rc)
- printk(KERN_WARNING
- "EEH: Unable to reset the failed slot,"
- " #RST=%d dn=%s\n",
- rc, pdn->node->full_name);
- }
-}
-
-/**
* pcibios_set_pcie_slot_reset - Set PCI-E reset state
* @dev: pci device struct
* @state: reset state to enter
@@ -665,17 +616,16 @@ static void eeh_slot_reset(struct pci_dn *pdn, int state)
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
struct device_node *dn = pci_device_to_OF_node(dev);
- struct pci_dn *pdn = PCI_DN(dn);
switch (state) {
case pcie_deassert_reset:
- eeh_slot_reset(pdn, 0);
+ eeh_ops->reset(dn, EEH_RESET_DEACTIVATE);
break;
case pcie_hot_reset:
- eeh_slot_reset(pdn, 1);
+ eeh_ops->reset(dn, EEH_RESET_HOT);
break;
case pcie_warm_reset:
- eeh_slot_reset(pdn, 3);
+ eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL);
break;
default:
return -EINVAL;
@@ -754,9 +704,9 @@ static void eeh_reset_pe_once(struct pci_dn *pdn)
eeh_set_pe_freset(pdn->node, &freset);
if (freset)
- eeh_slot_reset(pdn, 3);
+ eeh_ops->reset(pdn->node, EEH_RESET_FUNDAMENTAL);
else
- eeh_slot_reset(pdn, 1);
+ eeh_ops->reset(pdn->node, EEH_RESET_HOT);
/* The PCI bus requires that the reset be held high for at least
* a 100 milliseconds. We wait a bit longer 'just in case'.
@@ -770,7 +720,7 @@ static void eeh_reset_pe_once(struct pci_dn *pdn)
*/
eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED);
- eeh_slot_reset(pdn, 0);
+ eeh_ops->reset(pdn->node, EEH_RESET_DEACTIVATE);
/* After a PCI slot has been reset, the PCI Express spec requires
* a 1.5 second idle time for the bus to stabilize, before starting
@@ -1127,7 +1077,6 @@ void __init eeh_init(void)
if (np == NULL)
return;
- ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
ibm_configure_bridge = rtas_token("ibm,configure-bridge");
ibm_configure_pe = rtas_token("ibm,configure-pe");
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 7b60131..6643e06 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -318,7 +318,30 @@ static int pseries_eeh_get_state(struct device_node *dn, int *state)
*/
static int pseries_eeh_reset(struct device_node *dn, int option)
{
- return 0;
+ struct pci_dn *pdn;
+ int config_addr;
+ int ret;
+
+ /* Figure out PE address */
+ pdn = PCI_DN(dn);
+ config_addr = pdn->eeh_config_addr;
+ if (pdn->eeh_pe_config_addr)
+ config_addr = pdn->eeh_pe_config_addr;
+
+ /* Reset PE through RTAS call */
+ ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+ config_addr, BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid), option);
+
+ /* -8 on a fundamental reset: not supported here, retry as hot reset */
+ if (option == EEH_RESET_FUNDAMENTAL &&
+ ret == -8) {
+ ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+ config_addr, BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid), EEH_RESET_HOT);
+ }
+
+ return ret;
}
/**
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 10/21] pSeries platform EEH error log retrieval
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (8 preceding siblings ...)
2012-02-28 6:03 ` [PATCH 09/21] pSeries platform EEH reset PE Gavin Shan
@ 2012-02-28 6:04 ` Gavin Shan
2012-02-28 6:04 ` [PATCH 11/21] pSeries platform EEH configure bridge Gavin Shan
` (12 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
On RTAS-compliant pSeries platforms, a dedicated RTAS call has
been introduced to retrieve the EEH temporary or permanent error log.
This patch implements retrieving the EEH error log through that
RTAS call. It is abstracted behind struct eeh_ops::get_log
so that the EEH core components can support multiple platforms in the future.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh.h | 2 +
arch/powerpc/include/asm/ppc-pci.h | 2 -
arch/powerpc/platforms/pseries/eeh.c | 63 +-------------------------
arch/powerpc/platforms/pseries/eeh_driver.c | 4 +-
arch/powerpc/platforms/pseries/eeh_pseries.c | 47 +++++++++++++++++++-
5 files changed, 51 insertions(+), 67 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 894ea6c..ad8f318 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -52,6 +52,8 @@ struct device_node;
#define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */
#define EEH_RESET_HOT 1 /* Hot reset */
#define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */
+#define EEH_LOG_TEMP 1 /* EEH temporary error log */
+#define EEH_LOG_PERM 2 /* EEH permanent error log */
struct eeh_ops {
char *name;
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 1cfb2b0..bd1a84f 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -53,8 +53,6 @@ void pci_addr_cache_insert_device(struct pci_dev *dev);
void pci_addr_cache_remove_device(struct pci_dev *dev);
void pci_addr_cache_build(void);
struct pci_dev *pci_get_device_by_addr(unsigned long addr);
-#define EEH_LOG_TEMP_FAILURE 1
-#define EEH_LOG_PERM_FAILURE 2
void eeh_slot_error_detail (struct pci_dn *pdn, int severity);
int eeh_pci_enable(struct pci_dn *pdn, int function);
int eeh_reset_pe(struct pci_dn *);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 4f329f5..39fcecb 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -87,7 +87,6 @@
#define PCI_BUS_RESET_WAIT_MSEC (60*1000)
/* RTAS tokens */
-static int ibm_slot_error_detail;
static int ibm_configure_bridge;
static int ibm_configure_pe;
@@ -100,14 +99,6 @@ EXPORT_SYMBOL(eeh_subsystem_enabled);
/* Lock to avoid races due to multiple reports of an error */
static DEFINE_RAW_SPINLOCK(confirm_error_lock);
-/* Buffer for reporting slot-error-detail rtas calls. Its here
- * in BSS, and not dynamically alloced, so that it ends up in
- * RMO where RTAS can access it.
- */
-static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
-static DEFINE_SPINLOCK(slot_errbuf_lock);
-static int eeh_error_buf_size;
-
/* Buffer for reporting pci register dumps. Its here in BSS, and
* not dynamically alloced, so that it ends up in RMO where RTAS
* can access it.
@@ -127,46 +118,6 @@ static unsigned long slot_resets;
#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
/**
- * eeh_rtas_slot_error_detail - Retrieve error log through RTAS call
- * @pdn: device node
- * @severity: temporary or permanent error log
- * @driver_log: driver log to be combined with the retrieved error log
- * @loglen: length of driver log
- *
- * This routine should be called to retrieve error log through the dedicated
- * RTAS call.
- */
-static void eeh_rtas_slot_error_detail(struct pci_dn *pdn, int severity,
- char *driver_log, size_t loglen)
-{
- int config_addr;
- unsigned long flags;
- int rc;
-
- /* Log the error with the rtas logger */
- spin_lock_irqsave(&slot_errbuf_lock, flags);
- memset(slot_errbuf, 0, eeh_error_buf_size);
-
- /* Use PE configuration address, if present */
- config_addr = pdn->eeh_config_addr;
- if (pdn->eeh_pe_config_addr)
- config_addr = pdn->eeh_pe_config_addr;
-
- rc = rtas_call(ibm_slot_error_detail,
- 8, 1, NULL, config_addr,
- BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid),
- virt_to_phys(driver_log), loglen,
- virt_to_phys(slot_errbuf),
- eeh_error_buf_size,
- severity);
-
- if (rc == 0)
- log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
- spin_unlock_irqrestore(&slot_errbuf_lock, flags);
-}
-
-/**
* eeh_gather_pci_data - Copy assorted PCI config space registers to buff
* @pdn: device to report data for
* @buf: point to buffer in which to log
@@ -282,7 +233,7 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
eeh_restore_bars(pdn);
loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
- eeh_rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
+ eeh_ops->get_log(pdn->node, severity, pci_regs_buf, loglen);
}
/**
@@ -1071,26 +1022,14 @@ void __init eeh_init(void)
}
raw_spin_lock_init(&confirm_error_lock);
- spin_lock_init(&slot_errbuf_lock);
np = of_find_node_by_path("/rtas");
if (np == NULL)
return;
- ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
ibm_configure_bridge = rtas_token("ibm,configure-bridge");
ibm_configure_pe = rtas_token("ibm,configure-pe");
- eeh_error_buf_size = rtas_token("rtas-error-log-max");
- if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
- eeh_error_buf_size = 1024;
- }
- if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
- printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
- "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
- eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
- }
-
/* Enable EEH for all adapters. Note that eeh requires buid's */
for (phb = of_find_node_by_name(NULL, "pci"); phb;
phb = of_find_node_by_name(phb, "pci")) {
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 584defe..6840357 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -406,7 +406,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
* don't post the error log until after all dev drivers
* have been informed.
*/
- eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP_FAILURE);
+ eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP);
/* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they
@@ -497,7 +497,7 @@ hard_fail:
location, drv_str, pci_str);
perm_error:
- eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM_FAILURE);
+ eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM);
/* Notify all devices that they're about to go down. */
pci_walk_bus(frozen_bus, eeh_report_failure, NULL);
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 6643e06..7c8434f 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -56,6 +56,15 @@ static int ibm_get_config_addr_info2;
static int ibm_configure_bridge;
static int ibm_configure_pe;
+/*
+ * Buffer for reporting slot-error-detail rtas calls. Its here
+ * in BSS, and not dynamically alloced, so that it ends up in
+ * RMO where RTAS can access it.
+ */
+static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
+static DEFINE_SPINLOCK(slot_errbuf_lock);
+static int eeh_error_buf_size;
+
/**
* pseries_eeh_init - EEH platform dependent initialization
*
@@ -107,6 +116,19 @@ static int pseries_eeh_init(void)
return -EINVAL;
}
+ /* Initialize error log lock and size */
+ spin_lock_init(&slot_errbuf_lock);
+ eeh_error_buf_size = rtas_token("rtas-error-log-max");
+ if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
+ pr_warning("%s: unknown EEH error log size\n",
+ __func__);
+ eeh_error_buf_size = 1024;
+ } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
+ pr_warning("%s: EEH error log size %d exceeds the maximal %d\n",
+ __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
+ eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
+ }
+
return 0;
}
@@ -415,7 +437,30 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
*/
static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len)
{
- return 0;
+ struct pci_dn *pdn;
+ int config_addr;
+ unsigned long flags;
+ int ret;
+
+ pdn = PCI_DN(dn);
+ spin_lock_irqsave(&slot_errbuf_lock, flags);
+ memset(slot_errbuf, 0, eeh_error_buf_size);
+
+ /* Figure out the PE address */
+ config_addr = pdn->eeh_config_addr;
+ if (pdn->eeh_pe_config_addr)
+ config_addr = pdn->eeh_pe_config_addr;
+
+ ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr,
+ BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid),
+ virt_to_phys(drv_log), len,
+ virt_to_phys(slot_errbuf), eeh_error_buf_size,
+ severity);
+ if (!ret)
+ log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
+ spin_unlock_irqrestore(&slot_errbuf_lock, flags);
+
+ return ret;
}
/**
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 11/21] pSeries platform EEH configure bridge
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (9 preceding siblings ...)
2012-02-28 6:04 ` [PATCH 10/21] pSeries platform EEH error log retrieval Gavin Shan
@ 2012-02-28 6:04 ` Gavin Shan
2012-02-28 6:04 ` [PATCH 12/21] Cleanup on comments of EEH aux components Gavin Shan
` (11 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
In order to enable a particular PCI device that is included in a
parent PE, the PCI bridges involved (if there are any) must be
enabled explicitly. On the pSeries platform, there are dedicated
RTAS calls to fulfil the purpose.
The patch implements the function of configuring PCI bridges through
the dedicated RTAS calls. Besides, the function has been abstracted
by struct eeh_ops::configure_bridge so that the EEH core components
could support multiple platforms in future.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/ppc-pci.h | 1 -
arch/powerpc/platforms/pseries/eeh.c | 44 +-------------------------
arch/powerpc/platforms/pseries/eeh_driver.c | 2 +-
arch/powerpc/platforms/pseries/eeh_pseries.c | 29 ++++++++++++++++-
4 files changed, 30 insertions(+), 46 deletions(-)
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index bd1a84f..b4b18d8 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -57,7 +57,6 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity);
int eeh_pci_enable(struct pci_dn *pdn, int function);
int eeh_reset_pe(struct pci_dn *);
void eeh_restore_bars(struct pci_dn *);
-void eeh_configure_bridge(struct pci_dn *);
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
void eeh_mark_slot(struct device_node *dn, int mode_flag);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 39fcecb..bd4ed83 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -86,10 +86,6 @@
/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC (60*1000)
-/* RTAS tokens */
-static int ibm_configure_bridge;
-static int ibm_configure_pe;
-
/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;
@@ -229,7 +225,7 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
pci_regs_buf[0] = 0;
eeh_pci_enable(pdn, EEH_OPT_THAW_MMIO);
- eeh_configure_bridge(pdn);
+ eeh_ops->configure_bridge(pdn->node);
eeh_restore_bars(pdn);
loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
@@ -810,41 +806,6 @@ static void eeh_save_bars(struct pci_dn *pdn)
}
/**
- * eeh_configure_bridge - Configure PCI bridges for the indicated PE
- * @pdn: PCI device node
- *
- * PCI bridges might be included in PE. In order to make the PE work
- * again. The included PCI bridges should be recovered after the PE
- * encounters frozen state.
- */
-void eeh_configure_bridge(struct pci_dn *pdn)
-{
- int config_addr;
- int rc;
- int token;
-
- /* Use PE configuration address, if present */
- config_addr = pdn->eeh_config_addr;
- if (pdn->eeh_pe_config_addr)
- config_addr = pdn->eeh_pe_config_addr;
-
- /* Use new configure-pe function, if supported */
- if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
- token = ibm_configure_pe;
- else
- token = ibm_configure_bridge;
-
- rc = rtas_call(token, 3, 1, NULL,
- config_addr,
- BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid));
- if (rc) {
- printk(KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
- rc, pdn->node->full_name);
- }
-}
-
-/**
* eeh_early_enable - Early enable EEH on the indicated device
* @dn: device node
* @data: BUID
@@ -1027,9 +988,6 @@ void __init eeh_init(void)
if (np == NULL)
return;
- ibm_configure_bridge = rtas_token("ibm,configure-bridge");
- ibm_configure_pe = rtas_token("ibm,configure-pe");
-
/* Enable EEH for all adapters. Note that eeh requires buid's */
for (phb = of_find_node_by_name(NULL, "pci"); phb;
phb = of_find_node_by_name(phb, "pci")) {
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 6840357..61450e1 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -295,7 +295,7 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
struct pci_dn *ppe = PCI_DN(dn);
/* On Power4, always true because eeh_pe_config_addr=0 */
if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) {
- eeh_configure_bridge(ppe);
+ eeh_ops->configure_bridge(dn);
eeh_restore_bars(ppe);
}
dn = dn->sibling;
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 7c8434f..4ed06b2 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -473,7 +473,34 @@ static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_l
*/
static int pseries_eeh_configure_bridge(struct device_node *dn)
{
- return 0;
+ struct pci_dn *pdn;
+ int config_addr;
+ int ret;
+
+ /* Figure out the PE address */
+ pdn = PCI_DN(dn);
+ config_addr = pdn->eeh_config_addr;
+ if (pdn->eeh_pe_config_addr)
+ config_addr = pdn->eeh_pe_config_addr;
+
+ /* Use new configure-pe function, if supported */
+ if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
+ ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+ config_addr, BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid));
+ } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
+ ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
+ config_addr, BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid));
+ } else {
+ return -EFAULT;
+ }
+
+ if (ret)
+ pr_warning("%s: Unable to configure bridge %d for %s\n",
+ __func__, ret, dn->full_name);
+
+ return ret;
}
static struct eeh_ops pseries_eeh_ops = {
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 12/21] Cleanup on comments of EEH aux components
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (10 preceding siblings ...)
2012-02-28 6:04 ` [PATCH 11/21] pSeries platform EEH configure bridge Gavin Shan
@ 2012-02-28 6:04 ` Gavin Shan
2012-02-28 6:04 ` [PATCH 13/21] Cleanup on function names " Gavin Shan
` (10 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
There are several EEH aux components, and this patch cleans them up
so that they read more cleanly.
* Duplicated comments have been removed from the header file.
* Comments have been reorganized so that they are cleaner.
* The leading comments of functions have been adjusted slightly
so that the output of "make pdfdocs" is more uniform.
* Function calls of the form "xxx ()" have been replaced by "xxx()".
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh_event.h | 34 ++-----
arch/powerpc/platforms/pseries/eeh_cache.c | 9 +-
arch/powerpc/platforms/pseries/eeh_driver.c | 136 ++++++++++++++++-----------
arch/powerpc/platforms/pseries/eeh_event.c | 23 ++---
arch/powerpc/platforms/pseries/eeh_sysfs.c | 2 +-
5 files changed, 107 insertions(+), 97 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index cc3cb04..25ebf6a 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -1,6 +1,4 @@
/*
- * eeh_event.h
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -22,32 +20,20 @@
#define ASM_POWERPC_EEH_EVENT_H
#ifdef __KERNEL__
-/** EEH event -- structure holding pci controller data that describes
- * a change in the isolation status of a PCI slot. A pointer
- * to this struct is passed as the data pointer in a notify callback.
+/*
+ * structure holding pci controller data that describes a
+ * change in the isolation status of a PCI slot. A pointer
+ * to this struct is passed as the data pointer in a notify
+ * callback.
*/
struct eeh_event {
- struct list_head list;
- struct device_node *dn; /* struct device node */
- struct pci_dev *dev; /* affected device */
+ struct list_head list; /* to form event queue */
+ struct device_node *dn; /* struct device node */
+ struct pci_dev *dev; /* affected device */
};
-/**
- * eeh_send_failure_event - generate a PCI error event
- * @dev pci device
- *
- * This routine builds a PCI error event which will be delivered
- * to all listeners on the eeh_notifier_chain.
- *
- * This routine can be called within an interrupt context;
- * the actual event will be delivered in a normal context
- * (from a workqueue).
- */
-int eeh_send_failure_event (struct device_node *dn,
- struct pci_dev *dev);
-
-/* Main recovery function */
-struct pci_dn * handle_eeh_events (struct eeh_event *);
+int eeh_send_failure_event(struct device_node *dn, struct pci_dev *dev);
+struct pci_dn *handle_eeh_events(struct eeh_event *);
#endif /* __KERNEL__ */
#endif /* ASM_POWERPC_EEH_EVENT_H */
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
index fc5ae76..850c00c 100644
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ b/arch/powerpc/platforms/pseries/eeh_cache.c
@@ -1,5 +1,4 @@
/*
- * eeh_cache.c
* PCI address cache; allows the lookup of PCI devices based on I/O address
*
* Copyright IBM Corporation 2004
@@ -47,8 +46,7 @@
* than any hash algo I could think of for this problem, even
* with the penalty of slow pointer chases for d-cache misses).
*/
-struct pci_io_addr_range
-{
+struct pci_io_addr_range {
struct rb_node rb_node;
unsigned long addr_lo;
unsigned long addr_hi;
@@ -56,8 +54,7 @@ struct pci_io_addr_range
unsigned int flags;
};
-static struct pci_io_addr_cache
-{
+static struct pci_io_addr_cache {
struct rb_root rb_root;
spinlock_t piar_lock;
} pci_io_addr_cache_root;
@@ -166,7 +163,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
#ifdef DEBUG
printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",
- alo, ahi, pci_name (dev));
+ alo, ahi, pci_name(dev));
#endif
rb_link_node(&piar->rb_node, parent, p);
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 61450e1..3f25fab 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -33,8 +33,14 @@
#include <asm/prom.h>
#include <asm/rtas.h>
-
-static inline const char * pcid_name (struct pci_dev *pdev)
+/**
+ * pcid_name - Retrieve name of PCI device driver
+ * @pdev: PCI device
+ *
+ * This routine is used to retrieve the name of PCI device driver
+ * if that's valid.
+ */
+static inline const char *pcid_name(struct pci_dev *pdev)
{
if (pdev && pdev->dev.driver)
return pdev->dev.driver->name;
@@ -64,7 +70,14 @@ static void print_device_node_tree(struct pci_dn *pdn, int dent)
#endif
/**
- * eeh_disable_irq - disable interrupt for the recovering device
+ * eeh_disable_irq - Disable interrupt for the recovering device
+ * @dev: PCI device
+ *
+ * This routine must be called when reporting temporary or permanent
+ * error to the particular PCI device to disable interrupt of that
+ * device. If the device has enabled MSI or MSI-X interrupt, we needn't
+ * do real work because EEH should freeze DMA transfers for those PCI
+ * devices encountering EEH errors, which includes MSI or MSI-X.
*/
static void eeh_disable_irq(struct pci_dev *dev)
{
@@ -73,7 +86,7 @@ static void eeh_disable_irq(struct pci_dev *dev)
/* Don't disable MSI and MSI-X interrupts. They are
* effectively disabled by the DMA Stopped state
* when an EEH error occurs.
- */
+ */
if (dev->msi_enabled || dev->msix_enabled)
return;
@@ -85,7 +98,11 @@ static void eeh_disable_irq(struct pci_dev *dev)
}
/**
- * eeh_enable_irq - enable interrupt for the recovering device
+ * eeh_enable_irq - Enable interrupt for the recovering device
+ * @dev: PCI device
+ *
+ * This routine must be called to enable interrupt while failed
+ * device could be resumed.
*/
static void eeh_enable_irq(struct pci_dev *dev)
{
@@ -97,15 +114,15 @@ static void eeh_enable_irq(struct pci_dev *dev)
}
}
-/* ------------------------------------------------------- */
/**
- * eeh_report_error - report pci error to each device driver
+ * eeh_report_error - Report pci error to each device driver
+ * @dev: PCI device
+ * @userdata: return value
*
* Report an EEH error to each device driver, collect up and
* merge the device driver responses. Cumulative response
* passed back in "userdata".
*/
-
static int eeh_report_error(struct pci_dev *dev, void *userdata)
{
enum pci_ers_result rc, *res = userdata;
@@ -122,7 +139,7 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata)
!driver->err_handler->error_detected)
return 0;
- rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen);
+ rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
/* A driver that needs a reset trumps all others */
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
@@ -132,13 +149,14 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata)
}
/**
- * eeh_report_mmio_enabled - tell drivers that MMIO has been enabled
+ * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
+ * @dev: PCI device
+ * @userdata: return value
*
* Tells each device driver that IO ports, MMIO and config space I/O
* are now enabled. Collects up and merges the device driver responses.
* Cumulative response passed back in "userdata".
*/
-
static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
{
enum pci_ers_result rc, *res = userdata;
@@ -149,7 +167,7 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
!driver->err_handler->mmio_enabled)
return 0;
- rc = driver->err_handler->mmio_enabled (dev);
+ rc = driver->err_handler->mmio_enabled(dev);
/* A driver that needs a reset trumps all others */
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
@@ -159,9 +177,15 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
}
/**
- * eeh_report_reset - tell device that slot has been reset
+ * eeh_report_reset - Tell device that slot has been reset
+ * @dev: PCI device
+ * @userdata: return value
+ *
+ * This routine must be called while EEH tries to reset particular
+ * PCI device so that the associated PCI device driver could take
+ * some actions, usually to save data the driver needs so that the
+ * driver can work again while the device is recovered.
*/
-
static int eeh_report_reset(struct pci_dev *dev, void *userdata)
{
enum pci_ers_result rc, *res = userdata;
@@ -188,9 +212,14 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata)
}
/**
- * eeh_report_resume - tell device to resume normal operations
+ * eeh_report_resume - Tell device to resume normal operations
+ * @dev: PCI device
+ * @userdata: return value
+ *
+ * This routine must be called to notify the device driver that it
+ * could resume so that the device driver can do some initialization
+ * to make the recovered device work again.
*/
-
static int eeh_report_resume(struct pci_dev *dev, void *userdata)
{
struct pci_driver *driver = dev->driver;
@@ -212,12 +241,13 @@ static int eeh_report_resume(struct pci_dev *dev, void *userdata)
}
/**
- * eeh_report_failure - tell device driver that device is dead.
+ * eeh_report_failure - Tell device driver that device is dead.
+ * @dev: PCI device
+ * @userdata: return value
*
* This informs the device driver that the device is permanently
* dead, and that no further recovery attempts will be made on it.
*/
-
static int eeh_report_failure(struct pci_dev *dev, void *userdata)
{
struct pci_driver *driver = dev->driver;
@@ -238,37 +268,16 @@ static int eeh_report_failure(struct pci_dev *dev, void *userdata)
return 0;
}
-/* ------------------------------------------------------- */
/**
- * handle_eeh_events -- reset a PCI device after hard lockup.
- *
- * pSeries systems will isolate a PCI slot if the PCI-Host
- * bridge detects address or data parity errors, DMA's
- * occurring to wild addresses (which usually happen due to
- * bugs in device drivers or in PCI adapter firmware).
- * Slot isolations also occur if #SERR, #PERR or other misc
- * PCI-related errors are detected.
+ * eeh_reset_device - Perform actual reset of a pci slot
+ * @pe_dn: PE associated device node
+ * @bus: PCI bus corresponding to the isolated slot
*
- * Recovery process consists of unplugging the device driver
- * (which generated hotplug events to userspace), then issuing
- * a PCI #RST to the device, then reconfiguring the PCI config
- * space for all bridges & devices under this slot, and then
- * finally restarting the device drivers (which cause a second
- * set of hotplug events to go out to userspace).
+ * This routine must be called to do reset on the indicated PE.
+ * During the reset, udev might be invoked because those affected
+ * PCI devices will be removed and then added.
*/
-
-/**
- * eeh_reset_device() -- perform actual reset of a pci slot
- * @bus: pointer to the pci bus structure corresponding
- * to the isolated slot. A non-null value will
- * cause all devices under the bus to be removed
- * and then re-added.
- * @pe_dn: pointer to a "Partionable Endpoint" device node.
- * This is the top-level structure on which pci
- * bus resets can be performed.
- */
-
-static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
+static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus)
{
struct device_node *dn;
int cnt, rc;
@@ -281,12 +290,13 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
/* Reset the pci controller. (Asserts RST#; resets config space).
* Reconfigure bridges and devices. Don't try to bring the system
- * up if the reset failed for some reason. */
+ * up if the reset failed for some reason.
+ */
rc = eeh_reset_pe(pe_dn);
if (rc)
return rc;
- /* Walk over all functions on this device. */
+ /* Walk over all functions on this device. */
dn = pe_dn->node;
if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
dn = dn->parent->child;
@@ -308,7 +318,7 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
* potentially weird things happen.
*/
if (bus) {
- ssleep (5);
+ ssleep(5);
pcibios_add_pci_devices(bus);
}
pe_dn->eeh_freeze_count = cnt;
@@ -321,7 +331,24 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
*/
#define MAX_WAIT_FOR_RECOVERY 150
-struct pci_dn * handle_eeh_events (struct eeh_event *event)
+/**
+ * handle_eeh_events - Reset a PCI device after hard lockup.
+ * @event: EEH event
+ *
+ * While PHB detects address or data parity errors on particular PCI
+ * slot, the associated PE will be frozen. Besides, DMA's occurring
+ * to wild addresses (which usually happen due to bugs in device
+ * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
+ * #PERR or other misc PCI-related errors also can trigger EEH errors.
+ *
+ * Recovery process consists of unplugging the device driver (which
+ * generated hotplug events to userspace), then issuing a PCI #RST to
+ * the device, then reconfiguring the PCI config space for all bridges
+ * & devices under this slot, and then finally restarting the device
+ * drivers (which cause a second set of hotplug events to go out to
+ * userspace).
+ */
+struct pci_dn *handle_eeh_events(struct eeh_event *event)
{
struct device_node *frozen_dn;
struct pci_dn *frozen_pdn;
@@ -350,9 +377,10 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
* which was always an EADS pci bridge. In the new style,
* there might not be any EADS bridges, and even when there are,
* the firmware marks them as "EEH incapable". So another
- * two-step is needed to find the pci bus.. */
+ * two-step is needed to find the pci bus..
+ */
if (!frozen_bus)
- frozen_bus = pcibios_find_pci_bus (frozen_dn->parent);
+ frozen_bus = pcibios_find_pci_bus(frozen_dn->parent);
if (!frozen_bus) {
printk(KERN_ERR "EEH: Cannot find PCI bus "
@@ -395,7 +423,8 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
pci_walk_bus(frozen_bus, eeh_report_error, &result);
/* Get the current PCI slot state. This can take a long time,
- * sometimes over 3 seconds for certain systems. */
+ * sometimes over 3 seconds for certain systems.
+ */
rc = eeh_ops->wait_state(frozen_pdn->node, MAX_WAIT_FOR_RECOVERY*1000);
if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
printk(KERN_WARNING "EEH: Permanent failure\n");
@@ -508,4 +537,3 @@ perm_error:
return NULL;
}
-/* ---------- end of file ---------- */
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index d2383cf..e98347c 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -1,6 +1,4 @@
/*
- * eeh_event.c
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -46,7 +44,7 @@ DECLARE_WORK(eeh_event_wq, eeh_thread_launcher);
DEFINE_MUTEX(eeh_event_mutex);
/**
- * eeh_event_handler - dispatch EEH events.
+ * eeh_event_handler - Dispatch EEH events.
* @dummy - unused
*
* The detection of a frozen slot can occur inside an interrupt,
@@ -61,7 +59,7 @@ static int eeh_event_handler(void * dummy)
struct eeh_event *event;
struct pci_dn *pdn;
- daemonize ("eehd");
+ daemonize("eehd");
set_current_state(TASK_INTERRUPTIBLE);
spin_lock_irqsave(&eeh_eventlist_lock, flags);
@@ -93,7 +91,7 @@ static int eeh_event_handler(void * dummy)
/* If there are no new errors after an hour, clear the counter. */
if (pdn && pdn->eeh_freeze_count>0) {
- msleep_interruptible (3600*1000);
+ msleep_interruptible(3600*1000);
if (pdn->eeh_freeze_count>0)
pdn->eeh_freeze_count--;
}
@@ -102,8 +100,11 @@ static int eeh_event_handler(void * dummy)
}
/**
- * eeh_thread_launcher
+ * eeh_thread_launcher - Start kernel thread to handle EEH events
* @dummy - unused
+ *
+ * This routine is called to start the kernel thread for processing
+ * EEH event.
*/
static void eeh_thread_launcher(struct work_struct *dummy)
{
@@ -112,14 +113,14 @@ static void eeh_thread_launcher(struct work_struct *dummy)
}
/**
- * eeh_send_failure_event - generate a PCI error event
- * @dev pci device
+ * eeh_send_failure_event - Generate a PCI error event
+ * @dev: pci device
*
* This routine can be called within an interrupt context;
* the actual event will be delivered in a normal context
* (from a workqueue).
*/
-int eeh_send_failure_event (struct device_node *dn,
+int eeh_send_failure_event(struct device_node *dn,
struct pci_dev *dev)
{
unsigned long flags;
@@ -135,7 +136,7 @@ int eeh_send_failure_event (struct device_node *dn,
}
event = kmalloc(sizeof(*event), GFP_ATOMIC);
if (event == NULL) {
- printk (KERN_ERR "EEH: out of memory, event not handled\n");
+ printk(KERN_ERR "EEH: out of memory, event not handled\n");
return 1;
}
@@ -154,5 +155,3 @@ int eeh_send_failure_event (struct device_node *dn,
return 0;
}
-
-/********************** END OF FILE ******************************/
diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c
index eb744ee..5e4eab1 100644
--- a/arch/powerpc/platforms/pseries/eeh_sysfs.c
+++ b/arch/powerpc/platforms/pseries/eeh_sysfs.c
@@ -28,7 +28,7 @@
#include <asm/pci-bridge.h>
/**
- * EEH_SHOW_ATTR -- create sysfs entry for eeh statistic
+ * EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic
* @_name: name of file in sysfs directory
* @_memb: name of member in struct pci_dn to access
* @_format: printf format for display
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 13/21] Cleanup on function names of EEH aux components
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (11 preceding siblings ...)
2012-02-28 6:04 ` [PATCH 12/21] Cleanup on comments of EEH aux components Gavin Shan
@ 2012-02-28 6:04 ` Gavin Shan
2012-02-28 6:04 ` [PATCH 14/21] Introduce EEH device Gavin Shan
` (9 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
The patch does some cleanup on the function names of the EEH
aux components. Currently, only a couple of function names from
eeh_cache have been adjusted:
* The function names now carry the prefix "pci_addr_cache".
* The declaration of pci_addr_cache_build() has been moved in the
header file to reflect the function call sequence.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/ppc-pci.h | 4 ++--
arch/powerpc/platforms/pseries/eeh.c | 2 +-
arch/powerpc/platforms/pseries/eeh_cache.c | 8 ++++----
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index b4b18d8..c02d5a7 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -49,10 +49,10 @@ extern unsigned long pci_probe_only;
#ifdef CONFIG_EEH
+void pci_addr_cache_build(void);
void pci_addr_cache_insert_device(struct pci_dev *dev);
void pci_addr_cache_remove_device(struct pci_dev *dev);
-void pci_addr_cache_build(void);
-struct pci_dev *pci_get_device_by_addr(unsigned long addr);
+struct pci_dev *pci_addr_cache_get_device(unsigned long addr);
void eeh_slot_error_detail (struct pci_dn *pdn, int severity);
int eeh_pci_enable(struct pci_dn *pdn, int function);
int eeh_reset_pe(struct pci_dn *);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index bd4ed83..646b520 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -511,7 +511,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
/* Finding the phys addr + pci device; this is pretty quick. */
addr = eeh_token_to_phys((unsigned long __force) token);
- dev = pci_get_device_by_addr(addr);
+ dev = pci_addr_cache_get_device(addr);
if (!dev) {
no_device++;
return val;
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
index 850c00c..7c36a9c 100644
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ b/arch/powerpc/platforms/pseries/eeh_cache.c
@@ -59,7 +59,7 @@ static struct pci_io_addr_cache {
spinlock_t piar_lock;
} pci_io_addr_cache_root;
-static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
+static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
{
struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
@@ -83,7 +83,7 @@ static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
}
/**
- * pci_get_device_by_addr - Get device, given only address
+ * pci_addr_cache_get_device - Get device, given only address
* @addr: mmio (PIO) phys address or i/o port number
*
* Given an mmio phys address, or a port number, find a pci device
@@ -92,13 +92,13 @@ static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
* from zero (that is, they do *not* have pci_io_addr added in).
* It is safe to call this function within an interrupt.
*/
-struct pci_dev *pci_get_device_by_addr(unsigned long addr)
+struct pci_dev *pci_addr_cache_get_device(unsigned long addr)
{
struct pci_dev *dev;
unsigned long flags;
spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
- dev = __pci_get_device_by_addr(addr);
+ dev = __pci_addr_cache_get_device(addr);
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
return dev;
}
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 14/21] Introduce EEH device
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (12 preceding siblings ...)
2012-02-28 6:04 ` [PATCH 13/21] Cleanup on function names " Gavin Shan
@ 2012-02-28 6:04 ` Gavin Shan
2012-02-28 6:04 ` [PATCH 15/21] Replace pci_dn with eeh_dev for EEH sysfs Gavin Shan
` (8 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
The original EEH implementation depends heavily on struct pci_dn. However,
EEH shouldn't actually depend on it, because EEH needn't share much
information with other PCI components. That is to say, EEH should be
able to work independently.
The patch introduces struct eeh_dev so that EEH core components needn't
be based on struct pci_dn in future. Also, struct pci_dn and struct
eeh_dev instances are created in a dynamic fashion, and the binding among
EEH device, OF node and PCI device is implemented as well.
The EEH devices are created after PHBs are detected and initialized, but
before PCI enumeration has started. Apart from that, a PHB might be
created dynamically through the DLPAR component, and the EEH devices
should be created as well. Another case is an OF node created dynamically
by DR (Dynamic Reconfiguration), which is defined by PAPR. For those OF
nodes created by DR, EEH devices should also be created accordingly. The
binding between EEH device and OF node is done when the EEH device is
initially created.
The binding between EEH device and PCI device should be done after PCI
enumeration is done. Besides, PCI hotplug also needs the binding so that
the EEH devices can be found from newly added PCI buses or PCI
devices.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/device.h | 3 +
arch/powerpc/include/asm/eeh.h | 57 ++++++++++++++--
arch/powerpc/kernel/of_platform.c | 3 +
arch/powerpc/kernel/rtas_pci.c | 3 +
arch/powerpc/platforms/pseries/Makefile | 3 +-
arch/powerpc/platforms/pseries/eeh_dev.c | 102 ++++++++++++++++++++++++++++
arch/powerpc/platforms/pseries/pci_dlpar.c | 3 +
arch/powerpc/platforms/pseries/setup.c | 6 ++-
include/linux/of.h | 10 +++
include/linux/pci.h | 7 ++
10 files changed, 188 insertions(+), 9 deletions(-)
create mode 100644 arch/powerpc/platforms/pseries/eeh_dev.c
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index d57c08a..63d5ca4 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -31,6 +31,9 @@ struct dev_archdata {
#ifdef CONFIG_SWIOTLB
dma_addr_t max_direct_dma_addr;
#endif
+#ifdef CONFIG_EEH
+ struct eeh_dev *edev;
+#endif
};
struct pdev_archdata {
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index ad8f318..daaad91 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -32,6 +32,43 @@ struct device_node;
#ifdef CONFIG_EEH
/*
+ * The struct is used to track EEH state for the associated
+ * PCI device node or PCI device. In future, it might
+ * represent a PE as well, so that the EEH devices form
+ * another tree in addition to the currently existing tree of PCI
+ * buses and PCI devices.
+ */
+#define EEH_MODE_SUPPORTED (1<<0) /* EEH supported on the device */
+#define EEH_MODE_NOCHECK (1<<1) /* EEH check should be skipped */
+#define EEH_MODE_ISOLATED (1<<2) /* The device has been isolated */
+#define EEH_MODE_RECOVERING (1<<3) /* Recovering the device */
+#define EEH_MODE_IRQ_DISABLED (1<<4) /* Interrupt disabled */
+
+struct eeh_dev {
+ int mode; /* EEH mode, EEH_MODE_* flags */
+ int class_code; /* Class code of the device */
+ int config_addr; /* Config address */
+ int pe_config_addr; /* PE config address */
+ int check_count; /* Checks ignored while isolated */
+ int freeze_count; /* Number of times frozen */
+ int false_positives; /* Number of false-positive checks */
+ u32 config_space[16]; /* Saved PCI config space */
+ struct pci_controller *phb; /* Associated PHB */
+ struct device_node *dn; /* Associated device node */
+ struct pci_dev *pdev; /* Associated PCI device */
+};
+
+static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
+{
+ return edev->dn;
+}
+
+static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
+{
+ return edev->pdev;
+}
+
+/*
* The struct is used to trace the registered EEH operation
* callback functions. Actually, those operation callback
* functions are heavily platform dependent. That means the
@@ -70,19 +107,15 @@ struct eeh_ops {
extern struct eeh_ops *eeh_ops;
extern int eeh_subsystem_enabled;
-/* Values for eeh_mode bits in device_node */
-#define EEH_MODE_SUPPORTED (1<<0)
-#define EEH_MODE_NOCHECK (1<<1)
-#define EEH_MODE_ISOLATED (1<<2)
-#define EEH_MODE_RECOVERING (1<<3)
-#define EEH_MODE_IRQ_DISABLED (1<<4)
-
/*
* Max number of EEH freezes allowed before we consider the device
* to be permanently disabled.
*/
#define EEH_MAX_ALLOWED_FREEZES 5
+void * __devinit eeh_dev_init(struct device_node *dn, void *data);
+void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb);
+void __init eeh_dev_phb_init(void);
void __init eeh_init(void);
#ifdef CONFIG_PPC_PSERIES
int __init eeh_pseries_init(void);
@@ -113,6 +146,16 @@ void eeh_remove_bus_device(struct pci_dev *);
#define EEH_IO_ERROR_VALUE(size) (~0U >> ((4 - (size)) * 8))
#else /* !CONFIG_EEH */
+
+static inline void *eeh_dev_init(struct device_node *dn, void *data)
+{
+ return NULL;
+}
+
+static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { }
+
+static inline void eeh_dev_phb_init(void) { }
+
static inline void eeh_init(void) { }
#ifdef CONFIG_PPC_PSERIES
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index e1612df..9239c3a 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -66,6 +66,9 @@ static int __devinit of_pci_phb_probe(struct platform_device *dev)
/* Init pci_dn data structures */
pci_devs_phb_init_dynamic(phb);
+ /* Create EEH devices for the PHB */
+ eeh_dev_phb_init_dynamic(phb);
+
/* Register devices with EEH */
#ifdef CONFIG_EEH
if (dev->dev.of_node->child)
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 6cd8f01..517bd86 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -275,6 +275,9 @@ void __init find_and_init_phbs(void)
of_node_put(root);
pci_devs_phb_init();
+ /* Create EEH devices for all PHBs */
+ eeh_dev_phb_init();
+
/*
* pci_probe_only and pci_assign_all_buses can be set via properties
* in chosen.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 9aa5581..12dae0b 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -6,7 +6,8 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
firmware.o power.o dlpar.o mobility.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SCANLOG) += scanlog.o
-obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o eeh_pseries.o
+obj-$(CONFIG_EEH) += eeh.o eeh_dev.o eeh_cache.o eeh_driver.o \
+ eeh_event.o eeh_sysfs.o eeh_pseries.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
obj-$(CONFIG_PSERIES_MSI) += msi.o
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
new file mode 100644
index 0000000..f3aed7d
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_dev.c
@@ -0,0 +1,102 @@
+/*
+ * The file intends to implement dynamic creation of EEH devices, which will
+ * be bound with OF node and PCI device simultaneously. The EEH devices are
+ * the fundamental information for EEH core components to work properly. Besides,
+ * we have to support multiple situations where dynamic creation of EEH devices
+ * is required:
+ *
+ * 1) Before PCI enumeration starts, we need to create EEH devices according to
+ * the PCI sensitive OF nodes.
+ * 2) When PCI enumeration is done, we need to do the binding between PCI device
+ * and the associated EEH device.
+ * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device
+ * will be created when a PCI sensitive OF node is detected from DR.
+ * 4) PCI hotplug needs to redo the binding between PCI device and EEH device. If
+ * a PHB is newly inserted, we also need to create EEH devices accordingly.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+/**
+ * eeh_dev_init - Create EEH device according to OF node
+ * @dn: device node the new EEH device is bound to
+ * @data: PHB (struct pci_controller) recorded in the new EEH device
+ *
+ * It will create an EEH device for the given OF node. The function might be
+ * called by PCI enumeration, DR, PHB hotplug. It returns NULL in all cases.
+ */
+void * __devinit eeh_dev_init(struct device_node *dn, void *data)
+{
+ struct pci_controller *phb = data;
+ struct eeh_dev *edev;
+
+ /* Allocate EEH device; warn and give up if no memory is available */
+ edev = zalloc_maybe_bootmem(sizeof(*edev), GFP_KERNEL);
+ if (!edev) {
+ pr_warning("%s: out of memory\n", __func__);
+ return NULL;
+ }
+
+ /* Cross-link EEH device and OF node, and record the PHB */
+ dn->edev = edev;
+ edev->dn = dn;
+ edev->phb = phb;
+
+ return NULL; /* also on success; the return value carries no status */
+}
+
+/**
+ * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
+ * @phb: PHB whose OF node (phb->dn) is the starting point
+ *
+ * Create an EEH device for the PHB's own OF node, then run eeh_dev_init()
+ * on its child OF nodes through traverse_pci_devices().
+ */
+void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb)
+{
+ struct device_node *dn = phb->dn;
+
+ /* EEH device for the PHB's own OF node */
+ eeh_dev_init(dn, phb);
+
+ /* EEH devices for children OF nodes, with the PHB passed as @data */
+ traverse_pci_devices(dn, eeh_dev_init, phb);
+}
+
+/**
+ * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
+ *
+ * Walk hose_list and call eeh_dev_phb_init_dynamic() on every PHB on it, so
+ * that EEH devices exist for the PHB OF nodes and their children OF nodes.
+ */
+void __init eeh_dev_phb_init(void)
+{
+ struct pci_controller *phb, *tmp;
+
+ list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
+ eeh_dev_phb_init_dynamic(phb);
+}
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 55d4ec1..fbb21fc 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -147,6 +147,9 @@ struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
pci_devs_phb_init_dynamic(phb);
+ /* Create EEH devices for the PHB */
+ eeh_dev_phb_init_dynamic(phb);
+
if (dn->child)
eeh_add_device_tree_early(dn);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 809d9d9..60f9462 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -261,8 +261,12 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act
switch (action) {
case PSERIES_RECONFIG_ADD:
pci = np->parent->data;
- if (pci)
+ if (pci) {
update_dn_pci_info(np, pci->phb);
+
+ /* Create EEH device for the OF node */
+ eeh_dev_init(np, pci->phb);
+ }
break;
default:
err = NOTIFY_DONE;
diff --git a/include/linux/of.h b/include/linux/of.h
index a75a831..3e710d8 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -58,6 +58,9 @@ struct device_node {
struct kref kref;
unsigned long _flags;
void *data;
+#if defined(CONFIG_EEH)
+ struct eeh_dev *edev;
+#endif
#if defined(CONFIG_SPARC)
char *path_component_name;
unsigned int unique_id;
@@ -72,6 +75,13 @@ struct of_phandle_args {
uint32_t args[MAX_PHANDLE_ARGS];
};
+#if defined(CONFIG_EEH)
+static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn)
+{
+ return dn->edev;
+}
+#endif
+
#if defined(CONFIG_SPARC) || !defined(CONFIG_OF)
/* Dummy ref counting routines - to be implemented later */
static inline struct device_node *of_node_get(struct device_node *node)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a16b1df..cfeee2a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1647,6 +1647,13 @@ static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
#endif /* CONFIG_OF */
+#ifdef CONFIG_EEH
+static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev)
+{
+ return pdev->dev.archdata.edev;
+}
+#endif
+
/**
* pci_find_upstream_pcie_bridge - find upstream PCIe-to-PCI bridge of a device
* @pdev: the PCI device
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 15/21] Replace pci_dn with eeh_dev for EEH sysfs
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (13 preceding siblings ...)
2012-02-28 6:04 ` [PATCH 14/21] Introduce EEH device Gavin Shan
@ 2012-02-28 6:04 ` Gavin Shan
2012-02-28 6:04 ` [PATCH 16/21] Replace pci_dn with eeh_dev for EEH address cache Gavin Shan
` (7 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
With original EEH implementation, all EEH related statistics have
been put into struct pci_dn. We've introduced struct eeh_dev to
replace struct pci_dn in EEH core components, including EEH sysfs
component.
The patch shows EEH statistics from struct eeh_dev instead of struct
pci_dn in the EEH sysfs component. Besides, it also fixes the EEH device
retrieval from the PCI device, which was introduced by the previous patch
in this patch series.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/eeh_sysfs.c | 23 ++++++++++-------------
1 files changed, 10 insertions(+), 13 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c
index 5e4eab1..243b351 100644
--- a/arch/powerpc/platforms/pseries/eeh_sysfs.c
+++ b/arch/powerpc/platforms/pseries/eeh_sysfs.c
@@ -41,24 +41,21 @@ static ssize_t eeh_show_##_name(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
struct pci_dev *pdev = to_pci_dev(dev); \
- struct device_node *dn = pci_device_to_OF_node(pdev); \
- struct pci_dn *pdn; \
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); \
\
- if (!dn || PCI_DN(dn) == NULL) \
- return 0; \
+ if (!edev) \
+ return 0; \
\
- pdn = PCI_DN(dn); \
- return sprintf(buf, _format "\n", pdn->_memb); \
+ return sprintf(buf, _format "\n", edev->_memb); \
} \
static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
-
-EEH_SHOW_ATTR(eeh_mode, eeh_mode, "0x%x");
-EEH_SHOW_ATTR(eeh_config_addr, eeh_config_addr, "0x%x");
-EEH_SHOW_ATTR(eeh_pe_config_addr, eeh_pe_config_addr, "0x%x");
-EEH_SHOW_ATTR(eeh_check_count, eeh_check_count, "%d");
-EEH_SHOW_ATTR(eeh_freeze_count, eeh_freeze_count, "%d");
-EEH_SHOW_ATTR(eeh_false_positives, eeh_false_positives, "%d");
+EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
+EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x");
+EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
+EEH_SHOW_ATTR(eeh_check_count, check_count, "%d" );
+EEH_SHOW_ATTR(eeh_freeze_count, freeze_count, "%d" );
+EEH_SHOW_ATTR(eeh_false_positives, false_positives, "%d" );
void eeh_sysfs_add_device(struct pci_dev *pdev)
{
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 16/21] Replace pci_dn with eeh_dev for EEH address cache
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (14 preceding siblings ...)
2012-02-28 6:04 ` [PATCH 15/21] Replace pci_dn with eeh_dev for EEH sysfs Gavin Shan
@ 2012-02-28 6:04 ` Gavin Shan
2012-02-28 6:04 ` [PATCH 17/21] Replace pci_dn with eeh_dev for EEH core Gavin Shan
` (6 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
With original EEH implementation, struct pci_dn is used while building
PCI I/O address cache, which helps on searching the corresponding
PCI device according to the given physical I/O address. Besides, pci_dn
is associated with the corresponding PCI device while building its
I/O cache.
The patch replaces struct pci_dn with struct eeh_dev so that EEH address
cache won't depend on struct pci_dn. That will help EEH to become an
independent module in future. Besides, the binding of eeh_dev and PCI
device is done while building PCI device I/O cache.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/eeh_cache.c | 27 ++++++++++++++++++++-------
1 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
index 7c36a9c..e5ae1c6 100644
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ b/arch/powerpc/platforms/pseries/eeh_cache.c
@@ -175,7 +175,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
static void __pci_addr_cache_insert_device(struct pci_dev *dev)
{
struct device_node *dn;
- struct pci_dn *pdn;
+ struct eeh_dev *edev;
int i;
dn = pci_device_to_OF_node(dev);
@@ -184,13 +184,19 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev)
return;
}
+ edev = of_node_to_eeh_dev(dn);
+ if (!edev) {
+ pr_warning("PCI: no EEH dev found for dn=%s\n",
+ dn->full_name);
+ return;
+ }
+
/* Skip any devices for which EEH is not enabled. */
- pdn = PCI_DN(dn);
- if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
- pdn->eeh_mode & EEH_MODE_NOCHECK) {
+ if (!(edev->mode & EEH_MODE_SUPPORTED) ||
+ edev->mode & EEH_MODE_NOCHECK) {
#ifdef DEBUG
- printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n",
- pci_name(dev), pdn->node->full_name);
+ pr_info("PCI: skip building address cache for=%s - %s\n",
+ pci_name(dev), dn->full_name);
#endif
return;
}
@@ -281,6 +287,7 @@ void pci_addr_cache_remove_device(struct pci_dev *dev)
void __init pci_addr_cache_build(void)
{
struct device_node *dn;
+ struct eeh_dev *edev;
struct pci_dev *dev = NULL;
spin_lock_init(&pci_io_addr_cache_root.piar_lock);
@@ -291,8 +298,14 @@ void __init pci_addr_cache_build(void)
dn = pci_device_to_OF_node(dev);
if (!dn)
continue;
+
+ edev = of_node_to_eeh_dev(dn);
+ if (!edev)
+ continue;
+
pci_dev_get(dev); /* matching put is in eeh_remove_device() */
- PCI_DN(dn)->pcidev = dev;
+ dev->dev.archdata.edev = edev;
+ edev->pdev = dev;
eeh_sysfs_add_device(dev);
}
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 17/21] Replace pci_dn with eeh_dev for EEH core
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (15 preceding siblings ...)
2012-02-28 6:04 ` [PATCH 16/21] Replace pci_dn with eeh_dev for EEH address cache Gavin Shan
@ 2012-02-28 6:04 ` Gavin Shan
2012-02-28 6:04 ` [PATCH 18/21] Replace pci_dn with eeh_dev for EEH aux components Gavin Shan
` (5 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
The original EEH implementation depends heavily on struct pci_dn:
we have to put EEH-related information into pci_dn. Actually, we could
split struct pci_dn so that the EEH-sensitive information forms an
individual struct, which makes EEH more independent.
The patch replaces pci_dn with eeh_dev for EEH core.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/ppc-pci.h | 8 +-
arch/powerpc/platforms/pseries/eeh.c | 269 ++++++++++++++++++----------------
2 files changed, 144 insertions(+), 133 deletions(-)
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index c02d5a7..e660b37 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -53,10 +53,10 @@ void pci_addr_cache_build(void);
void pci_addr_cache_insert_device(struct pci_dev *dev);
void pci_addr_cache_remove_device(struct pci_dev *dev);
struct pci_dev *pci_addr_cache_get_device(unsigned long addr);
-void eeh_slot_error_detail (struct pci_dn *pdn, int severity);
-int eeh_pci_enable(struct pci_dn *pdn, int function);
-int eeh_reset_pe(struct pci_dn *);
-void eeh_restore_bars(struct pci_dn *);
+void eeh_slot_error_detail(struct eeh_dev *edev, int severity);
+int eeh_pci_enable(struct eeh_dev *edev, int function);
+int eeh_reset_pe(struct eeh_dev *);
+void eeh_restore_bars(struct eeh_dev *);
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
void eeh_mark_slot(struct device_node *dn, int mode_flag);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 646b520..aec10f6 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -115,28 +115,29 @@ static unsigned long slot_resets;
/**
* eeh_gather_pci_data - Copy assorted PCI config space registers to buff
- * @pdn: device to report data for
+ * @edev: device to report data for
* @buf: point to buffer in which to log
* @len: amount of room in buffer
*
* This routine captures assorted PCI configuration space data,
* and puts them into a buffer for RTAS error logging.
*/
-static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
+static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
{
- struct pci_dev *dev = pdn->pcidev;
+ struct device_node *dn = eeh_dev_to_of_node(edev);
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
u32 cfg;
int cap, i;
int n = 0;
- n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name);
- printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name);
+ n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
+ printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name);
- rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
+ rtas_read_config(PCI_DN(dn), PCI_VENDOR_ID, 4, &cfg);
n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
- rtas_read_config(pdn, PCI_COMMAND, 4, &cfg);
+ rtas_read_config(PCI_DN(dn), PCI_COMMAND, 4, &cfg);
n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
@@ -147,11 +148,11 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
/* Gather bridge-specific registers */
if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
- rtas_read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
+ rtas_read_config(PCI_DN(dn), PCI_SEC_STATUS, 2, &cfg);
n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
- rtas_read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
+ rtas_read_config(PCI_DN(dn), PCI_BRIDGE_CONTROL, 2, &cfg);
n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
}
@@ -159,11 +160,11 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
/* Dump out the PCI-X command and status regs */
cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
if (cap) {
- rtas_read_config(pdn, cap, 4, &cfg);
+ rtas_read_config(PCI_DN(dn), cap, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
- rtas_read_config(pdn, cap+4, 4, &cfg);
+ rtas_read_config(PCI_DN(dn), cap+4, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
}
@@ -176,7 +177,7 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
"EEH: PCI-E capabilities and status follow:\n");
for (i=0; i<=8; i++) {
- rtas_read_config(pdn, cap+4*i, 4, &cfg);
+ rtas_read_config(PCI_DN(dn), cap+4*i, 4, &cfg);
n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
}
@@ -188,7 +189,7 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
"EEH: PCI-E AER capability register set follows:\n");
for (i=0; i<14; i++) {
- rtas_read_config(pdn, cap+4*i, 4, &cfg);
+ rtas_read_config(PCI_DN(dn), cap+4*i, 4, &cfg);
n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
}
@@ -197,12 +198,11 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
/* Gather status on devices under the bridge */
if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
- struct device_node *dn;
+ struct device_node *child;
- for_each_child_of_node(pdn->node, dn) {
- pdn = PCI_DN(dn);
- if (pdn)
- n += eeh_gather_pci_data(pdn, buf+n, len-n);
+ for_each_child_of_node(dn, child) {
+ if (of_node_to_eeh_dev(child))
+ n += eeh_gather_pci_data(of_node_to_eeh_dev(child), buf+n, len-n);
}
}
@@ -211,7 +211,7 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
/**
* eeh_slot_error_detail - Generate combined log including driver log and error log
- * @pdn: device node
+ * @edev: device to report error log for
* @severity: temporary or permanent error log
*
* This routine should be called to generate the combined log, which
@@ -219,17 +219,17 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
* out from the config space of the corresponding PCI device, while
* the error log is fetched through platform dependent function call.
*/
-void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
+void eeh_slot_error_detail(struct eeh_dev *edev, int severity)
{
size_t loglen = 0;
pci_regs_buf[0] = 0;
- eeh_pci_enable(pdn, EEH_OPT_THAW_MMIO);
- eeh_ops->configure_bridge(pdn->node);
- eeh_restore_bars(pdn);
- loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
+ eeh_pci_enable(edev, EEH_OPT_THAW_MMIO);
+ eeh_ops->configure_bridge(eeh_dev_to_of_node(edev));
+ eeh_restore_bars(edev);
+ loglen = eeh_gather_pci_data(edev, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
- eeh_ops->get_log(pdn->node, severity, pci_regs_buf, loglen);
+ eeh_ops->get_log(eeh_dev_to_of_node(edev), severity, pci_regs_buf, loglen);
}
/**
@@ -260,8 +260,8 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
*/
struct device_node *eeh_find_device_pe(struct device_node *dn)
{
- while ((dn->parent) && PCI_DN(dn->parent) &&
- (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
+ while (dn->parent && of_node_to_eeh_dev(dn->parent) &&
+ (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) {
dn = dn->parent;
}
return dn;
@@ -284,11 +284,11 @@ static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
struct device_node *dn;
for_each_child_of_node(parent, dn) {
- if (PCI_DN(dn)) {
+ if (of_node_to_eeh_dev(dn)) {
/* Mark the pci device driver too */
- struct pci_dev *dev = PCI_DN(dn)->pcidev;
+ struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev;
- PCI_DN(dn)->eeh_mode |= mode_flag;
+ of_node_to_eeh_dev(dn)->mode |= mode_flag;
if (dev && dev->driver)
dev->error_state = pci_channel_io_frozen;
@@ -312,13 +312,13 @@ void eeh_mark_slot(struct device_node *dn, int mode_flag)
dn = eeh_find_device_pe(dn);
/* Back up one, since config addrs might be shared */
- if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+ if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
dn = dn->parent;
- PCI_DN(dn)->eeh_mode |= mode_flag;
+ of_node_to_eeh_dev(dn)->mode |= mode_flag;
/* Mark the pci device too */
- dev = PCI_DN(dn)->pcidev;
+ dev = of_node_to_eeh_dev(dn)->pdev;
if (dev)
dev->error_state = pci_channel_io_frozen;
@@ -337,9 +337,9 @@ static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
struct device_node *dn;
for_each_child_of_node(parent, dn) {
- if (PCI_DN(dn)) {
- PCI_DN(dn)->eeh_mode &= ~mode_flag;
- PCI_DN(dn)->eeh_check_count = 0;
+ if (of_node_to_eeh_dev(dn)) {
+ of_node_to_eeh_dev(dn)->mode &= ~mode_flag;
+ of_node_to_eeh_dev(dn)->check_count = 0;
__eeh_clear_slot(dn, mode_flag);
}
}
@@ -360,11 +360,11 @@ void eeh_clear_slot(struct device_node *dn, int mode_flag)
dn = eeh_find_device_pe(dn);
/* Back up one, since config addrs might be shared */
- if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+ if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
dn = dn->parent;
- PCI_DN(dn)->eeh_mode &= ~mode_flag;
- PCI_DN(dn)->eeh_check_count = 0;
+ of_node_to_eeh_dev(dn)->mode &= ~mode_flag;
+ of_node_to_eeh_dev(dn)->check_count = 0;
__eeh_clear_slot(dn, mode_flag);
raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
}
@@ -388,7 +388,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
{
int ret;
unsigned long flags;
- struct pci_dn *pdn;
+ struct eeh_dev *edev;
int rc = 0;
const char *location;
@@ -402,18 +402,18 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
return 0;
}
dn = eeh_find_device_pe(dn);
- pdn = PCI_DN(dn);
+ edev = of_node_to_eeh_dev(dn);
/* Access to IO BARs might get this far and still not want checking. */
- if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
- pdn->eeh_mode & EEH_MODE_NOCHECK) {
+ if (!(edev->mode & EEH_MODE_SUPPORTED) ||
+ edev->mode & EEH_MODE_NOCHECK) {
ignored_check++;
pr_debug("EEH: Ignored check (%x) for %s %s\n",
- pdn->eeh_mode, eeh_pci_name(dev), dn->full_name);
+ edev->mode, eeh_pci_name(dev), dn->full_name);
return 0;
}
- if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) {
+ if (!edev->config_addr && !edev->pe_config_addr) {
no_cfg_addr++;
return 0;
}
@@ -426,13 +426,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
*/
raw_spin_lock_irqsave(&confirm_error_lock, flags);
rc = 1;
- if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
- pdn->eeh_check_count ++;
- if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) {
+ if (edev->mode & EEH_MODE_ISOLATED) {
+ edev->check_count++;
+ if (edev->check_count % EEH_MAX_FAILS == 0) {
location = of_get_property(dn, "ibm,loc-code", NULL);
printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
"location=%s driver=%s pci addr=%s\n",
- pdn->eeh_check_count, location,
+ edev->check_count, location,
eeh_driver_name(dev), eeh_pci_name(dev));
printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
@@ -448,7 +448,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
* function zero of a multi-function device.
* In any case they must share a common PHB.
*/
- ret = eeh_ops->get_state(pdn->node, NULL);
+ ret = eeh_ops->get_state(dn, NULL);
/* Note that config-io to empty slots may fail;
* they are empty when they don't have children.
@@ -461,7 +461,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
(ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
(EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
false_positives++;
- pdn->eeh_false_positives ++;
+ edev->false_positives ++;
rc = 0;
goto dn_unlock;
}
@@ -475,7 +475,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
eeh_mark_slot(dn, EEH_MODE_ISOLATED);
raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
- eeh_send_failure_event(dn, dev);
+ eeh_send_failure_event(edev->dn, edev->pdev);
/* Most EEH events are due to device driver bugs. Having
* a stack trace will help the device-driver authors figure
@@ -529,22 +529,23 @@ EXPORT_SYMBOL(eeh_check_failure);
/**
* eeh_pci_enable - Enable MMIO or DMA transfers for this slot
- * @pdn pci device node
+ * @edev: pci device node
*
* This routine should be called to reenable frozen MMIO or DMA
* so that it would work correctly again. It's useful while doing
* recovery or log collection on the indicated device.
*/
-int eeh_pci_enable(struct pci_dn *pdn, int function)
+int eeh_pci_enable(struct eeh_dev *edev, int function)
{
int rc;
+ struct device_node *dn = eeh_dev_to_of_node(edev);
- rc = eeh_ops->set_option(pdn->node, function);
+ rc = eeh_ops->set_option(dn, function);
if (rc)
printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
- function, rc, pdn->node->full_name);
+ function, rc, dn->full_name);
- rc = eeh_ops->wait_state(pdn->node, PCI_BUS_RESET_WAIT_MSEC);
+ rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC);
if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
(function == EEH_OPT_THAW_MMIO))
return 0;
@@ -595,8 +596,8 @@ void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
struct device_node *dn;
for_each_child_of_node(parent, dn) {
- if (PCI_DN(dn)) {
- struct pci_dev *dev = PCI_DN(dn)->pcidev;
+ if (of_node_to_eeh_dev(dn)) {
+ struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev;
if (dev && dev->driver)
*freset |= dev->needs_freset;
@@ -622,10 +623,10 @@ void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
dn = eeh_find_device_pe(dn);
/* Back up one, since config addrs might be shared */
- if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+ if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
dn = dn->parent;
- dev = PCI_DN(dn)->pcidev;
+ dev = of_node_to_eeh_dev(dn)->pdev;
if (dev)
*freset |= dev->needs_freset;
@@ -634,13 +635,14 @@ void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
/**
* eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
- * @pdn: pci device node to be reset.
+ * @edev: PCI device associated EEH device to be reset.
*
* Assert the PCI #RST line for 1/4 second.
*/
-static void eeh_reset_pe_once(struct pci_dn *pdn)
+static void eeh_reset_pe_once(struct eeh_dev *edev)
{
unsigned int freset = 0;
+ struct device_node *dn = eeh_dev_to_of_node(edev);
/* Determine type of EEH reset required for
* Partitionable Endpoint, a hot-reset (1)
@@ -648,12 +650,12 @@ static void eeh_reset_pe_once(struct pci_dn *pdn)
* A fundamental reset required by any device under
* Partitionable Endpoint trumps hot-reset.
*/
- eeh_set_pe_freset(pdn->node, &freset);
+ eeh_set_pe_freset(dn, &freset);
if (freset)
- eeh_ops->reset(pdn->node, EEH_RESET_FUNDAMENTAL);
+ eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL);
else
- eeh_ops->reset(pdn->node, EEH_RESET_HOT);
+ eeh_ops->reset(dn, EEH_RESET_HOT);
/* The PCI bus requires that the reset be held high for at least
* a 100 milliseconds. We wait a bit longer 'just in case'.
@@ -665,9 +667,9 @@ static void eeh_reset_pe_once(struct pci_dn *pdn)
* pci slot reset line is dropped. Make sure we don't miss
* these, and clear the flag now.
*/
- eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED);
+ eeh_clear_slot(dn, EEH_MODE_ISOLATED);
- eeh_ops->reset(pdn->node, EEH_RESET_DEACTIVATE);
+ eeh_ops->reset(dn, EEH_RESET_DEACTIVATE);
/* After a PCI slot has been reset, the PCI Express spec requires
* a 1.5 second idle time for the bus to stabilize, before starting
@@ -679,31 +681,32 @@ static void eeh_reset_pe_once(struct pci_dn *pdn)
/**
* eeh_reset_pe - Reset the indicated PE
- * @pdn: PCI device node
+ * @edev: PCI device associated EEH device
*
* This routine should be called to reset indicated device, including
* PE. A PE might include multiple PCI devices and sometimes PCI bridges
* might be involved as well.
*/
-int eeh_reset_pe(struct pci_dn *pdn)
+int eeh_reset_pe(struct eeh_dev *edev)
{
int i, rc;
+ struct device_node *dn = eeh_dev_to_of_node(edev);
/* Take three shots at resetting the bus */
for (i=0; i<3; i++) {
- eeh_reset_pe_once(pdn);
+ eeh_reset_pe_once(edev);
- rc = eeh_ops->wait_state(pdn->node, PCI_BUS_RESET_WAIT_MSEC);
+ rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC);
if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
return 0;
if (rc < 0) {
printk(KERN_ERR "EEH: unrecoverable slot failure %s\n",
- pdn->node->full_name);
+ dn->full_name);
return -1;
}
printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n",
- i+1, pdn->node->full_name, rc);
+ i+1, dn->full_name, rc);
}
return -1;
@@ -719,90 +722,95 @@ int eeh_reset_pe(struct pci_dn *pdn)
/**
* eeh_restore_one_device_bars - Restore the Base Address Registers for one device
- * @pdn: pci device node
+ * @edev: PCI device associated EEH device
*
* Loads the PCI configuration space base address registers,
* the expansion ROM base address, the latency timer, and etc.
* from the saved values in the device node.
*/
-static inline void eeh_restore_one_device_bars(struct pci_dn *pdn)
+static inline void eeh_restore_one_device_bars(struct eeh_dev *edev)
{
int i;
u32 cmd;
+ struct device_node *dn = eeh_dev_to_of_node(edev);
+
+ if (!edev->phb)
+ return;
- if (NULL==pdn->phb) return;
for (i=4; i<10; i++) {
- rtas_write_config(pdn, i*4, 4, pdn->config_space[i]);
+ rtas_write_config(PCI_DN(dn), i*4, 4, edev->config_space[i]);
}
/* 12 == Expansion ROM Address */
- rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]);
+ rtas_write_config(PCI_DN(dn), 12*4, 4, edev->config_space[12]);
#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
-#define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)])
+#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
- rtas_write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
+ rtas_write_config(PCI_DN(dn), PCI_CACHE_LINE_SIZE, 1,
SAVED_BYTE(PCI_CACHE_LINE_SIZE));
- rtas_write_config(pdn, PCI_LATENCY_TIMER, 1,
+ rtas_write_config(PCI_DN(dn), PCI_LATENCY_TIMER, 1,
SAVED_BYTE(PCI_LATENCY_TIMER));
/* max latency, min grant, interrupt pin and line */
- rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);
+ rtas_write_config(PCI_DN(dn), 15*4, 4, edev->config_space[15]);
/* Restore PERR & SERR bits, some devices require it,
* don't touch the other command bits
*/
- rtas_read_config(pdn, PCI_COMMAND, 4, &cmd);
- if (pdn->config_space[1] & PCI_COMMAND_PARITY)
+ rtas_read_config(PCI_DN(dn), PCI_COMMAND, 4, &cmd);
+ if (edev->config_space[1] & PCI_COMMAND_PARITY)
cmd |= PCI_COMMAND_PARITY;
else
cmd &= ~PCI_COMMAND_PARITY;
- if (pdn->config_space[1] & PCI_COMMAND_SERR)
+ if (edev->config_space[1] & PCI_COMMAND_SERR)
cmd |= PCI_COMMAND_SERR;
else
cmd &= ~PCI_COMMAND_SERR;
- rtas_write_config(pdn, PCI_COMMAND, 4, cmd);
+ rtas_write_config(PCI_DN(dn), PCI_COMMAND, 4, cmd);
}
/**
* eeh_restore_bars - Restore the PCI config space info
- * @pdn: PCI device node
+ * @edev: EEH device
*
* This routine performs a recursive walk to the children
* of this device as well.
*/
-void eeh_restore_bars(struct pci_dn *pdn)
+void eeh_restore_bars(struct eeh_dev *edev)
{
struct device_node *dn;
- if (!pdn)
+ if (!edev)
return;
- if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
- eeh_restore_one_device_bars(pdn);
+ if ((edev->mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(edev->class_code))
+ eeh_restore_one_device_bars(edev);
- for_each_child_of_node(pdn->node, dn)
- eeh_restore_bars(PCI_DN(dn));
+ for_each_child_of_node(eeh_dev_to_of_node(edev), dn)
+ eeh_restore_bars(of_node_to_eeh_dev(dn));
}
/**
* eeh_save_bars - Save device bars
- * @pdn: PCI device node
+ * @edev: PCI device associated EEH device
*
* Save the values of the device bars. Unlike the restore
* routine, this routine is *not* recursive. This is because
* PCI devices are added individually; but, for the restore,
* an entire slot is reset at a time.
*/
-static void eeh_save_bars(struct pci_dn *pdn)
+static void eeh_save_bars(struct eeh_dev *edev)
{
int i;
+ struct device_node *dn;
- if (!pdn )
+ if (!edev)
return;
+ dn = eeh_dev_to_of_node(edev);
for (i = 0; i < 16; i++)
- rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]);
+ rtas_read_config(PCI_DN(dn), i * 4, 4, &edev->config_space[i]);
}
/**
@@ -822,13 +830,13 @@ static void *eeh_early_enable(struct device_node *dn, void *data)
const u32 *device_id = of_get_property(dn, "device-id", NULL);
const u32 *regs;
int enable;
- struct pci_dn *pdn = PCI_DN(dn);
+ struct eeh_dev *edev = of_node_to_eeh_dev(dn);
- pdn->class_code = 0;
- pdn->eeh_mode = 0;
- pdn->eeh_check_count = 0;
- pdn->eeh_freeze_count = 0;
- pdn->eeh_false_positives = 0;
+ edev->class_code = 0;
+ edev->mode = 0;
+ edev->check_count = 0;
+ edev->freeze_count = 0;
+ edev->false_positives = 0;
if (!of_device_is_available(dn))
return NULL;
@@ -839,10 +847,10 @@ static void *eeh_early_enable(struct device_node *dn, void *data)
/* There is nothing to check on PCI to ISA bridges */
if (dn->type && !strcmp(dn->type, "isa")) {
- pdn->eeh_mode |= EEH_MODE_NOCHECK;
+ edev->mode |= EEH_MODE_NOCHECK;
return NULL;
}
- pdn->class_code = *class_code;
+ edev->class_code = *class_code;
/* Ok... see if this device supports EEH. Some do, some don't,
* and the only way to find out is to check each and every one.
@@ -855,40 +863,40 @@ static void *eeh_early_enable(struct device_node *dn, void *data)
enable = 0;
if (ret == 0) {
- pdn->eeh_config_addr = regs[0];
+ edev->config_addr = regs[0];
/* If the newer, better, ibm,get-config-addr-info is supported,
* then use that instead.
*/
- pdn->eeh_pe_config_addr = eeh_ops->get_pe_addr(dn);
+ edev->pe_config_addr = eeh_ops->get_pe_addr(dn);
/* Some older systems (Power4) allow the
* ibm,set-eeh-option call to succeed even on nodes
* where EEH is not supported. Verify support
* explicitly.
*/
- ret = eeh_ops->get_state(pdn->node, NULL);
+ ret = eeh_ops->get_state(dn, NULL);
if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
enable = 1;
}
if (enable) {
eeh_subsystem_enabled = 1;
- pdn->eeh_mode |= EEH_MODE_SUPPORTED;
+ edev->mode |= EEH_MODE_SUPPORTED;
pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n",
- dn->full_name, pdn->eeh_config_addr,
- pdn->eeh_pe_config_addr);
+ dn->full_name, edev->config_addr,
+ edev->pe_config_addr);
} else {
/* This device doesn't support EEH, but it may have an
* EEH parent, in which case we mark it as supported.
*/
- if (dn->parent && PCI_DN(dn->parent)
- && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
+ if (dn->parent && of_node_to_eeh_dev(dn->parent) &&
+ (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) {
/* Parent supports EEH. */
- pdn->eeh_mode |= EEH_MODE_SUPPORTED;
- pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
+ edev->mode |= EEH_MODE_SUPPORTED;
+ edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr;
return NULL;
}
}
@@ -897,7 +905,7 @@ static void *eeh_early_enable(struct device_node *dn, void *data)
dn->full_name);
}
- eeh_save_bars(pdn);
+ eeh_save_bars(edev);
return NULL;
}
@@ -994,7 +1002,7 @@ void __init eeh_init(void)
unsigned long buid;
buid = get_phb_buid(phb);
- if (buid == 0 || PCI_DN(phb) == NULL)
+ if (buid == 0 || !of_node_to_eeh_dev(phb))
continue;
traverse_pci_devices(phb, eeh_early_enable, NULL);
@@ -1022,9 +1030,9 @@ static void eeh_add_device_early(struct device_node *dn)
{
struct pci_controller *phb;
- if (!dn || !PCI_DN(dn))
+ if (!dn || !of_node_to_eeh_dev(dn))
return;
- phb = PCI_DN(dn)->phb;
+ phb = of_node_to_eeh_dev(dn)->phb;
/* USB Bus children of PCI devices will not have BUID's */
if (NULL == phb || 0 == phb->buid)
@@ -1061,7 +1069,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
static void eeh_add_device_late(struct pci_dev *dev)
{
struct device_node *dn;
- struct pci_dn *pdn;
+ struct eeh_dev *edev;
if (!dev || !eeh_subsystem_enabled)
return;
@@ -1069,15 +1077,16 @@ static void eeh_add_device_late(struct pci_dev *dev)
pr_debug("EEH: Adding device %s\n", pci_name(dev));
dn = pci_device_to_OF_node(dev);
- pdn = PCI_DN(dn);
- if (pdn->pcidev == dev) {
+ edev = pci_dev_to_eeh_dev(dev);
+ if (edev->pdev == dev) {
pr_debug("EEH: Already referenced !\n");
return;
}
- WARN_ON(pdn->pcidev);
+ WARN_ON(edev->pdev);
pci_dev_get(dev);
- pdn->pcidev = dev;
+ edev->pdev = dev;
+ dev->dev.archdata.edev = edev;
pci_addr_cache_insert_device(dev);
eeh_sysfs_add_device(dev);
@@ -1118,19 +1127,21 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
*/
static void eeh_remove_device(struct pci_dev *dev)
{
- struct device_node *dn;
+ struct eeh_dev *edev;
+
if (!dev || !eeh_subsystem_enabled)
return;
+ edev = pci_dev_to_eeh_dev(dev);
/* Unregister the device with the EEH/PCI address search system */
pr_debug("EEH: Removing device %s\n", pci_name(dev));
- dn = pci_device_to_OF_node(dev);
- if (PCI_DN(dn)->pcidev == NULL) {
+ if (!edev || !edev->pdev) {
pr_debug("EEH: Not referenced !\n");
return;
}
- PCI_DN(dn)->pcidev = NULL;
+ edev->pdev = NULL;
+ dev->dev.archdata.edev = NULL;
pci_dev_put(dev);
pci_addr_cache_remove_device(dev);
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 18/21] Replace pci_dn with eeh_dev for EEH aux components
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (16 preceding siblings ...)
2012-02-28 6:04 ` [PATCH 17/21] Replace pci_dn with eeh_dev for EEH core Gavin Shan
@ 2012-02-28 6:04 ` Gavin Shan
2012-02-28 6:04 ` [PATCH 19/21] Replace pci_dn with eeh_dev for EEH on pSeries Gavin Shan
` (4 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
The original EEH implementation depends heavily on struct pci_dn,
since we had to put EEH related information into pci_dn. Actually, we
could split struct pci_dn so that the EEH sensitive information forms
an individual struct, which makes EEH look more independent.
The patch replaces pci_dn with eeh_dev for EEH aux components like
event and driver. Also, the eeh_event struct has been adjusted a
little bit since eeh_dev already links the associated FDT (Flat Device
Tree) node and PCI device. It's not necessary for the eeh_event struct
to track the FDT node and PCI device; it can simply track eeh_dev
instead.
The patch also renames function pcid_name() to eeh_pcid_name(), a
rename that was missed in the previous patch where the EEH aux
components were cleaned up.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh_event.h | 7 +-
arch/powerpc/platforms/pseries/eeh.c | 2 +-
arch/powerpc/platforms/pseries/eeh_driver.c | 81 +++++++++++++--------------
arch/powerpc/platforms/pseries/eeh_event.c | 36 ++++++------
4 files changed, 63 insertions(+), 63 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index 25ebf6a..c68b012 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -28,12 +28,11 @@
*/
struct eeh_event {
struct list_head list; /* to form event queue */
- struct device_node *dn; /* struct device node */
- struct pci_dev *dev; /* affected device */
+ struct eeh_dev *edev; /* EEH device */
};
-int eeh_send_failure_event(struct device_node *dn, struct pci_dev *dev);
-struct pci_dn *handle_eeh_events(struct eeh_event *);
+int eeh_send_failure_event(struct eeh_dev *edev);
+struct eeh_dev *handle_eeh_events(struct eeh_event *);
#endif /* __KERNEL__ */
#endif /* ASM_POWERPC_EEH_EVENT_H */
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index aec10f6..9b1fd0c 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -475,7 +475,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
eeh_mark_slot(dn, EEH_MODE_ISOLATED);
raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
- eeh_send_failure_event(edev->dn, edev->pdev);
+ eeh_send_failure_event(edev);
/* Most EEH events are due to device driver bugs. Having
* a stack trace will help the device-driver authors figure
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 3f25fab..baf92cd 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -40,7 +40,7 @@
* This routine is used to retrieve the name of PCI device driver
* if that's valid.
*/
-static inline const char *pcid_name(struct pci_dev *pdev)
+static inline const char *eeh_pcid_name(struct pci_dev *pdev)
{
if (pdev && pdev->dev.driver)
return pdev->dev.driver->name;
@@ -81,7 +81,7 @@ static void print_device_node_tree(struct pci_dn *pdn, int dent)
*/
static void eeh_disable_irq(struct pci_dev *dev)
{
- struct device_node *dn = pci_device_to_OF_node(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
/* Don't disable MSI and MSI-X interrupts. They are
* effectively disabled by the DMA Stopped state
@@ -93,7 +93,7 @@ static void eeh_disable_irq(struct pci_dev *dev)
if (!irq_has_action(dev->irq))
return;
- PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
+ edev->mode |= EEH_MODE_IRQ_DISABLED;
disable_irq_nosync(dev->irq);
}
@@ -106,10 +106,10 @@ static void eeh_disable_irq(struct pci_dev *dev)
*/
static void eeh_enable_irq(struct pci_dev *dev)
{
- struct device_node *dn = pci_device_to_OF_node(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
- if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) {
- PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;
+ if ((edev->mode) & EEH_MODE_IRQ_DISABLED) {
+ edev->mode &= ~EEH_MODE_IRQ_DISABLED;
enable_irq(dev->irq);
}
}
@@ -270,20 +270,20 @@ static int eeh_report_failure(struct pci_dev *dev, void *userdata)
/**
* eeh_reset_device - Perform actual reset of a pci slot
- * @pe_dn: PE associated device node
+ * @edev: PE associated EEH device
* @bus: PCI bus corresponding to the isolcated slot
*
* This routine must be called to do reset on the indicated PE.
* During the reset, udev might be invoked because those affected
* PCI devices will be removed and then added.
*/
-static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus)
+static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
{
struct device_node *dn;
int cnt, rc;
/* pcibios will clear the counter; save the value */
- cnt = pe_dn->eeh_freeze_count;
+ cnt = edev->freeze_count;
if (bus)
pcibios_remove_pci_devices(bus);
@@ -292,21 +292,22 @@ static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus)
* Reconfigure bridges and devices. Don't try to bring the system
* up if the reset failed for some reason.
*/
- rc = eeh_reset_pe(pe_dn);
+ rc = eeh_reset_pe(edev);
if (rc)
return rc;
/* Walk over all functions on this device. */
- dn = pe_dn->node;
- if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+ dn = eeh_dev_to_of_node(edev);
+ if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
dn = dn->parent->child;
while (dn) {
- struct pci_dn *ppe = PCI_DN(dn);
+ struct eeh_dev *pedev = of_node_to_eeh_dev(dn);
+
/* On Power4, always true because eeh_pe_config_addr=0 */
- if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) {
+ if (edev->pe_config_addr == pedev->pe_config_addr) {
eeh_ops->configure_bridge(dn);
- eeh_restore_bars(ppe);
+ eeh_restore_bars(pedev);
}
dn = dn->sibling;
}
@@ -321,7 +322,7 @@ static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus)
ssleep(5);
pcibios_add_pci_devices(bus);
}
- pe_dn->eeh_freeze_count = cnt;
+ edev->freeze_count = cnt;
return 0;
}
@@ -348,23 +349,22 @@ static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus)
* drivers (which cause a second set of hotplug events to go out to
* userspace).
*/
-struct pci_dn *handle_eeh_events(struct eeh_event *event)
+struct eeh_dev *handle_eeh_events(struct eeh_event *event)
{
struct device_node *frozen_dn;
- struct pci_dn *frozen_pdn;
+ struct eeh_dev *frozen_edev;
struct pci_bus *frozen_bus;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
- frozen_dn = eeh_find_device_pe(event->dn);
+ frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev));
if (!frozen_dn) {
-
- location = of_get_property(event->dn, "ibm,loc-code", NULL);
+ location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL);
location = location ? location : "unknown";
printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
"for location=%s pci addr=%s\n",
- location, eeh_pci_name(event->dev));
+ location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev)));
return NULL;
}
@@ -389,22 +389,21 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
return NULL;
}
- frozen_pdn = PCI_DN(frozen_dn);
- frozen_pdn->eeh_freeze_count++;
+ frozen_edev = of_node_to_eeh_dev(frozen_dn);
+ frozen_edev->freeze_count++;
+ pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev));
+ drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev));
- pci_str = eeh_pci_name(event->dev);
- drv_str = pcid_name(event->dev);
-
- if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
+ if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES)
goto excess_failures;
printk(KERN_WARNING
"EEH: This PCI device has failed %d times in the last hour:\n",
- frozen_pdn->eeh_freeze_count);
+ frozen_edev->freeze_count);
- if (frozen_pdn->pcidev) {
- bus_pci_str = pci_name(frozen_pdn->pcidev);
- bus_drv_str = pcid_name(frozen_pdn->pcidev);
+ if (frozen_edev->pdev) {
+ bus_pci_str = pci_name(frozen_edev->pdev);
+ bus_drv_str = eeh_pcid_name(frozen_edev->pdev);
printk(KERN_WARNING
"EEH: Bus location=%s driver=%s pci addr=%s\n",
location, bus_drv_str, bus_pci_str);
@@ -425,7 +424,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
/* Get the current PCI slot state. This can take a long time,
* sometimes over 3 seconds for certain systems.
*/
- rc = eeh_ops->wait_state(frozen_pdn->node, MAX_WAIT_FOR_RECOVERY*1000);
+ rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000);
if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
printk(KERN_WARNING "EEH: Permanent failure\n");
goto hard_fail;
@@ -435,14 +434,14 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
* don't post the error log until after all dev drivers
* have been informed.
*/
- eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP);
+ eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP);
/* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they
* go down willingly, without panicing the system.
*/
if (result == PCI_ERS_RESULT_NONE) {
- rc = eeh_reset_device(frozen_pdn, frozen_bus);
+ rc = eeh_reset_device(frozen_edev, frozen_bus);
if (rc) {
printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
goto hard_fail;
@@ -451,7 +450,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
/* If all devices reported they can proceed, then re-enable MMIO */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
- rc = eeh_pci_enable(frozen_pdn, EEH_OPT_THAW_MMIO);
+ rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO);
if (rc < 0)
goto hard_fail;
@@ -465,7 +464,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
/* If all devices reported they can proceed, then re-enable DMA */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
- rc = eeh_pci_enable(frozen_pdn, EEH_OPT_THAW_DMA);
+ rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA);
if (rc < 0)
goto hard_fail;
@@ -483,7 +482,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
- rc = eeh_reset_device(frozen_pdn, NULL);
+ rc = eeh_reset_device(frozen_edev, NULL);
if (rc) {
printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
goto hard_fail;
@@ -502,7 +501,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
/* Tell all device drivers that they can resume operations */
pci_walk_bus(frozen_bus, eeh_report_resume, NULL);
- return frozen_pdn;
+ return frozen_edev;
excess_failures:
/*
@@ -515,7 +514,7 @@ excess_failures:
"has failed %d times in the last hour "
"and has been permanently disabled.\n"
"Please try reseating this device or replacing it.\n",
- location, drv_str, pci_str, frozen_pdn->eeh_freeze_count);
+ location, drv_str, pci_str, frozen_edev->freeze_count);
goto perm_error;
hard_fail:
@@ -526,7 +525,7 @@ hard_fail:
location, drv_str, pci_str);
perm_error:
- eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM);
+ eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM);
/* Notify all devices that they're about to go down. */
pci_walk_bus(frozen_bus, eeh_report_failure, NULL);
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index e98347c..4a47525 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -56,8 +56,8 @@ DEFINE_MUTEX(eeh_event_mutex);
static int eeh_event_handler(void * dummy)
{
unsigned long flags;
- struct eeh_event *event;
- struct pci_dn *pdn;
+ struct eeh_event *event;
+ struct eeh_dev *edev;
daemonize("eehd");
set_current_state(TASK_INTERRUPTIBLE);
@@ -77,23 +77,26 @@ static int eeh_event_handler(void * dummy)
/* Serialize processing of EEH events */
mutex_lock(&eeh_event_mutex);
- eeh_mark_slot(event->dn, EEH_MODE_RECOVERING);
+ edev = event->edev;
+ eeh_mark_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
- eeh_pci_name(event->dev));
+ eeh_pci_name(edev->pdev));
+
+ edev = handle_eeh_events(event);
- pdn = handle_eeh_events(event);
+ eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
+ pci_dev_put(edev->pdev);
- eeh_clear_slot(event->dn, EEH_MODE_RECOVERING);
- pci_dev_put(event->dev);
kfree(event);
mutex_unlock(&eeh_event_mutex);
/* If there are no new errors after an hour, clear the counter. */
- if (pdn && pdn->eeh_freeze_count>0) {
+ if (edev && edev->freeze_count>0) {
msleep_interruptible(3600*1000);
- if (pdn->eeh_freeze_count>0)
- pdn->eeh_freeze_count--;
+ if (edev->freeze_count>0)
+ edev->freeze_count--;
+
}
return 0;
@@ -114,17 +117,17 @@ static void eeh_thread_launcher(struct work_struct *dummy)
/**
* eeh_send_failure_event - Generate a PCI error event
- * @dev: pci device
+ * @edev: EEH device
*
* This routine can be called within an interrupt context;
* the actual event will be delivered in a normal context
* (from a workqueue).
*/
-int eeh_send_failure_event(struct device_node *dn,
- struct pci_dev *dev)
+int eeh_send_failure_event(struct eeh_dev *edev)
{
unsigned long flags;
struct eeh_event *event;
+ struct device_node *dn = eeh_dev_to_of_node(edev);
const char *location;
if (!mem_init_done) {
@@ -140,11 +143,10 @@ int eeh_send_failure_event(struct device_node *dn,
return 1;
}
- if (dev)
- pci_dev_get(dev);
+ if (edev->pdev)
+ pci_dev_get(edev->pdev);
- event->dn = dn;
- event->dev = dev;
+ event->edev = edev;
/* We may or may not be called in an interrupt context */
spin_lock_irqsave(&eeh_eventlist_lock, flags);
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 19/21] Replace pci_dn with eeh_dev for EEH on pSeries
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (17 preceding siblings ...)
2012-02-28 6:04 ` [PATCH 18/21] Replace pci_dn with eeh_dev for EEH aux components Gavin Shan
@ 2012-02-28 6:04 ` Gavin Shan
2012-02-28 6:04 ` [PATCH 20/21] Introduce struct eeh_stats for EEH Gavin Shan
` (3 subsequent siblings)
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
The pci_dn has been replaced with eeh_dev. To stay consistent with
that change, the EEH platform implementation on pSeries should also
be adjusted a little bit so that it depends on eeh_dev instead
of pci_dn.
The patch replaces pci_dn with eeh_dev. The corresponding information
will be retrieved from eeh_dev instead of pci_dn.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/eeh_pseries.c | 96 +++++++++++++-------------
1 files changed, 48 insertions(+), 48 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 4ed06b2..36a1af1 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -144,11 +144,11 @@ static int pseries_eeh_init(void)
static int pseries_eeh_set_option(struct device_node *dn, int option)
{
int ret = 0;
- struct pci_dn *pdn;
+ struct eeh_dev *edev;
const u32 *reg;
int config_addr;
- pdn = PCI_DN(dn);
+ edev = of_node_to_eeh_dev(dn);
/*
* When we're enabling or disabling EEH functioality on
@@ -165,9 +165,9 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
case EEH_OPT_THAW_MMIO:
case EEH_OPT_THAW_DMA:
- config_addr = pdn->eeh_config_addr;
- if (pdn->eeh_pe_config_addr)
- config_addr = pdn->eeh_pe_config_addr;
+ config_addr = edev->config_addr;
+ if (edev->pe_config_addr)
+ config_addr = edev->pe_config_addr;
break;
default:
@@ -177,8 +177,8 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
}
ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
- config_addr, BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid), option);
+ config_addr, BUID_HI(edev->phb->buid),
+ BUID_LO(edev->phb->buid), option);
return ret;
}
@@ -198,11 +198,11 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
*/
static int pseries_eeh_get_pe_addr(struct device_node *dn)
{
- struct pci_dn *pdn;
+ struct eeh_dev *edev;
int ret = 0;
int rets[3];
- pdn = PCI_DN(dn);
+ edev = of_node_to_eeh_dev(dn);
if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
/*
@@ -211,15 +211,15 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
* meaningless.
*/
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
- pdn->eeh_config_addr, BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid), 1);
+ edev->config_addr, BUID_HI(edev->phb->buid),
+ BUID_LO(edev->phb->buid), 1);
if (ret || (rets[0] == 0))
return 0;
/* Retrieve the associated PE config address */
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
- pdn->eeh_config_addr, BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid), 0);
+ edev->config_addr, BUID_HI(edev->phb->buid),
+ BUID_LO(edev->phb->buid), 0);
if (ret) {
pr_warning("%s: Failed to get PE address for %s\n",
__func__, dn->full_name);
@@ -231,8 +231,8 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
- pdn->eeh_config_addr, BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid), 0);
+ edev->config_addr, BUID_HI(edev->phb->buid),
+ BUID_LO(edev->phb->buid), 0);
if (ret) {
pr_warning("%s: Failed to get PE address for %s\n",
__func__, dn->full_name);
@@ -260,28 +260,28 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
*/
static int pseries_eeh_get_state(struct device_node *dn, int *state)
{
- struct pci_dn *pdn;
+ struct eeh_dev *edev;
int config_addr;
int ret;
int rets[4];
int result;
/* Figure out PE config address if possible */
- pdn = PCI_DN(dn);
- config_addr = pdn->eeh_config_addr;
- if (pdn->eeh_pe_config_addr)
- config_addr = pdn->eeh_pe_config_addr;
+ edev = of_node_to_eeh_dev(dn);
+ config_addr = edev->config_addr;
+ if (edev->pe_config_addr)
+ config_addr = edev->pe_config_addr;
if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
- config_addr, BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid));
+ config_addr, BUID_HI(edev->phb->buid),
+ BUID_LO(edev->phb->buid));
} else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
/* Fake PE unavailable info */
rets[2] = 0;
ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
- config_addr, BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid));
+ config_addr, BUID_HI(edev->phb->buid),
+ BUID_LO(edev->phb->buid));
} else {
return EEH_STATE_NOT_SUPPORT;
}
@@ -340,27 +340,27 @@ static int pseries_eeh_get_state(struct device_node *dn, int *state)
*/
static int pseries_eeh_reset(struct device_node *dn, int option)
{
- struct pci_dn *pdn;
+ struct eeh_dev *edev;
int config_addr;
int ret;
/* Figure out PE address */
- pdn = PCI_DN(dn);
- config_addr = pdn->eeh_config_addr;
- if (pdn->eeh_pe_config_addr)
- config_addr = pdn->eeh_pe_config_addr;
+ edev = of_node_to_eeh_dev(dn);
+ config_addr = edev->config_addr;
+ if (edev->pe_config_addr)
+ config_addr = edev->pe_config_addr;
/* Reset PE through RTAS call */
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
- config_addr, BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid), option);
+ config_addr, BUID_HI(edev->phb->buid),
+ BUID_LO(edev->phb->buid), option);
/* If fundamental-reset not supported, try hot-reset */
if (option == EEH_RESET_FUNDAMENTAL &&
ret == -8) {
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
- config_addr, BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid), EEH_RESET_HOT);
+ config_addr, BUID_HI(edev->phb->buid),
+ BUID_LO(edev->phb->buid), EEH_RESET_HOT);
}
return ret;
@@ -437,22 +437,22 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
*/
static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len)
{
- struct pci_dn *pdn;
+ struct eeh_dev *edev;
int config_addr;
unsigned long flags;
int ret;
- pdn = PCI_DN(dn);
+ edev = of_node_to_eeh_dev(dn);
spin_lock_irqsave(&slot_errbuf_lock, flags);
memset(slot_errbuf, 0, eeh_error_buf_size);
/* Figure out the PE address */
- config_addr = pdn->eeh_config_addr;
- if (pdn->eeh_pe_config_addr)
- config_addr = pdn->eeh_pe_config_addr;
+ config_addr = edev->config_addr;
+ if (edev->pe_config_addr)
+ config_addr = edev->pe_config_addr;
ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr,
- BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid),
+ BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid),
virt_to_phys(drv_log), len,
virt_to_phys(slot_errbuf), eeh_error_buf_size,
severity);
@@ -473,25 +473,25 @@ static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_l
*/
static int pseries_eeh_configure_bridge(struct device_node *dn)
{
- struct pci_dn *pdn;
+ struct eeh_dev *edev;
int config_addr;
int ret;
/* Figure out the PE address */
- pdn = PCI_DN(dn);
- config_addr = pdn->eeh_config_addr;
- if (pdn->eeh_pe_config_addr)
- config_addr = pdn->eeh_pe_config_addr;
+ edev = of_node_to_eeh_dev(dn);
+ config_addr = edev->config_addr;
+ if (edev->pe_config_addr)
+ config_addr = edev->pe_config_addr;
/* Use new configure-pe function, if supported */
if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
- config_addr, BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid));
+ config_addr, BUID_HI(edev->phb->buid),
+ BUID_LO(edev->phb->buid));
} else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
- config_addr, BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid));
+ config_addr, BUID_HI(edev->phb->buid),
+ BUID_LO(edev->phb->buid));
} else {
return -EFAULT;
}
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 20/21] Introduce struct eeh_stats for EEH
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (18 preceding siblings ...)
2012-02-28 6:04 ` [PATCH 19/21] Replace pci_dn with eeh_dev for EEH on pSeries Gavin Shan
@ 2012-02-28 6:04 ` Gavin Shan
2012-02-28 10:04 ` David Laight
` (3 more replies)
2012-02-28 6:04 ` [PATCH 21/21] pSeries platform config space access in EEH Gavin Shan
` (2 subsequent siblings)
22 siblings, 4 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
With the original EEH implementation, the EEH global statistics
are maintained by individual global variables. That makes the
code a little hard to maintain.
The patch introduces extra struct eeh_stats for the EEH global
statistics so that it can be maintained in collective fashion.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/eeh.c | 65 ++++++++++++++++++++--------------
1 files changed, 38 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 9b1fd0c..ca05890 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -102,14 +102,22 @@ static DEFINE_RAW_SPINLOCK(confirm_error_lock);
#define EEH_PCI_REGS_LOG_LEN 4096
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
-/* System monitoring statistics */
-static unsigned long no_device;
-static unsigned long no_dn;
-static unsigned long no_cfg_addr;
-static unsigned long ignored_check;
-static unsigned long total_mmio_ffs;
-static unsigned long false_positives;
-static unsigned long slot_resets;
+/*
+ * The struct is used to maintain the EEH global statistic
+ * information. Besides, the EEH global statistics will be
+ * exported to user space through procfs
+ */
+struct eeh_stats {
+ unsigned int no_device; /* PCI device not found */
+ unsigned int no_dn; /* OF node not found */
+ unsigned int no_cfg_addr; /* Config address not found */
+ unsigned int ignored_check; /* EEH check skipped */
+ unsigned int total_mmio_ffs; /* Total EEH checks */
+ unsigned int false_positives; /* Unnecessary EEH checks */
+ unsigned int slot_resets; /* PE reset */
+};
+
+static struct eeh_stats eeh_stats;
#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
@@ -392,13 +400,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
int rc = 0;
const char *location;
- total_mmio_ffs++;
+ eeh_stats.total_mmio_ffs++;
if (!eeh_subsystem_enabled)
return 0;
if (!dn) {
- no_dn++;
+ eeh_stats.no_dn++;
return 0;
}
dn = eeh_find_device_pe(dn);
@@ -407,14 +415,14 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
/* Access to IO BARs might get this far and still not want checking. */
if (!(edev->mode & EEH_MODE_SUPPORTED) ||
edev->mode & EEH_MODE_NOCHECK) {
- ignored_check++;
+ eeh_stats.ignored_check++;
pr_debug("EEH: Ignored check (%x) for %s %s\n",
edev->mode, eeh_pci_name(dev), dn->full_name);
return 0;
}
if (!edev->config_addr && !edev->pe_config_addr) {
- no_cfg_addr++;
+ eeh_stats.no_cfg_addr++;
return 0;
}
@@ -460,13 +468,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
(ret == EEH_STATE_NOT_SUPPORT) ||
(ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
(EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
- false_positives++;
+ eeh_stats.false_positives++;
edev->false_positives ++;
rc = 0;
goto dn_unlock;
}
- slot_resets++;
+ eeh_stats.slot_resets++;
/* Avoid repeated reports of this failure, including problems
* with other functions on this device, and functions under
@@ -513,7 +521,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
addr = eeh_token_to_phys((unsigned long __force) token);
dev = pci_addr_cache_get_device(addr);
if (!dev) {
- no_device++;
+ eeh_stats.no_device++;
return val;
}
@@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
{
if (0 == eeh_subsystem_enabled) {
seq_printf(m, "EEH Subsystem is globally disabled\n");
- seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
+ seq_printf(m, "eeh_total_mmio_ffs=%d\n", eeh_stats.total_mmio_ffs);
} else {
seq_printf(m, "EEH Subsystem is enabled\n");
seq_printf(m,
- "no device=%ld\n"
- "no device node=%ld\n"
- "no config address=%ld\n"
- "check not wanted=%ld\n"
- "eeh_total_mmio_ffs=%ld\n"
- "eeh_false_positives=%ld\n"
- "eeh_slot_resets=%ld\n",
- no_device, no_dn, no_cfg_addr,
- ignored_check, total_mmio_ffs,
- false_positives,
- slot_resets);
+ "no device =%d\n"
+ "no device node =%d\n"
+ "no config address =%d\n"
+ "check not wanted =%d\n"
+ "eeh_total_mmio_ffs =%d\n"
+ "eeh_false_positives =%d\n"
+ "eeh_slot_resets =%d\n",
+ eeh_stats.no_device,
+ eeh_stats.no_dn,
+ eeh_stats.no_cfg_addr,
+ eeh_stats.ignored_check,
+ eeh_stats.total_mmio_ffs,
+ eeh_stats.false_positives,
+ eeh_stats.slot_resets);
}
return 0;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 21/21] pSeries platform config space access in EEH
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (19 preceding siblings ...)
2012-02-28 6:04 ` [PATCH 20/21] Introduce struct eeh_stats for EEH Gavin Shan
@ 2012-02-28 6:04 ` Gavin Shan
2012-02-29 3:04 ` [PATCH v5 00/21] EEH reorganization Gavin Shan
2012-04-12 21:39 ` Anton Blanchard
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-28 6:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: shangw
With the original EEH implementation, the access to config space of
the corresponding PCI device is done by RTAS sensitive function. That
depends on pci_dn heavily. That would limit EEH extension to other
platforms like powernv because other platforms might have different
ways to access PCI config space.
The patch splits those functions used to access PCI config space
and implement them in platform related EEH component. It would be
helpful to support EEH on multiple platforms simultaneously in future.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh.h | 2 +
arch/powerpc/platforms/pseries/eeh.c | 32 ++++++++++----------
arch/powerpc/platforms/pseries/eeh_pseries.c | 40 +++++++++++++++++++++++++-
3 files changed, 57 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index daaad91..d60f998 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -102,6 +102,8 @@ struct eeh_ops {
int (*wait_state)(struct device_node *dn, int max_wait);
int (*get_log)(struct device_node *dn, int severity, char *drv_log, unsigned long len);
int (*configure_bridge)(struct device_node *dn);
+ int (*read_config)(struct device_node *dn, int where, int size, u32 *val);
+ int (*write_config)(struct device_node *dn, int where, int size, u32 val);
};
extern struct eeh_ops *eeh_ops;
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index ca05890..d653fc2 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -141,11 +141,11 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name);
- rtas_read_config(PCI_DN(dn), PCI_VENDOR_ID, 4, &cfg);
+ eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
- rtas_read_config(PCI_DN(dn), PCI_COMMAND, 4, &cfg);
+ eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
@@ -156,11 +156,11 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
/* Gather bridge-specific registers */
if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
- rtas_read_config(PCI_DN(dn), PCI_SEC_STATUS, 2, &cfg);
+ eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
- rtas_read_config(PCI_DN(dn), PCI_BRIDGE_CONTROL, 2, &cfg);
+ eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
}
@@ -168,11 +168,11 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
/* Dump out the PCI-X command and status regs */
cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
if (cap) {
- rtas_read_config(PCI_DN(dn), cap, 4, &cfg);
+ eeh_ops->read_config(dn, cap, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
- rtas_read_config(PCI_DN(dn), cap+4, 4, &cfg);
+ eeh_ops->read_config(dn, cap+4, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
}
@@ -185,7 +185,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
"EEH: PCI-E capabilities and status follow:\n");
for (i=0; i<=8; i++) {
- rtas_read_config(PCI_DN(dn), cap+4*i, 4, &cfg);
+ eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
}
@@ -197,7 +197,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
"EEH: PCI-E AER capability register set follows:\n");
for (i=0; i<14; i++) {
- rtas_read_config(PCI_DN(dn), cap+4*i, 4, &cfg);
+ eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
}
@@ -746,28 +746,28 @@ static inline void eeh_restore_one_device_bars(struct eeh_dev *edev)
return;
for (i=4; i<10; i++) {
- rtas_write_config(PCI_DN(dn), i*4, 4, edev->config_space[i]);
+ eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
}
/* 12 == Expansion ROM Address */
- rtas_write_config(PCI_DN(dn), 12*4, 4, edev->config_space[12]);
+ eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
- rtas_write_config(PCI_DN(dn), PCI_CACHE_LINE_SIZE, 1,
+ eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
SAVED_BYTE(PCI_CACHE_LINE_SIZE));
- rtas_write_config(PCI_DN(dn), PCI_LATENCY_TIMER, 1,
+ eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
SAVED_BYTE(PCI_LATENCY_TIMER));
/* max latency, min grant, interrupt pin and line */
- rtas_write_config(PCI_DN(dn), 15*4, 4, edev->config_space[15]);
+ eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
/* Restore PERR & SERR bits, some devices require it,
* don't touch the other command bits
*/
- rtas_read_config(PCI_DN(dn), PCI_COMMAND, 4, &cmd);
+ eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
if (edev->config_space[1] & PCI_COMMAND_PARITY)
cmd |= PCI_COMMAND_PARITY;
else
@@ -776,7 +776,7 @@ static inline void eeh_restore_one_device_bars(struct eeh_dev *edev)
cmd |= PCI_COMMAND_SERR;
else
cmd &= ~PCI_COMMAND_SERR;
- rtas_write_config(PCI_DN(dn), PCI_COMMAND, 4, cmd);
+ eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
}
/**
@@ -818,7 +818,7 @@ static void eeh_save_bars(struct eeh_dev *edev)
dn = eeh_dev_to_of_node(edev);
for (i = 0; i < 16; i++)
- rtas_read_config(PCI_DN(dn), i * 4, 4, &edev->config_space[i]);
+ eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
}
/**
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 36a1af1..8752f79 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -503,6 +503,42 @@ static int pseries_eeh_configure_bridge(struct device_node *dn)
return ret;
}
+/**
+ * pseries_eeh_read_config - Read PCI config space
+ * @dn: device node
+ * @where: PCI address
+ * @size: size to read
+ * @val: return value
+ *
+ * Read config space from the specified device
+ */
+static int pseries_eeh_read_config(struct device_node *dn, int where, int size, u32 *val)
+{
+ struct pci_dn *pdn;
+
+ pdn = PCI_DN(dn);
+
+ return rtas_read_config(pdn, where, size, val);
+}
+
+/**
+ * pseries_eeh_write_config - Write PCI config space
+ * @dn: device node
+ * @where: PCI address
+ * @size: size to write
+ * @val: value to be written
+ *
+ * Write config space to the specified device
+ */
+static int pseries_eeh_write_config(struct device_node *dn, int where, int size, u32 val)
+{
+ struct pci_dn *pdn;
+
+ pdn = PCI_DN(dn);
+
+ return rtas_write_config(pdn, where, size, val);
+}
+
static struct eeh_ops pseries_eeh_ops = {
.name = "pseries",
.init = pseries_eeh_init,
@@ -512,7 +548,9 @@ static struct eeh_ops pseries_eeh_ops = {
.reset = pseries_eeh_reset,
.wait_state = pseries_eeh_wait_state,
.get_log = pseries_eeh_get_log,
- .configure_bridge = pseries_eeh_configure_bridge
+ .configure_bridge = pseries_eeh_configure_bridge,
+ .read_config = pseries_eeh_read_config,
+ .write_config = pseries_eeh_write_config
};
/**
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* RE: [PATCH 20/21] Introduce struct eeh_stats for EEH
2012-02-28 6:04 ` [PATCH 20/21] Introduce struct eeh_stats for EEH Gavin Shan
@ 2012-02-28 10:04 ` David Laight
2012-02-29 1:08 ` Gavin Shan
2012-02-29 2:25 ` Gavin Shan
` (2 subsequent siblings)
3 siblings, 1 reply; 36+ messages in thread
From: David Laight @ 2012-02-28 10:04 UTC (permalink / raw)
To: Gavin Shan, linuxppc-dev
=20
> +struct eeh_stats {
> + unsigned int no_device; /* PCI device not found */
...
> + "no device =%d\n"
...
Use %u (for all the stats), you really don't want negative
values printed.
I've NFI how long wrapping these counters might take!
If it is feasible (maybe much above 100Hz) then you
need 64bit counters.
David
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 20/21] Introduce struct eeh_stats for EEH
2012-02-28 10:04 ` David Laight
@ 2012-02-29 1:08 ` Gavin Shan
0 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-29 1:08 UTC (permalink / raw)
To: David Laight, linuxppc-dev
>
> > +struct eeh_stats {
> > + unsigned int no_device; /* PCI device not found */
> ...
> > + "no device =%d\n"
> ...
>
> Use %u (for all the stats), you really don't want negative
> values printed.
Yes.
> I've NFI how long wrapping these counters might take!
> If it is feasable (maybe much above 100Hz) then you
> need 64bit counters.
>
I think it's better to use "u64" here ;-)
> David
>
Thanks,
Gavin
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 20/21] Introduce struct eeh_stats for EEH
2012-02-28 6:04 ` [PATCH 20/21] Introduce struct eeh_stats for EEH Gavin Shan
2012-02-28 10:04 ` David Laight
@ 2012-02-29 2:25 ` Gavin Shan
2012-02-29 12:56 ` Michael Ellerman
2012-03-01 1:47 ` [PATCH 20/21] Introduce struct eeh_stats for EEH - Reworked Gavin Shan
3 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-29 2:25 UTC (permalink / raw)
To: linuxppc-dev
With the original EEH implementation, the EEH global statistics
are maintained by individual global variables. That makes the
code a little hard to maintain.
The patch introduces extra struct eeh_stats for the EEH global
statistics so that it can be maintained in collective fashion.
It's the rework on the corresponding v5 patch. According to
the comments from David Laight, the EEH global statistics have
been changed a little bit so that they have fixed-type of
"u64". Also, the format used to print them has been changed to
"%llu" based on David's suggestion.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/eeh.c | 65 ++++++++++++++++++++--------------
1 files changed, 38 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 9b1fd0c..753ec8a 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -102,14 +102,22 @@ static DEFINE_RAW_SPINLOCK(confirm_error_lock);
#define EEH_PCI_REGS_LOG_LEN 4096
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
-/* System monitoring statistics */
-static unsigned long no_device;
-static unsigned long no_dn;
-static unsigned long no_cfg_addr;
-static unsigned long ignored_check;
-static unsigned long total_mmio_ffs;
-static unsigned long false_positives;
-static unsigned long slot_resets;
+/*
+ * The struct is used to maintain the EEH global statistic
+ * information. Besides, the EEH global statistics will be
+ * exported to user space through procfs
+ */
+struct eeh_stats {
+ u64 no_device; /* PCI device not found */
+ u64 no_dn; /* OF node not found */
+ u64 no_cfg_addr; /* Config address not found */
+ u64 ignored_check; /* EEH check skipped */
+ u64 total_mmio_ffs; /* Total EEH checks */
+ u64 false_positives; /* Unnecessary EEH checks */
+ u64 slot_resets; /* PE reset */
+};
+
+static struct eeh_stats eeh_stats;
#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
@@ -392,13 +400,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
int rc = 0;
const char *location;
- total_mmio_ffs++;
+ eeh_stats.total_mmio_ffs++;
if (!eeh_subsystem_enabled)
return 0;
if (!dn) {
- no_dn++;
+ eeh_stats.no_dn++;
return 0;
}
dn = eeh_find_device_pe(dn);
@@ -407,14 +415,14 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
/* Access to IO BARs might get this far and still not want checking. */
if (!(edev->mode & EEH_MODE_SUPPORTED) ||
edev->mode & EEH_MODE_NOCHECK) {
- ignored_check++;
+ eeh_stats.ignored_check++;
pr_debug("EEH: Ignored check (%x) for %s %s\n",
edev->mode, eeh_pci_name(dev), dn->full_name);
return 0;
}
if (!edev->config_addr && !edev->pe_config_addr) {
- no_cfg_addr++;
+ eeh_stats.no_cfg_addr++;
return 0;
}
@@ -460,13 +468,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
(ret == EEH_STATE_NOT_SUPPORT) ||
(ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
(EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
- false_positives++;
+ eeh_stats.false_positives++;
edev->false_positives ++;
rc = 0;
goto dn_unlock;
}
- slot_resets++;
+ eeh_stats.slot_resets++;
/* Avoid repeated reports of this failure, including problems
* with other functions on this device, and functions under
@@ -513,7 +521,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
addr = eeh_token_to_phys((unsigned long __force) token);
dev = pci_addr_cache_get_device(addr);
if (!dev) {
- no_device++;
+ eeh_stats.no_device++;
return val;
}
@@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
{
if (0 == eeh_subsystem_enabled) {
seq_printf(m, "EEH Subsystem is globally disabled\n");
- seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
+ seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
} else {
seq_printf(m, "EEH Subsystem is enabled\n");
seq_printf(m,
- "no device=%ld\n"
- "no device node=%ld\n"
- "no config address=%ld\n"
- "check not wanted=%ld\n"
- "eeh_total_mmio_ffs=%ld\n"
- "eeh_false_positives=%ld\n"
- "eeh_slot_resets=%ld\n",
- no_device, no_dn, no_cfg_addr,
- ignored_check, total_mmio_ffs,
- false_positives,
- slot_resets);
+ "no device =%llu\n"
+ "no device node =%llu\n"
+ "no config address =%llu\n"
+ "check not wanted =%llu\n"
+ "eeh_total_mmio_ffs =%llu\n"
+ "eeh_false_positives =%llu\n"
+ "eeh_slot_resets =%llu\n",
+ eeh_stats.no_device,
+ eeh_stats.no_dn,
+ eeh_stats.no_cfg_addr,
+ eeh_stats.ignored_check,
+ eeh_stats.total_mmio_ffs,
+ eeh_stats.false_positives,
+ eeh_stats.slot_resets);
}
return 0;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH v5 00/21] EEH reorganization
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (20 preceding siblings ...)
2012-02-28 6:04 ` [PATCH 21/21] pSeries platform config space access in EEH Gavin Shan
@ 2012-02-29 3:04 ` Gavin Shan
2012-04-12 21:39 ` Anton Blanchard
22 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-02-29 3:04 UTC (permalink / raw)
To: benh; +Cc: linuxppc-dev
Hi Ben,
Could you pls take a look on this when you have time?
Thanks,
Gavin
> This series of patches is going to reorganize EEH so that it could support
> multiple platforms in future. The requirements were raised from the following aspects.
>
> * The original EEH implementation only supports pSeries platform, which
> would be regarded as guest system. Platform powernv is coming and EEH
> needs to be supported on powernv as well.
> * Different platforms might be running based on variable firmware. Furthermore,
> the firmware would supply different EEH interfaces to kernel.
> Therefore, we have to do necessary abstraction on current EEH implementation.
>
> In order to accommodate the requirements, the series of patches have reorganized
> current EEH implementation.
>
> * The original implementation looks not clean enough. Necessary cleanup
> will be done in some of the patches.
> * struct eeh_ops has been introduced so that EEH core components and platform
> dependent implementation could be split up. That makes it possible for EEH
> to be supported on multiple platforms.
> * struct eeh_dev has been introduced to replace struct pci_dn so that EEH module
> works independently as much as possible.
> * EEH global statistics will be maintained in a collective fashion.
>
> v1 -> v2:
>
> * If possible, to add "eeh_" prefix for function names.
> * The format of leading function comments won't be changed in order not to
> break kernel document automatic generation (e.g. by "make pdfdocs").
> * The name of local variables won't be changed if there're no explicit reasons.
> * Represent the PE's state in bitmap fashion.
> * Some function names have been adjusted so that they look shorter and
> meaningful.
> * Platform operation name has been changed to "pseries".
> * Merge those patches for cleanup if possible.
> * The line length is kept as appropriately short if possible.
> * Fixup on alignment & spacing issues.
>
> v2 -> v3:
> * Split cleanup patch into 2: one for comment cleanup and another one for
> renaming function names.
> * Try to use pr_warning/pr_info/pr_debug instead of printk() function call.
> * Function names are adjusted a little bit so that they look more meaningful
> according to comments from Michael/Ben.
> * Useful comment has been kept according to Michael's comments.
> * struct eeh_ops::set_eeh has been changed to eeh_ops::set_option.
> * struct eeh_ops::name has been changed to "char *".
> * Remove file name from the source file.
> * Copyright (C) format has been changed since "(C)" isn't encouraged to use.
> * The header files included in the source file have been sorted alphabetically.
> * eeh_platform_init() has been replaced by eeh_pseries_init() to avoid duplicate
> functions when kernel supports multiple platforms.
> * "F/W" has been changed to "Firmware".
> * The maximal wait time to retrieve PE's state has been covered by macro.
> * It also include changes according to the minor comments from Michael.
>
> v3 -> v4:
> * Fix some typo included in the commit messages.
> * Reduce code nesting according to Ram's suggestions.
> * Additional pr_warning on failure of configuring bridges.
>
> v4 -> v5:
> * OF node and PCI device are tracing the corresponding eeh device.
> That has been changed to "struct eeh_dev *" instead of the original
> "void *".
> * The conversion between OF node, PCI device, eeh device is changed
> to inline functions instead of the original macros.
> * The "struct eeh_stats" has been moved from eeh.h to eeh.c. Besides,
> the individual members of the struct have been changed to fixed-type
> "unsigned int".
>
>
> The series of patches (v5) has been verified on Firebird-L machine. In order to carry out
> the test, you have to install IBM Power Tools from IBM internal yum source. Following
> command is used to force EEH check on ethernet interface, which could be recovered eventually
> by EEH and device driver successfully. You could keep pinging to the blade before issuing
> the following command to force EEH. You should see the network interface can't be reached for
> a moment and everything will be recovered couple of seconds after the forced EEH error. At the
> same time, you should see EEH error log out of system console.
>
> * errinjct eeh -v -f 0 -p U78AE.001.WZS00M9-P1-C18-L1-T2 -a 0x0 -m 0x0
>
> -----
>
> arch/powerpc/include/asm/device.h | 3 +
> arch/powerpc/include/asm/eeh.h | 134 +++-
> arch/powerpc/include/asm/eeh_event.h | 33 +-
> arch/powerpc/include/asm/ppc-pci.h | 89 +--
> arch/powerpc/kernel/of_platform.c | 3 +
> arch/powerpc/kernel/rtas_pci.c | 3 +
> arch/powerpc/platforms/pseries/Makefile | 3 +-
> arch/powerpc/platforms/pseries/eeh.c | 1044 ++++++++++++--------------
> arch/powerpc/platforms/pseries/eeh_cache.c | 44 +-
> arch/powerpc/platforms/pseries/eeh_dev.c | 102 +++
> arch/powerpc/platforms/pseries/eeh_driver.c | 213 +++---
> arch/powerpc/platforms/pseries/eeh_event.c | 55 +-
> arch/powerpc/platforms/pseries/eeh_pseries.c | 565 ++++++++++++++
> arch/powerpc/platforms/pseries/eeh_sysfs.c | 25 +-
> arch/powerpc/platforms/pseries/msi.c | 2 +-
> arch/powerpc/platforms/pseries/pci_dlpar.c | 3 +
> arch/powerpc/platforms/pseries/setup.c | 7 +-
> include/linux/of.h | 10 +
> include/linux/pci.h | 7 +
> 19 files changed, 1477 insertions(+), 868 deletions(-)
>
> Thanks,
> Gavin
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 20/21] Introduce struct eeh_stats for EEH
2012-02-28 6:04 ` [PATCH 20/21] Introduce struct eeh_stats for EEH Gavin Shan
2012-02-28 10:04 ` David Laight
2012-02-29 2:25 ` Gavin Shan
@ 2012-02-29 12:56 ` Michael Ellerman
2012-03-01 1:14 ` Gavin Shan
2012-03-01 1:47 ` [PATCH 20/21] Introduce struct eeh_stats for EEH - Reworked Gavin Shan
3 siblings, 1 reply; 36+ messages in thread
From: Michael Ellerman @ 2012-02-29 12:56 UTC (permalink / raw)
To: Gavin Shan; +Cc: linuxppc-dev
[-- Attachment #1: Type: text/plain, Size: 1405 bytes --]
On Tue, 2012-02-28 at 14:04 +0800, Gavin Shan wrote:
> With the original EEH implementation, the EEH global statistics
> are maintained by individual global variables. That makes the
> code a little hard to maintain.
Hi Gavin,
> @@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
> {
> if (0 == eeh_subsystem_enabled) {
> seq_printf(m, "EEH Subsystem is globally disabled\n");
> - seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
> + seq_printf(m, "eeh_total_mmio_ffs=%d\n", eeh_stats.total_mmio_ffs);
> } else {
> seq_printf(m, "EEH Subsystem is enabled\n");
> seq_printf(m,
> - "no device=%ld\n"
> - "no device node=%ld\n"
> - "no config address=%ld\n"
> - "check not wanted=%ld\n"
> - "eeh_total_mmio_ffs=%ld\n"
> - "eeh_false_positives=%ld\n"
> - "eeh_slot_resets=%ld\n",
> - no_device, no_dn, no_cfg_addr,
> - ignored_check, total_mmio_ffs,
> - false_positives,
> - slot_resets);
> + "no device =%d\n"
> + "no device node =%d\n"
> + "no config address =%d\n"
> + "check not wanted =%d\n"
> + "eeh_total_mmio_ffs =%d\n"
> + "eeh_false_positives =%d\n"
> + "eeh_slot_resets =%d\n",
There *might* be tools out there that parse this output, so I'd say
don't change it unless you have to - and I don't think you have to?
cheers
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 20/21] Introduce struct eeh_stats for EEH
2012-02-29 12:56 ` Michael Ellerman
@ 2012-03-01 1:14 ` Gavin Shan
0 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-03-01 1:14 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
> > With the original EEH implementation, the EEH global statistics
> > are maintained by individual global variables. That makes the
> > code a little hard to maintain.
>
> Hi Gavin,
>
> > @@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
> > {
> > if (0 == eeh_subsystem_enabled) {
> > seq_printf(m, "EEH Subsystem is globally disabled\n");
> > - seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
> > + seq_printf(m, "eeh_total_mmio_ffs=%d\n", eeh_stats.total_mmio_ffs);
> > } else {
> > seq_printf(m, "EEH Subsystem is enabled\n");
> > seq_printf(m,
> > - "no device=%ld\n"
> > - "no device node=%ld\n"
> > - "no config address=%ld\n"
> > - "check not wanted=%ld\n"
> > - "eeh_total_mmio_ffs=%ld\n"
> > - "eeh_false_positives=%ld\n"
> > - "eeh_slot_resets=%ld\n",
> > - no_device, no_dn, no_cfg_addr,
> > - ignored_check, total_mmio_ffs,
> > - false_positives,
> > - slot_resets);
> > + "no device =%d\n"
> > + "no device node =%d\n"
> > + "no config address =%d\n"
> > + "check not wanted =%d\n"
> > + "eeh_total_mmio_ffs =%d\n"
> > + "eeh_false_positives =%d\n"
> > + "eeh_slot_resets =%d\n",
>
> There *might* be tools out there that parse this output, so I'd say
> don't change it unless you have to - and I don't think you have to?
>
Thanks for catching the point, Michael. I will change it back soon ;-)
Thanks,
Gavin
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 20/21] Introduce struct eeh_stats for EEH - Reworked
2012-02-28 6:04 ` [PATCH 20/21] Introduce struct eeh_stats for EEH Gavin Shan
` (2 preceding siblings ...)
2012-02-29 12:56 ` Michael Ellerman
@ 2012-03-01 1:47 ` Gavin Shan
3 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-03-01 1:47 UTC (permalink / raw)
To: linuxppc-dev
With the original EEH implementation, the EEH global statistics
are maintained by individual global variables. That makes the
code a little hard to maintain.
The patch introduces extra struct eeh_stats for the EEH global
statistics so that it can be maintained in collective fashion.
It's the rework of the corresponding v5 patch. According to
the comments from David Laight, the EEH global statistics have
been changed a little bit so that they have the fixed type
"u64". Also, the format used to print them has been changed to
"%llu" based on David's suggestion. In addition, the output format of
the EEH global statistics is kept intact, following
Michael's suggestion that there might be tools parsing them.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/eeh.c | 65 ++++++++++++++++++++--------------
1 files changed, 38 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 9b1fd0c..1d08cd7 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -102,14 +102,22 @@ static DEFINE_RAW_SPINLOCK(confirm_error_lock);
#define EEH_PCI_REGS_LOG_LEN 4096
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
-/* System monitoring statistics */
-static unsigned long no_device;
-static unsigned long no_dn;
-static unsigned long no_cfg_addr;
-static unsigned long ignored_check;
-static unsigned long total_mmio_ffs;
-static unsigned long false_positives;
-static unsigned long slot_resets;
+/*
+ * The struct is used to maintain the EEH global statistic
+ * information. Besides, the EEH global statistics will be
+ * exported to user space through procfs
+ */
+struct eeh_stats {
+ u64 no_device; /* PCI device not found */
+ u64 no_dn; /* OF node not found */
+ u64 no_cfg_addr; /* Config address not found */
+ u64 ignored_check; /* EEH check skipped */
+ u64 total_mmio_ffs; /* Total EEH checks */
+ u64 false_positives; /* Unnecessary EEH checks */
+ u64 slot_resets; /* PE reset */
+};
+
+static struct eeh_stats eeh_stats;
#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
@@ -392,13 +400,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
int rc = 0;
const char *location;
- total_mmio_ffs++;
+ eeh_stats.total_mmio_ffs++;
if (!eeh_subsystem_enabled)
return 0;
if (!dn) {
- no_dn++;
+ eeh_stats.no_dn++;
return 0;
}
dn = eeh_find_device_pe(dn);
@@ -407,14 +415,14 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
/* Access to IO BARs might get this far and still not want checking. */
if (!(edev->mode & EEH_MODE_SUPPORTED) ||
edev->mode & EEH_MODE_NOCHECK) {
- ignored_check++;
+ eeh_stats.ignored_check++;
pr_debug("EEH: Ignored check (%x) for %s %s\n",
edev->mode, eeh_pci_name(dev), dn->full_name);
return 0;
}
if (!edev->config_addr && !edev->pe_config_addr) {
- no_cfg_addr++;
+ eeh_stats.no_cfg_addr++;
return 0;
}
@@ -460,13 +468,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
(ret == EEH_STATE_NOT_SUPPORT) ||
(ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
(EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
- false_positives++;
+ eeh_stats.false_positives++;
edev->false_positives ++;
rc = 0;
goto dn_unlock;
}
- slot_resets++;
+ eeh_stats.slot_resets++;
/* Avoid repeated reports of this failure, including problems
* with other functions on this device, and functions under
@@ -513,7 +521,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
addr = eeh_token_to_phys((unsigned long __force) token);
dev = pci_addr_cache_get_device(addr);
if (!dev) {
- no_device++;
+ eeh_stats.no_device++;
return val;
}
@@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
{
if (0 == eeh_subsystem_enabled) {
seq_printf(m, "EEH Subsystem is globally disabled\n");
- seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
+ seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
} else {
seq_printf(m, "EEH Subsystem is enabled\n");
seq_printf(m,
- "no device=%ld\n"
- "no device node=%ld\n"
- "no config address=%ld\n"
- "check not wanted=%ld\n"
- "eeh_total_mmio_ffs=%ld\n"
- "eeh_false_positives=%ld\n"
- "eeh_slot_resets=%ld\n",
- no_device, no_dn, no_cfg_addr,
- ignored_check, total_mmio_ffs,
- false_positives,
- slot_resets);
+ "no device=%llu\n"
+ "no device node=%llu\n"
+ "no config address=%llu\n"
+ "check not wanted=%llu\n"
+ "eeh_total_mmio_ffs=%llu\n"
+ "eeh_false_positives=%llu\n"
+ "eeh_slot_resets=%llu\n",
+ eeh_stats.no_device,
+ eeh_stats.no_dn,
+ eeh_stats.no_cfg_addr,
+ eeh_stats.ignored_check,
+ eeh_stats.total_mmio_ffs,
+ eeh_stats.false_positives,
+ eeh_stats.slot_resets);
}
return 0;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH v5 00/21] EEH reorganization
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
` (21 preceding siblings ...)
2012-02-29 3:04 ` [PATCH v5 00/21] EEH reorganization Gavin Shan
@ 2012-04-12 21:39 ` Anton Blanchard
2012-04-13 2:03 ` Anton Blanchard
22 siblings, 1 reply; 36+ messages in thread
From: Anton Blanchard @ 2012-04-12 21:39 UTC (permalink / raw)
To: Gavin Shan; +Cc: linuxppc-dev
Hi Gavin,
> This series of patches is going to reorganize EEH so that it could
> support multiple platforms in future. The requirements were raised
> from the aspects.
I just hit this on mainline from today (3.4.0-rc2-00065-gf549e08).
Haven't had a chance to narrow it down yet.
Oops: Kernel access of bad area, sig: 11 [#1]
SMP NR_CPUS=1024 NUMA pSeries
Modules linked in:
NIP: c000000000055af8 LR: c000000000033204 CTR: 0000000000000000
REGS: c000001f42fb7990 TRAP: 0300 Tainted: G W (3.4.0-rc2-00065-gf549e08-dirty)
MSR: 8000000000009032 <SF,EE,ME,IR,DR,RI> CR: 24008084 XER: 00000000
SOFTE: 1
CFAR: 00000000000049b8
DAR: 0000000000000070, DSISR: 40000000
TASK = c000001f6c7dfc40[19010] 'eehd' THREAD: c000001f42fb4000 CPU: 6
GPR00: 0000000000000001 c000001f42fb7c10 c000000000bd3a28 c000001f80ab0800
GPR04: c000001f7c57d418 0000000000000380 c000001f7c57e070 c000000000ed5360
GPR08: 0000000000000000 c000000000c77088 0000000000000000 0000000000000001
GPR12: 0000000044008088 c00000000eda1500 00000000019ffa78 0000000000a70000
GPR16: 00000000000000bb c000000000a9f754 c000000000963230 000000000000005e
GPR20: 0000000001b37e80 00000000000000bb 0000000000000000 c000000000b0ad90
GPR24: 0000000000000000 c000000000b10588 0000000000000001 c000001f80ab0800
GPR28: 0000000000000000 c000001f80ab0828 0000000000000000 c000001f7ee10000
NIP [c000000000055af8] .eeh_add_device_tree_late+0x58/0xf0
LR [c000000000033204] .pcibios_finish_adding_to_bus+0x34/0x50
Call Trace:
[c000001f42fb7c10] [00000000fdffffff] 0xfdffffff (unreliable)
[c000001f42fb7ca0] [c000000000033204] .pcibios_finish_adding_to_bus+0x34/0x50
[c000001f42fb7d20] [c000000000059a5c] .pcibios_add_pci_devices+0x7c/0x190
[c000001f42fb7db0] [c000000000057a6c] .eeh_reset_device+0xfc/0x1a0
[c000001f42fb7e50] [c000000000057e18] .handle_eeh_events+0x308/0x480
[c000001f42fb7f00] [c0000000000584dc] .eeh_event_handler+0x13c/0x1d0
[c000001f42fb7f90] [c00000000002099c] .kernel_thread+0x54/0x70
Instruction dump:
480000a8 60000000 ebff0000 7fbfe800 419e0098 2fbf0000 419e005c e9229eb0
80090008 2f800000 419e004c ebdf01d0 <e81e0070> 7fbf0000 3160ffff
7d2b0110
Anton
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v5 00/21] EEH reorganization
2012-04-12 21:39 ` Anton Blanchard
@ 2012-04-13 2:03 ` Anton Blanchard
2012-04-17 1:29 ` Gavin Shan
0 siblings, 1 reply; 36+ messages in thread
From: Anton Blanchard @ 2012-04-13 2:03 UTC (permalink / raw)
To: Gavin Shan; +Cc: linuxppc-dev
Hi,
> I just hit this on mainline from today (3.4.0-rc2-00065-gf549e08).
> Haven't had a chance to narrow it down yet.
Looking closer, it was caused by an EEH error at boot. It looks like
the Mellanox infiniband card gets an error when probed by their
firmware tool (mstmread), but only if the kernel driver is not loaded.
I see this EEH error back on 3.0, so it's not new.
The question now is why we oops in the EEH code on mainline.
Anton
------------[ cut here ]------------
WARNING: at arch/powerpc/platforms/pseries/eeh.c:492
Modules linked in:
NIP: c000000000056cc4 LR: c000000000056cc0 CTR: c00000000051dd60
REGS: c000001f3953f6a0 TRAP: 0700 Not tainted (3.4.0-rc2-00065-gf549e08-dirty)
MSR: 8000000000029032 <SF,EE,ME,IR,DR,RI> CR: 28004482 XER: 0000000f
SOFTE: 0
CFAR: c00000000074ea30
TASK = c000001f39685040[19058] 'mstmread' THREAD: c000001f3953c000 CPU: 38
GPR00: c000000000056cc0 c000001f3953f920 c000000000bd3a28 0000000000000021
GPR04: 0000000000000000 ffffffffffffffff 00000000000323f7 0000000000000000
GPR08: 000000006365203c c000000000b10a20 0000000000020000 c000000000a74cc0
GPR12: 0000000024004422 c00000000eda8500 000000003a58582e 00000000583a5858
GPR16: 000000002f585858 0000000069636573 000000002f646576 0000000010003b48
GPR20: 00000fffc7a3d17c 0000000000000058 0000000000000004 c000001f3953fb90
GPR24: 0000000000000000 0000000000000000 c000000000c77088 c000003e6fffeee8
GPR28: c000000000d82680 0000000000000000 c000000000c770d0 0000000000000000
NIP [c000000000056cc4] .eeh_dn_check_failure+0x304/0x320
LR [c000000000056cc0] .eeh_dn_check_failure+0x300/0x320
Call Trace:
[c000001f3953f920] [c000000000056cc0] .eeh_dn_check_failure+0x300/0x320 (unreliable)
[c000001f3953f9d0] [c00000000002717c] .rtas_read_config+0x13c/0x1b0
[c000001f3953fa70] [c0000000003d543c] .pci_user_read_config_dword+0xcc/0x150
[c000001f3953fb20] [c0000000003e19d8] .pci_read_config+0xe8/0x2a0
[c000001f3953fc00] [c00000000022d330] .read+0x130/0x210
[c000001f3953fce0] [c0000000001a723c] .vfs_read+0xec/0x1e0
[c000001f3953fd80] [c0000000001a73ec] .SyS_pread64+0xbc/0xd0
[c000001f3953fe30] [c000000000009780] syscall_exit+0x0/0x7c
Instruction dump:
7f83e378 48001909 60000000 2fbf0000 419e002c e89f00d8 2fa40000 409e0008
e89f0098 e8629fb8 486f7d39 60000000 <0fe00000> 3b200001 4bfffdb4 e8829fa8
---[ end trace a6e6d788c9869e00 ]---
EEH: Detected PCI bus error on device 0006:01:00.0
EEH: This PCI device has failed 1 times in the last hour:
EEH: Bus location=U78AB.001.WZSGRFL-P1-C4-T1 driver= pci addr=0006:01:00.0
EEH: Device location=U78AB.001.WZSGRFL-P1-C4-T1 driver= pci addr=0006:01:00.0
EEH: of node=/pci@800000020000203/pci1014,415@0
EEH: PCI device/vendor: 673c15b3
EEH: PCI cmd/status register: 00100140
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v5 00/21] EEH reorganization
2012-04-13 2:03 ` Anton Blanchard
@ 2012-04-17 1:29 ` Gavin Shan
2012-04-17 1:37 ` Anton Blanchard
0 siblings, 1 reply; 36+ messages in thread
From: Gavin Shan @ 2012-04-17 1:29 UTC (permalink / raw)
To: Anton Blanchard; +Cc: linuxppc-dev
>> I just hit this on mainline from today (3.4.0-rc2-00065-gf549e08).
>> Haven't had a chance to narrow it down yet.
Thanks for the information. I'll try to reproduce the issue on
Firebird-L today. By the way, it seems that "mstmread" is some
user-level application accessing the config space while the problem
happened?
>
>Looking closer, it was caused by an EEH error at boot. It looks like
>the Mellanox infiniband card gets an error when probed by their
>firmware tool (mstmread), but only if the kernel driver is not loaded.
>I see this EEH error back on 3.0, so it's not new.
>
>The question now is why we oops in the EEH code on mainline.
>
It seems the crash was caused by something like WARN_ON(). I checked
the function pointed to by the backtrace (eeh_dn_check_failure) and I
didn't find any place that calls WARN_ON() or similar stuff. Maybe I missed
something here.
Anyway, I'll try to reproduce it on Firebird-L machine first of all
and then narrow it down.
>Anton
>
Thanks,
Gavin
>------------[ cut here ]------------
>WARNING: at arch/powerpc/platforms/pseries/eeh.c:492
>Modules linked in:
>NIP: c000000000056cc4 LR: c000000000056cc0 CTR: c00000000051dd60
>REGS: c000001f3953f6a0 TRAP: 0700 Not tainted (3.4.0-rc2-00065-gf549e08-dirty)
>MSR: 8000000000029032 <SF,EE,ME,IR,DR,RI> CR: 28004482 XER: 0000000f
>SOFTE: 0
>CFAR: c00000000074ea30
>TASK = c000001f39685040[19058] 'mstmread' THREAD: c000001f3953c000 CPU: 38
>GPR00: c000000000056cc0 c000001f3953f920 c000000000bd3a28 0000000000000021
>GPR04: 0000000000000000 ffffffffffffffff 00000000000323f7 0000000000000000
>GPR08: 000000006365203c c000000000b10a20 0000000000020000 c000000000a74cc0
>GPR12: 0000000024004422 c00000000eda8500 000000003a58582e 00000000583a5858
>GPR16: 000000002f585858 0000000069636573 000000002f646576 0000000010003b48
>GPR20: 00000fffc7a3d17c 0000000000000058 0000000000000004 c000001f3953fb90
>GPR24: 0000000000000000 0000000000000000 c000000000c77088 c000003e6fffeee8
>GPR28: c000000000d82680 0000000000000000 c000000000c770d0 0000000000000000
>NIP [c000000000056cc4] .eeh_dn_check_failure+0x304/0x320
>LR [c000000000056cc0] .eeh_dn_check_failure+0x300/0x320
>Call Trace:
>[c000001f3953f920] [c000000000056cc0] .eeh_dn_check_failure+0x300/0x320 (unreliable)
>[c000001f3953f9d0] [c00000000002717c] .rtas_read_config+0x13c/0x1b0
>[c000001f3953fa70] [c0000000003d543c] .pci_user_read_config_dword+0xcc/0x150
>[c000001f3953fb20] [c0000000003e19d8] .pci_read_config+0xe8/0x2a0
>[c000001f3953fc00] [c00000000022d330] .read+0x130/0x210
>[c000001f3953fce0] [c0000000001a723c] .vfs_read+0xec/0x1e0
>[c000001f3953fd80] [c0000000001a73ec] .SyS_pread64+0xbc/0xd0
>[c000001f3953fe30] [c000000000009780] syscall_exit+0x0/0x7c
>Instruction dump:
>7f83e378 48001909 60000000 2fbf0000 419e002c e89f00d8 2fa40000 409e0008
>e89f0098 e8629fb8 486f7d39 60000000 <0fe00000> 3b200001 4bfffdb4 e8829fa8
>---[ end trace a6e6d788c9869e00 ]---
>EEH: Detected PCI bus error on device 0006:01:00.0
>EEH: This PCI device has failed 1 times in the last hour:
>EEH: Bus location=U78AB.001.WZSGRFL-P1-C4-T1 driver= pci addr=0006:01:00.0
>EEH: Device location=U78AB.001.WZSGRFL-P1-C4-T1 driver= pci addr=0006:01:00.0
>EEH: of node=/pci@800000020000203/pci1014,415@0
>EEH: PCI device/vendor: 673c15b3
>EEH: PCI cmd/status register: 00100140
>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v5 00/21] EEH reorganization
2012-04-17 1:29 ` Gavin Shan
@ 2012-04-17 1:37 ` Anton Blanchard
2012-04-17 1:57 ` Benjamin Herrenschmidt
0 siblings, 1 reply; 36+ messages in thread
From: Anton Blanchard @ 2012-04-17 1:37 UTC (permalink / raw)
To: Gavin Shan; +Cc: linuxppc-dev
Hi,
> Thanks for the information. I'll try to reproduce the issue on
> Firebird-L today. By the way, it seems that "mstmread" is some
> user-level application accessing the config space while the problem
> happened?
The EEH error is caused by the Mellanox firmware tools.
> It seems the crash was caused by something like WARN_ON(). I checked
> the function pointed by the backtrace (eeh_dn_check_failure) and I
> didn't find any place has called WARN_ON() staff. Maybe I missed
> something here.
No. I replaced that backtrace in eeh_dn_check_failure with a WARN_ON()
because the backtrace doesn't give us enough info. I'm submitting a
patch for that today.
Bottom line is mstmread has been causing an EEH error since at least
3.0, but in 3.4 we now oops instead of recovering. The signs all point
to the EEH rework in 3.4.
Anton
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v5 00/21] EEH reorganization
2012-04-17 1:37 ` Anton Blanchard
@ 2012-04-17 1:57 ` Benjamin Herrenschmidt
2012-04-17 5:30 ` Gavin Shan
0 siblings, 1 reply; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2012-04-17 1:57 UTC (permalink / raw)
To: Gavin Shan; +Cc: linuxppc-dev, Anton Blanchard
On Tue, 2012-04-17 at 11:37 +1000, Anton Blanchard wrote:
>
> No. I replaced that backtrace in eeh_dn_check_failure with a WARN_ON()
> because the backtrace doesn't give us enough info. I'm submitting a
> patch for that today.
>
> Bottom line is mstmread has been causing an EEH error since at least
> 3.0, but in 3.4 we now oops instead of recovering. The signs all point
> to the EEH rework in 3.4.
More precisely, the original oops reported by Anton decodes as such:
>Oops: Kernel access of bad area, sig: 11 [#1]
This is typically a bad memory access..
>SMP NR_CPUS=1024 NUMA pSeries
>Modules linked in:
>NIP: c000000000055af8 LR: c000000000033204 CTR: 0000000000000000
>REGS: c000001f42fb7990 TRAP: 0300 Tainted: G W (3.4.0-rc2-00065-gf549e08-dirty)
TRAP: 300 means that it's the result of a data access interrupt, i.e.,
a load or store to a bad address
>MSR: 8000000000009032 <SF,EE,ME,IR,DR,RI> CR: 24008084 XER: 00000000
>SOFTE: 1
>CFAR: 00000000000049b8
>DAR: 0000000000000070, DSISR: 40000000
Here the DAR tells us what address was accessed. 0x70 is a strong indication
that this was an access to a NULL pointer (at offset 0x70 from that pointer).
It -might- be something else (such as a NULL passed to a list head or such)
but the idea that there's a NULL floating around is a good hint.
>TASK = c000001f6c7dfc40[19010] 'eehd' THREAD: c000001f42fb4000 CPU: 6
>GPR00: 0000000000000001 c000001f42fb7c10 c000000000bd3a28 c000001f80ab0800
>GPR04: c000001f7c57d418 0000000000000380 c000001f7c57e070 c000000000ed5360
>GPR08: 0000000000000000 c000000000c77088 0000000000000000 0000000000000001
>GPR12: 0000000044008088 c00000000eda1500 00000000019ffa78 0000000000a70000
>GPR16: 00000000000000bb c000000000a9f754 c000000000963230 000000000000005e
>GPR20: 0000000001b37e80 00000000000000bb 0000000000000000 c000000000b0ad90
>GPR24: 0000000000000000 c000000000b10588 0000000000000001 c000001f80ab0800
>GPR28: 0000000000000000 c000001f80ab0828 0000000000000000 c000001f7ee10000
>NIP [c000000000055af8] .eeh_add_device_tree_late+0x58/0xf0
This is the function where it happened (eeh_add_device_tree_late)
>LR [c000000000033204] .pcibios_finish_adding_to_bus+0x34/0x50
>Call Trace:
>[c000001f42fb7c10] [00000000fdffffff] 0xfdffffff (unreliable)
>[c000001f42fb7ca0] [c000000000033204] .pcibios_finish_adding_to_bus+0x34/0x50
>[c000001f42fb7d20] [c000000000059a5c] .pcibios_add_pci_devices+0x7c/0x190
>[c000001f42fb7db0] [c000000000057a6c] .eeh_reset_device+0xfc/0x1a0
>[c000001f42fb7e50] [c000000000057e18] .handle_eeh_events+0x308/0x480
>[c000001f42fb7f00] [c0000000000584dc] .eeh_event_handler+0x13c/0x1d0
>[c000001f42fb7f90] [c00000000002099c] .kernel_thread+0x54/0x70
And your backtrace. You can see that you got an eeh event, which triggered an
eeh reset, which triggered a pcibios_add_pci_devices() etc...
>Instruction dump:
>480000a8 60000000 ebff0000 7fbfe800 419e0098 2fbf0000 419e005c e9229eb0
>80090008 2f800000 419e004c ebdf01d0 <e81e0070> 7fbf0000 3160ffff
>7d2b0110
Cheers,
Ben.
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v5 00/21] EEH reorganization
2012-04-17 1:57 ` Benjamin Herrenschmidt
@ 2012-04-17 5:30 ` Gavin Shan
0 siblings, 0 replies; 36+ messages in thread
From: Gavin Shan @ 2012-04-17 5:30 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Anton Blanchard
Ben, thanks a lot for decoding the backtrace to help narrow down the root
cause. Also thanks a lot for explaining how to parse the backtrace and
register stuff printed by the oops ;-)
Finally, I successfully reproduced the issue on a Firebird-L machine
without loading the corresponding device driver for the Emulex ethernet
card, by disabling the corresponding config options in .config. With an
injected config space data parity error destined for the Emulex ethernet MAC,
I saw the following backtrace. The problem comes from the following piece of
code. Actually, the EEH device should be retrieved from the OF node instead
of the PCI device, since the PCI device doesn't track the corresponding
EEH device yet at that time. I'll send a patch for it soon, even though
it only needs a one-line code change ;-)
(gdb) p &(((struct eeh_dev *)0)->pdev)
$1 = (struct pci_dev **) 0x70
static void eeh_add_device_late(struct pci_dev *dev)
{
struct device_node *dn;
struct eeh_dev *edev;
if (!dev || !eeh_subsystem_enabled)
return;
dn = pci_device_to_OF_node(dev);
edev = pci_dev_to_eeh_dev(dev); <<< edev should be NULL
if (edev->pdev == dev) { <<< data access fault here.
pr_debug("EEH: Already referenced !\n");
return;
}
WARN_ON(edev->pdev);
:
:
}
[ 176.972046] Unable to handle kernel paging request for data at address 0x00000070
[ 176.972054] Faulting instruction address: 0xc000000000055ecc
[ 176.972064] Oops: Kernel access of bad area, sig: 11 [#1]
[ 176.972070] SMP NR_CPUS=1024 NUMA pSeries
[ 176.972078] Modules linked in:
[ 176.972086] NIP: c000000000055ecc LR: c000000000055ec8 CTR: c00000000005babc
[ 176.972102] REGS: c000000f4d913970 TRAP: 0300 Not tainted (3.4.0-rc2+)
[ 176.972109] MSR: 8000000000009032 <SF,EE,ME,IR,DR,RI> CR: 28000084 XER: 00000009
[ 176.972129] SOFTE: 1
[ 176.972133] CFAR: c000000000005080
[ 176.972138] DAR: 0000000000000070, DSISR: 40000000
[ 176.972146] TASK = c000000f4d8c3600[1038] 'eehd' THREAD: c000000f4d910000 CPU: 24
[ 176.972155] GPR00: c000000000055ec8 c000000f4d913bf0 c00000000147ed90 000000000000001e
[ 176.972170] GPR04: 0000000000000000 ffffffffffffffff 0000000000000000 0000000000000000
[ 176.972183] GPR08: 000000004f4e450d c000000000c44208 0000000000036710 0000000000ec0000
[ 176.972197] GPR12: 0000000028000082 c00000000ff25400 0000000000000000 000000000106c9c8
[ 176.972212] GPR16: 0000000002280000 0000000002e5acf0 0000000001aff9a4 0000000000000060
[ 176.972227] GPR20: 0000000000000000 ffffffffffffffff ffffffffffffffff c000000001345c78
[ 176.972241] GPR24: c000000001345c70 0000000000000000 0000000000000000 c000000000851ac0
[ 176.972256] GPR28: c000000000a95ad3 c000000f529f2c28 c000000f529f2c00 c000000f4d880000
[ 176.972276] NIP [c000000000055ecc] .eeh_add_device_tree_late+0x17c/0x2c4
[ 176.972286] LR [c000000000055ec8] .eeh_add_device_tree_late+0x178/0x2c4
[ 176.972294] Call Trace:
[ 176.972300] [c000000f4d913bf0] [c000000000055ec8] .eeh_add_device_tree_late+0x178/0x2c4 (unreliable)
[ 176.972316] [c000000f4d913ca0] [c000000000036bc8] .pcibios_finish_adding_to_bus+0x74/0x90
[ 176.972328] [c000000f4d913d20] [c000000000059b50] .pcibios_add_pci_devices+0x12c/0x150
[ 176.972339] [c000000f4d913db0] [c000000000057c60] .eeh_reset_device+0x10c/0x140
[ 176.972350] [c000000f4d913e50] [c000000000057ee4] .handle_eeh_events+0x250/0x42c
[ 176.972361] [c000000f4d913f10] [c000000000058560] .eeh_event_handler+0xe4/0x178
[ 176.972372] [c000000f4d913f90] [c000000000021550] .kernel_thread+0x54/0x70
[ 176.972380] Instruction dump:
[ 176.972384] eb82a1f0 7f83e378 487dd2e9 60000000 e862a1f8 7f64db78 487dd2d9 60000000
[ 176.972400] eb5f02c0 7f83e378 487dd2c9 60000000 <e81a0070> 7fa0f800 40de0028 e862a188
Thanks,
Gavin
>
>More precisely, the original oops reported by Anton decodes as such:
>
>>Oops: Kernel access of bad area, sig: 11 [#1]
>
>This is typically a bad memory access..
>
>>SMP NR_CPUS=1024 NUMA pSeries
>>Modules linked in:
>>NIP: c000000000055af8 LR: c000000000033204 CTR: 0000000000000000
>>REGS: c000001f42fb7990 TRAP: 0300 Tainted: G W (3.4.0-rc2-00065-gf549e08-dirty)
>
>TRAP: 300 means that it's the result of a data access interrupts, ie,
>load or store to a bad address
>
>>MSR: 8000000000009032 <SF,EE,ME,IR,DR,RI> CR: 24008084 XER: 00000000
>>SOFTE: 1
>>CFAR: 00000000000049b8
>>DAR: 0000000000000070, DSISR: 40000000
>
>Here the DAR tells us what address was accessed. 0x70 is a strong indication
>that this was an access to a NULL pointer (at offset 0x70 from that pointer).
>
>It -might- be something else (such as a NULL passed to a list head or such)
>but the idea that there's a NULL floating around is a good hint.
>
>>TASK = c000001f6c7dfc40[19010] 'eehd' THREAD: c000001f42fb4000 CPU: 6
>>GPR00: 0000000000000001 c000001f42fb7c10 c000000000bd3a28 c000001f80ab0800
>>GPR04: c000001f7c57d418 0000000000000380 c000001f7c57e070 c000000000ed5360
>>GPR08: 0000000000000000 c000000000c77088 0000000000000000 0000000000000001
>>GPR12: 0000000044008088 c00000000eda1500 00000000019ffa78 0000000000a70000
>>GPR16: 00000000000000bb c000000000a9f754 c000000000963230 000000000000005e
>>GPR20: 0000000001b37e80 00000000000000bb 0000000000000000 c000000000b0ad90
>>GPR24: 0000000000000000 c000000000b10588 0000000000000001 c000001f80ab0800
>>GPR28: 0000000000000000 c000001f80ab0828 0000000000000000 c000001f7ee10000
>>NIP [c000000000055af8] .eeh_add_device_tree_late+0x58/0xf0
>
>This is the function where it happened (eeh_add_device_tree_late)
>
>>LR [c000000000033204] .pcibios_finish_adding_to_bus+0x34/0x50
>>Call Trace:
>>[c000001f42fb7c10] [00000000fdffffff] 0xfdffffff (unreliable)
>>[c000001f42fb7ca0] [c000000000033204] .pcibios_finish_adding_to_bus+0x34/0x50
>>[c000001f42fb7d20] [c000000000059a5c] .pcibios_add_pci_devices+0x7c/0x190
>>[c000001f42fb7db0] [c000000000057a6c] .eeh_reset_device+0xfc/0x1a0
>>[c000001f42fb7e50] [c000000000057e18] .handle_eeh_events+0x308/0x480
>>[c000001f42fb7f00] [c0000000000584dc] .eeh_event_handler+0x13c/0x1d0
>>[c000001f42fb7f90] [c00000000002099c] .kernel_thread+0x54/0x70
>
>And your backtrace. You can see that you got an eeh event, which triggered an
>eeh reset, which triggered a pcibios_add_pci_devices() etc...
>
>>Instruction dump:
>>480000a8 60000000 ebff0000 7fbfe800 419e0098 2fbf0000 419e005c e9229eb0
>>80090008 2f800000 419e004c ebdf01d0 <e81e0070> 7fbf0000 3160ffff
>>7d2b0110
>
>Cheers,
>Ben.
>
>
^ permalink raw reply [flat|nested] 36+ messages in thread
end of thread, other threads:[~2012-04-17 5:31 UTC | newest]
Thread overview: 36+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-28 6:03 [PATCH v5 00/21] EEH reorganization Gavin Shan
2012-02-28 6:03 ` [PATCH 01/21] Cleanup on comments of EEH core Gavin Shan
2012-02-28 6:03 ` [PATCH 02/21] Cleanup on function names " Gavin Shan
2012-02-28 6:03 ` [PATCH 03/21] Platform dependent EEH operations Gavin Shan
2012-02-28 6:03 ` [PATCH 04/21] pSeries platform EEH initialization Gavin Shan
2012-02-28 6:03 ` [PATCH 05/21] pSeries platform EEH operation Gavin Shan
2012-02-28 6:03 ` [PATCH 06/21] pSeries platform EEH PE address retrieval Gavin Shan
2012-02-28 6:03 ` [PATCH 07/21] pSeries platform PE state retrieval Gavin Shan
2012-02-28 6:03 ` [PATCH 08/21] pSeries platform EEH wait PE state Gavin Shan
2012-02-28 6:03 ` [PATCH 09/21] pSeries platform EEH reset PE Gavin Shan
2012-02-28 6:04 ` [PATCH 10/21] pSeries platform EEH error log retrieval Gavin Shan
2012-02-28 6:04 ` [PATCH 11/21] pSeries platform EEH configure bridge Gavin Shan
2012-02-28 6:04 ` [PATCH 12/21] Cleanup on comments of EEH aux components Gavin Shan
2012-02-28 6:04 ` [PATCH 13/21] Cleanup on function names " Gavin Shan
2012-02-28 6:04 ` [PATCH 14/21] Introduce EEH device Gavin Shan
2012-02-28 6:04 ` [PATCH 15/21] Replace pci_dn with eeh_dev for EEH sysfs Gavin Shan
2012-02-28 6:04 ` [PATCH 16/21] Replace pci_dn with eeh_dev for EEH address cache Gavin Shan
2012-02-28 6:04 ` [PATCH 17/21] Replace pci_dn with eeh_dev for EEH core Gavin Shan
2012-02-28 6:04 ` [PATCH 18/21] Replace pci_dn with eeh_dev for EEH aux components Gavin Shan
2012-02-28 6:04 ` [PATCH 19/21] Replace pci_dn with eeh_dev for EEH on pSeries Gavin Shan
2012-02-28 6:04 ` [PATCH 20/21] Introduce struct eeh_stats for EEH Gavin Shan
2012-02-28 10:04 ` David Laight
2012-02-29 1:08 ` Gavin Shan
2012-02-29 2:25 ` Gavin Shan
2012-02-29 12:56 ` Michael Ellerman
2012-03-01 1:14 ` Gavin Shan
2012-03-01 1:47 ` [PATCH 20/21] Introduce struct eeh_stats for EEH - Reworked Gavin Shan
2012-02-28 6:04 ` [PATCH 21/21] pSeries platform config space access in EEH Gavin Shan
2012-02-29 3:04 ` [PATCH v5 00/21] EEH reorganization Gavin Shan
2012-04-12 21:39 ` Anton Blanchard
2012-04-13 2:03 ` Anton Blanchard
2012-04-17 1:29 ` Gavin Shan
2012-04-17 1:37 ` Anton Blanchard
2012-04-17 1:57 ` Benjamin Herrenschmidt
2012-04-17 5:30 ` Gavin Shan
-- strict thread matches above, loose matches on Subject: below --
2012-02-24 9:37 [PATCH v4 " Gavin Shan
2012-02-24 9:38 ` [PATCH 07/21] pSeries platform PE state retrieval Gavin Shan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).