* Support for PCI Express reset type in EEH
@ 2009-07-14 18:35 Mike Mason
2009-07-15 18:32 ` Mike Mason
2009-07-15 18:45 ` [PATCH] " Mike Mason
0 siblings, 2 replies; 8+ messages in thread
From: Mike Mason @ 2009-07-14 18:35 UTC (permalink / raw)
To: linuxppc-dev, linux-pci, Paul Mackerras, benh, linasvepstas; +Cc: Richard Lary
By default, EEH does what's known as a "hot reset" during error recovery of a PCI Express device. We've found a case where the device needs a "fundamental reset" to recover properly. The current PCI error recovery and EEH frameworks do not support this distinction.
The attached patch (courtesy of Richard Lary) implements a reset type callback that can be used to determine what type of reset a device requires. It is backwards compatible with all other drivers that implement PCI error recovery callbacks. Only drivers that require a fundamental reset need to be changed. So far we're only aware of one driver that has the requirement (qla2xxx). The patch touches mostly EEH and pseries code, but does require a couple of minor additions to the overall PCI error recovery framework.
Signed-off-by: Mike Mason <mmlnx@us.ibm.com>
--- a/arch/powerpc/include/asm/ppc-pci.h 2009-06-09 20:05:27.000000000 -0700
+++ b/arch/powerpc/include/asm/ppc-pci.h 2009-07-13 16:12:31.000000000 -0700
@@ -90,7 +90,9 @@ int rtas_pci_enable(struct pci_dn *pdn,
*
* Returns a non-zero value if the reset failed.
*/
-int rtas_set_slot_reset (struct pci_dn *);
+#define HOT_RESET 1
+#define FUNDAMENTAL_RESET 3
+int rtas_set_slot_reset (struct pci_dn *, int reset_type);
int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs);
/**
--- a/arch/powerpc/platforms/pseries/eeh.c 2009-06-09 20:05:27.000000000 -0700
+++ b/arch/powerpc/platforms/pseries/eeh.c 2009-07-13 16:27:27.000000000 -0700
@@ -666,7 +666,7 @@ rtas_pci_enable(struct pci_dn *pdn, int
/**
* rtas_pci_slot_reset - raises/lowers the pci #RST line
* @pdn pci device node
- * @state: 1/0 to raise/lower the #RST
+ * @state: 1/3/0 to raise hot-reset/fundamental-reset/lower the #RST
*
* Clear the EEH-frozen condition on a slot. This routine
* asserts the PCI #RST line if the 'state' argument is '1',
@@ -742,9 +742,9 @@ int pcibios_set_pcie_reset_state(struct
* Return 0 if success, else a non-zero value.
*/
-static void __rtas_set_slot_reset(struct pci_dn *pdn)
+static void __rtas_set_slot_reset(struct pci_dn *pdn, int reset_type)
{
- rtas_pci_slot_reset (pdn, 1);
+ rtas_pci_slot_reset (pdn, reset_type);
/* The PCI bus requires that the reset be held high for at least
* a 100 milliseconds. We wait a bit longer 'just in case'. */
@@ -766,13 +766,13 @@ static void __rtas_set_slot_reset(struct
msleep (PCI_BUS_SETTLE_TIME_MSEC);
}
-int rtas_set_slot_reset(struct pci_dn *pdn)
+int rtas_set_slot_reset(struct pci_dn *pdn, int reset_type)
{
int i, rc;
/* Take three shots at resetting the bus */
for (i=0; i<3; i++) {
- __rtas_set_slot_reset(pdn);
+ __rtas_set_slot_reset(pdn, reset_type);
rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
if (rc == 0)
--- a/arch/powerpc/platforms/pseries/eeh_driver.c 2009-07-13 14:25:24.000000000 -0700
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c 2009-07-13 16:39:16.000000000 -0700
@@ -115,6 +115,34 @@ static void eeh_enable_irq(struct pci_de
/* ------------------------------------------------------- */
/**
+ * eeh_query_reset_type - query each device driver for reset type
+ *
+ * Query each device driver for special reset type if required
+ * merge the device driver responses. Cumulative response
+ * passed back in "userdata".
+ */
+
+static int eeh_query_reset_type(struct pci_dev *dev, void *userdata)
+{
+ enum pci_ers_result rc, *res = userdata;
+ struct pci_driver *driver = dev->driver;
+
+ if (!driver)
+ return 0;
+
+ if (!driver->err_handler ||
+ !driver->err_handler->reset_type)
+ return 0;
+
+ rc = driver->err_handler->reset_type (dev);
+
+ /* A driver that needs a special reset trumps all others */
+ if (rc == PCI_ERS_RESULT_FUNDAMENTAL_RESET ) *res = rc;
+
+ return 0;
+}
+
+/**
* eeh_report_error - report pci error to each device driver
*
* Report an EEH error to each device driver, collect up and
@@ -282,9 +310,12 @@ static int eeh_report_failure(struct pci
* @pe_dn: pointer to a "Partionable Endpoint" device node.
* This is the top-level structure on which pci
* bus resets can be performed.
+ *
+ * reset_type: some devices may require type other than default hot reset.
*/
-static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
+static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus,
+ int reset_type)
{
struct device_node *dn;
int cnt, rc;
@@ -298,7 +329,7 @@ static int eeh_reset_device (struct pci_
/* Reset the pci controller. (Asserts RST#; resets config space).
* Reconfigure bridges and devices. Don't try to bring the system
* up if the reset failed for some reason. */
- rc = rtas_set_slot_reset(pe_dn);
+ rc = rtas_set_slot_reset(pe_dn, reset_type);
if (rc)
return rc;
@@ -343,6 +374,7 @@ struct pci_dn * handle_eeh_events (struc
struct pci_dn *frozen_pdn;
struct pci_bus *frozen_bus;
int rc = 0;
+ int reset_type = HOT_RESET;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
const char *location, *pci_str, *drv_str;
@@ -400,10 +432,16 @@ struct pci_dn * handle_eeh_events (struc
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
- * to accomplish the reset. Each child gets a report of the
- * status ... if any child can't handle the reset, then the entire
- * slot is dlpar removed and added.
+ * to accomplish the reset. Query device driver for special reset
+ * requiements. Report eeh error to each child with cumulative
+ * result status... if any child can't handle the reset,
+ * then the entire slot is dlpar removed and added.
*/
+ pci_walk_bus(frozen_bus, eeh_query_reset_type, &result);
+ if ( result == PCI_ERS_RESULT_FUNDAMENTAL_RESET )
+ reset_type = FUNDAMENTAL_RESET;
+
+ result = PCI_ERS_RESULT_NONE;
pci_walk_bus(frozen_bus, eeh_report_error, &result);
/* Get the current PCI slot state. This can take a long time,
@@ -425,7 +463,8 @@ struct pci_dn * handle_eeh_events (struc
* go down willingly, without panicing the system.
*/
if (result == PCI_ERS_RESULT_NONE) {
- rc = eeh_reset_device(frozen_pdn, frozen_bus);
+ rc = eeh_reset_device(frozen_pdn, frozen_bus, reset_type);
+
if (rc) {
printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
goto hard_fail;
@@ -466,7 +505,7 @@ struct pci_dn * handle_eeh_events (struc
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
- rc = eeh_reset_device(frozen_pdn, NULL);
+ rc = eeh_reset_device(frozen_pdn, NULL, reset_type);
if (rc) {
printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
goto hard_fail;
--- a/include/linux/pci.h 2009-07-13 14:25:37.000000000 -0700
+++ b/include/linux/pci.h 2009-07-13 16:12:31.000000000 -0700
@@ -446,6 +446,9 @@ enum pci_ers_result {
/* Device driver is fully recovered and operational */
PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5,
+
+ /* Device driver requires fundamental reset to recover */
+ PCI_ERS_RESULT_FUNDAMENTAL_RESET = (__force pci_ers_result_t) 6,
};
/* PCI bus error event callbacks */
@@ -465,6 +468,9 @@ struct pci_error_handlers {
/* Device driver may resume normal operations */
void (*resume)(struct pci_dev *dev);
+
+ /* PCI slot requires special reset type for recovery */
+ pci_ers_result_t (*reset_type)(struct pci_dev *dev);
};
/* ---------------------------------------------------------------- */
--- a/arch/powerpc/include/asm/ppc-pci.h 2009-06-09 20:05:27.000000000 -0700
+++ b/arch/powerpc/include/asm/ppc-pci.h 2009-07-13 16:12:31.000000000 -0700
@@ -90,7 +90,9 @@ int rtas_pci_enable(struct pci_dn *pdn,
*
* Returns a non-zero value if the reset failed.
*/
-int rtas_set_slot_reset (struct pci_dn *);
+#define HOT_RESET 1
+#define FUNDAMENTAL_RESET 3
+int rtas_set_slot_reset (struct pci_dn *, int reset_type);
int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs);
/**
--- a/arch/powerpc/platforms/pseries/eeh.c 2009-06-09 20:05:27.000000000 -0700
+++ b/arch/powerpc/platforms/pseries/eeh.c 2009-07-13 16:27:27.000000000 -0700
@@ -666,7 +666,7 @@ rtas_pci_enable(struct pci_dn *pdn, int
/**
* rtas_pci_slot_reset - raises/lowers the pci #RST line
* @pdn pci device node
- * @state: 1/0 to raise/lower the #RST
+ * @state: 1/3/0 to raise hot-reset/fundamental-reset/lower the #RST
*
* Clear the EEH-frozen condition on a slot. This routine
* asserts the PCI #RST line if the 'state' argument is '1',
@@ -742,9 +742,9 @@ int pcibios_set_pcie_reset_state(struct
* Return 0 if success, else a non-zero value.
*/
-static void __rtas_set_slot_reset(struct pci_dn *pdn)
+static void __rtas_set_slot_reset(struct pci_dn *pdn, int reset_type)
{
- rtas_pci_slot_reset (pdn, 1);
+ rtas_pci_slot_reset (pdn, reset_type);
/* The PCI bus requires that the reset be held high for at least
* a 100 milliseconds. We wait a bit longer 'just in case'. */
@@ -766,13 +766,13 @@ static void __rtas_set_slot_reset(struct
msleep (PCI_BUS_SETTLE_TIME_MSEC);
}
-int rtas_set_slot_reset(struct pci_dn *pdn)
+int rtas_set_slot_reset(struct pci_dn *pdn, int reset_type)
{
int i, rc;
/* Take three shots at resetting the bus */
for (i=0; i<3; i++) {
- __rtas_set_slot_reset(pdn);
+ __rtas_set_slot_reset(pdn, reset_type);
rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
if (rc == 0)
--- a/arch/powerpc/platforms/pseries/eeh_driver.c 2009-07-13 14:25:24.000000000 -0700
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c 2009-07-13 16:39:16.000000000 -0700
@@ -115,6 +115,34 @@ static void eeh_enable_irq(struct pci_de
/* ------------------------------------------------------- */
/**
+ * eeh_query_reset_type - query each device driver for reset type
+ *
+ * Query each device driver for special reset type if required
+ * merge the device driver responses. Cumulative response
+ * passed back in "userdata".
+ */
+
+static int eeh_query_reset_type(struct pci_dev *dev, void *userdata)
+{
+ enum pci_ers_result rc, *res = userdata;
+ struct pci_driver *driver = dev->driver;
+
+ if (!driver)
+ return 0;
+
+ if (!driver->err_handler ||
+ !driver->err_handler->reset_type)
+ return 0;
+
+ rc = driver->err_handler->reset_type (dev);
+
+ /* A driver that needs a special reset trumps all others */
+ if (rc == PCI_ERS_RESULT_FUNDAMENTAL_RESET ) *res = rc;
+
+ return 0;
+}
+
+/**
* eeh_report_error - report pci error to each device driver
*
* Report an EEH error to each device driver, collect up and
@@ -282,9 +310,12 @@ static int eeh_report_failure(struct pci
* @pe_dn: pointer to a "Partionable Endpoint" device node.
* This is the top-level structure on which pci
* bus resets can be performed.
+ *
+ * reset_type: some devices may require type other than default hot reset.
*/
-static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
+static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus,
+ int reset_type)
{
struct device_node *dn;
int cnt, rc;
@@ -298,7 +329,7 @@ static int eeh_reset_device (struct pci_
/* Reset the pci controller. (Asserts RST#; resets config space).
* Reconfigure bridges and devices. Don't try to bring the system
* up if the reset failed for some reason. */
- rc = rtas_set_slot_reset(pe_dn);
+ rc = rtas_set_slot_reset(pe_dn, reset_type);
if (rc)
return rc;
@@ -343,6 +374,7 @@ struct pci_dn * handle_eeh_events (struc
struct pci_dn *frozen_pdn;
struct pci_bus *frozen_bus;
int rc = 0;
+ int reset_type = HOT_RESET;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
const char *location, *pci_str, *drv_str;
@@ -400,10 +432,16 @@ struct pci_dn * handle_eeh_events (struc
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
- * to accomplish the reset. Each child gets a report of the
- * status ... if any child can't handle the reset, then the entire
- * slot is dlpar removed and added.
+ * to accomplish the reset. Query device driver for special reset
+ * requiements. Report eeh error to each child with cumulative
+ * result status... if any child can't handle the reset,
+ * then the entire slot is dlpar removed and added.
*/
+ pci_walk_bus(frozen_bus, eeh_query_reset_type, &result);
+ if ( result == PCI_ERS_RESULT_FUNDAMENTAL_RESET )
+ reset_type = FUNDAMENTAL_RESET;
+
+ result = PCI_ERS_RESULT_NONE;
pci_walk_bus(frozen_bus, eeh_report_error, &result);
/* Get the current PCI slot state. This can take a long time,
@@ -425,7 +463,8 @@ struct pci_dn * handle_eeh_events (struc
* go down willingly, without panicing the system.
*/
if (result == PCI_ERS_RESULT_NONE) {
- rc = eeh_reset_device(frozen_pdn, frozen_bus);
+ rc = eeh_reset_device(frozen_pdn, frozen_bus, reset_type);
+
if (rc) {
printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
goto hard_fail;
@@ -466,7 +505,7 @@ struct pci_dn * handle_eeh_events (struc
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
- rc = eeh_reset_device(frozen_pdn, NULL);
+ rc = eeh_reset_device(frozen_pdn, NULL, reset_type);
if (rc) {
printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
goto hard_fail;
--- a/include/linux/pci.h 2009-07-13 14:25:37.000000000 -0700
+++ b/include/linux/pci.h 2009-07-13 16:12:31.000000000 -0700
@@ -446,6 +446,9 @@ enum pci_ers_result {
/* Device driver is fully recovered and operational */
PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5,
+
+ /* Device driver requires fundamental reset to recover */
+ PCI_ERS_RESULT_FUNDAMENTAL_RESET = (__force pci_ers_result_t) 6,
};
/* PCI bus error event callbacks */
@@ -465,6 +468,9 @@ struct pci_error_handlers {
/* Device driver may resume normal operations */
void (*resume)(struct pci_dev *dev);
+
+ /* PCI slot requires special reset type for recovery */
+ pci_ers_result_t (*reset_type)(struct pci_dev *dev);
};
/* ---------------------------------------------------------------- */
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: Support for PCI Express reset type in EEH
2009-07-14 18:35 Support for PCI Express reset type in EEH Mike Mason
@ 2009-07-15 18:32 ` Mike Mason
2009-07-15 18:45 ` [PATCH] " Mike Mason
1 sibling, 0 replies; 8+ messages in thread
From: Mike Mason @ 2009-07-15 18:32 UTC (permalink / raw)
To: linuxppc-dev, linux-pci, Paul Mackerras, benh, linasvepstas; +Cc: Richard Lary
This patch was simultaneously submitted to Red Hat for review. As a result of that review, I'm withdrawing this patch and will submit a new version shortly.
Mike
Mike Mason wrote:
> By default, EEH does what's known as a "hot reset" during error recovery
> of a PCI Express device. We've found a case where the device needs a
> "fundamental reset" to recover properly. The current PCI error recovery
> and EEH frameworks do not support this distinction.
>
> The attached patch (courtesy of Richard Lary) implements a reset type
> callback that can be used to determine what type of reset a device
> requires. It is backwards compatible with all other drivers that
> implement PCI error recovery callbacks. Only drivers that require a
> fundamental reset need to be changed. So far we're only aware of one
> driver that has the requirement (qla2xxx). The patch touches mostly EEH
> and pseries code, but does require a couple of minor additions to the
> overall PCI error recovery framework.
>
> Signed-off-by: Mike Mason <mmlnx@us.ibm.com>
>
> --- a/arch/powerpc/include/asm/ppc-pci.h 2009-06-09
> 20:05:27.000000000 -0700
> +++ b/arch/powerpc/include/asm/ppc-pci.h 2009-07-13
> 16:12:31.000000000 -0700
> @@ -90,7 +90,9 @@ int rtas_pci_enable(struct pci_dn *pdn,
> *
> * Returns a non-zero value if the reset failed.
> */
> -int rtas_set_slot_reset (struct pci_dn *);
> +#define HOT_RESET 1
> +#define FUNDAMENTAL_RESET 3
> +int rtas_set_slot_reset (struct pci_dn *, int reset_type);
> int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs);
>
> /** --- a/arch/powerpc/platforms/pseries/eeh.c 2009-06-09
> 20:05:27.000000000 -0700
> +++ b/arch/powerpc/platforms/pseries/eeh.c 2009-07-13
> 16:27:27.000000000 -0700
> @@ -666,7 +666,7 @@ rtas_pci_enable(struct pci_dn *pdn, int
> /**
> * rtas_pci_slot_reset - raises/lowers the pci #RST line
> * @pdn pci device node
> - * @state: 1/0 to raise/lower the #RST
> + * @state: 1/3/0 to raise hot-reset/fundamental-reset/lower the #RST
> *
> * Clear the EEH-frozen condition on a slot. This routine
> * asserts the PCI #RST line if the 'state' argument is '1',
> @@ -742,9 +742,9 @@ int pcibios_set_pcie_reset_state(struct
> * Return 0 if success, else a non-zero value.
> */
>
> -static void __rtas_set_slot_reset(struct pci_dn *pdn)
> +static void __rtas_set_slot_reset(struct pci_dn *pdn, int reset_type)
> {
> - rtas_pci_slot_reset (pdn, 1);
> + rtas_pci_slot_reset (pdn, reset_type);
>
> /* The PCI bus requires that the reset be held high for at least
> * a 100 milliseconds. We wait a bit longer 'just in case'. */
> @@ -766,13 +766,13 @@ static void __rtas_set_slot_reset(struct
> msleep (PCI_BUS_SETTLE_TIME_MSEC);
> }
>
> -int rtas_set_slot_reset(struct pci_dn *pdn)
> +int rtas_set_slot_reset(struct pci_dn *pdn, int reset_type)
> {
> int i, rc;
>
> /* Take three shots at resetting the bus */
> for (i=0; i<3; i++) {
> - __rtas_set_slot_reset(pdn);
> + __rtas_set_slot_reset(pdn, reset_type);
>
> rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
> if (rc == 0)
> --- a/arch/powerpc/platforms/pseries/eeh_driver.c 2009-07-13
> 14:25:24.000000000 -0700
> +++ b/arch/powerpc/platforms/pseries/eeh_driver.c 2009-07-13
> 16:39:16.000000000 -0700
> @@ -115,6 +115,34 @@ static void eeh_enable_irq(struct pci_de
>
> /* ------------------------------------------------------- */
> /**
> + * eeh_query_reset_type - query each device driver for reset type
> + *
> + * Query each device driver for special reset type if required
> + * merge the device driver responses. Cumulative response
> + * passed back in "userdata".
> + */
> +
> +static int eeh_query_reset_type(struct pci_dev *dev, void *userdata)
> +{
> + enum pci_ers_result rc, *res = userdata;
> + struct pci_driver *driver = dev->driver;
> +
> + if (!driver)
> + return 0;
> +
> + if (!driver->err_handler ||
> + !driver->err_handler->reset_type)
> + return 0;
> +
> + rc = driver->err_handler->reset_type (dev);
> +
> + /* A driver that needs a special reset trumps all others */
> + if (rc == PCI_ERS_RESULT_FUNDAMENTAL_RESET ) *res = rc;
> +
> + return 0;
> +}
> +
> +/**
> * eeh_report_error - report pci error to each device driver
> * * Report an EEH error to each device driver, collect up and @@ -282,9
> +310,12 @@ static int eeh_report_failure(struct pci
> * @pe_dn: pointer to a "Partionable Endpoint" device node.
> * This is the top-level structure on which pci
> * bus resets can be performed.
> + *
> + * reset_type: some devices may require type other than default hot reset.
> */
>
> -static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
> +static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus,
> + int reset_type)
> {
> struct device_node *dn;
> int cnt, rc;
> @@ -298,7 +329,7 @@ static int eeh_reset_device (struct pci_
> /* Reset the pci controller. (Asserts RST#; resets config space).
> * Reconfigure bridges and devices. Don't try to bring the system
> * up if the reset failed for some reason. */
> - rc = rtas_set_slot_reset(pe_dn);
> + rc = rtas_set_slot_reset(pe_dn, reset_type);
> if (rc)
> return rc;
>
> @@ -343,6 +374,7 @@ struct pci_dn * handle_eeh_events (struc
> struct pci_dn *frozen_pdn;
> struct pci_bus *frozen_bus;
> int rc = 0;
> + int reset_type = HOT_RESET;
> enum pci_ers_result result = PCI_ERS_RESULT_NONE;
> const char *location, *pci_str, *drv_str;
>
> @@ -400,10 +432,16 @@ struct pci_dn * handle_eeh_events (struc
>
> /* Walk the various device drivers attached to this slot through
> * a reset sequence, giving each an opportunity to do what it needs
> - * to accomplish the reset. Each child gets a report of the
> - * status ... if any child can't handle the reset, then the entire
> - * slot is dlpar removed and added.
> + * to accomplish the reset. Query device driver for special reset
> + * requiements. Report eeh error to each child with cumulative
> + * result status... if any child can't handle the reset,
> + * then the entire slot is dlpar removed and added.
> */
> + pci_walk_bus(frozen_bus, eeh_query_reset_type, &result);
> + if ( result == PCI_ERS_RESULT_FUNDAMENTAL_RESET )
> + reset_type = FUNDAMENTAL_RESET;
> +
> + result = PCI_ERS_RESULT_NONE;
> pci_walk_bus(frozen_bus, eeh_report_error, &result);
>
> /* Get the current PCI slot state. This can take a long time,
> @@ -425,7 +463,8 @@ struct pci_dn * handle_eeh_events (struc
> * go down willingly, without panicing the system.
> */
> if (result == PCI_ERS_RESULT_NONE) {
> - rc = eeh_reset_device(frozen_pdn, frozen_bus);
> + rc = eeh_reset_device(frozen_pdn, frozen_bus, reset_type);
> +
> if (rc) {
> printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
> goto hard_fail;
> @@ -466,7 +505,7 @@ struct pci_dn * handle_eeh_events (struc
>
> /* If any device called out for a reset, then reset the slot */
> if (result == PCI_ERS_RESULT_NEED_RESET) {
> - rc = eeh_reset_device(frozen_pdn, NULL);
> + rc = eeh_reset_device(frozen_pdn, NULL, reset_type);
> if (rc) {
> printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
> goto hard_fail;
> --- a/include/linux/pci.h 2009-07-13 14:25:37.000000000 -0700
> +++ b/include/linux/pci.h 2009-07-13 16:12:31.000000000 -0700
> @@ -446,6 +446,9 @@ enum pci_ers_result {
>
> /* Device driver is fully recovered and operational */
> PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5,
> +
> + /* Device driver requires fundamental reset to recover */
> + PCI_ERS_RESULT_FUNDAMENTAL_RESET = (__force pci_ers_result_t) 6,
> };
>
> /* PCI bus error event callbacks */
> @@ -465,6 +468,9 @@ struct pci_error_handlers {
>
> /* Device driver may resume normal operations */
> void (*resume)(struct pci_dev *dev);
> +
> + /* PCI slot requires special reset type for recovery */
> + pci_ers_result_t (*reset_type)(struct pci_dev *dev);
> };
>
> /* ---------------------------------------------------------------- */
> --- a/arch/powerpc/include/asm/ppc-pci.h 2009-06-09
> 20:05:27.000000000 -0700
> +++ b/arch/powerpc/include/asm/ppc-pci.h 2009-07-13
> 16:12:31.000000000 -0700
> @@ -90,7 +90,9 @@ int rtas_pci_enable(struct pci_dn *pdn,
> *
> * Returns a non-zero value if the reset failed.
> */
> -int rtas_set_slot_reset (struct pci_dn *);
> +#define HOT_RESET 1
> +#define FUNDAMENTAL_RESET 3
> +int rtas_set_slot_reset (struct pci_dn *, int reset_type);
> int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs);
>
> /** --- a/arch/powerpc/platforms/pseries/eeh.c 2009-06-09
> 20:05:27.000000000 -0700
> +++ b/arch/powerpc/platforms/pseries/eeh.c 2009-07-13
> 16:27:27.000000000 -0700
> @@ -666,7 +666,7 @@ rtas_pci_enable(struct pci_dn *pdn, int
> /**
> * rtas_pci_slot_reset - raises/lowers the pci #RST line
> * @pdn pci device node
> - * @state: 1/0 to raise/lower the #RST
> + * @state: 1/3/0 to raise hot-reset/fundamental-reset/lower the #RST
> *
> * Clear the EEH-frozen condition on a slot. This routine
> * asserts the PCI #RST line if the 'state' argument is '1',
> @@ -742,9 +742,9 @@ int pcibios_set_pcie_reset_state(struct
> * Return 0 if success, else a non-zero value.
> */
>
> -static void __rtas_set_slot_reset(struct pci_dn *pdn)
> +static void __rtas_set_slot_reset(struct pci_dn *pdn, int reset_type)
> {
> - rtas_pci_slot_reset (pdn, 1);
> + rtas_pci_slot_reset (pdn, reset_type);
>
> /* The PCI bus requires that the reset be held high for at least
> * a 100 milliseconds. We wait a bit longer 'just in case'. */
> @@ -766,13 +766,13 @@ static void __rtas_set_slot_reset(struct
> msleep (PCI_BUS_SETTLE_TIME_MSEC);
> }
>
> -int rtas_set_slot_reset(struct pci_dn *pdn)
> +int rtas_set_slot_reset(struct pci_dn *pdn, int reset_type)
> {
> int i, rc;
>
> /* Take three shots at resetting the bus */
> for (i=0; i<3; i++) {
> - __rtas_set_slot_reset(pdn);
> + __rtas_set_slot_reset(pdn, reset_type);
>
> rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
> if (rc == 0)
> --- a/arch/powerpc/platforms/pseries/eeh_driver.c 2009-07-13
> 14:25:24.000000000 -0700
> +++ b/arch/powerpc/platforms/pseries/eeh_driver.c 2009-07-13
> 16:39:16.000000000 -0700
> @@ -115,6 +115,34 @@ static void eeh_enable_irq(struct pci_de
>
> /* ------------------------------------------------------- */
> /**
> + * eeh_query_reset_type - query each device driver for reset type
> + *
> + * Query each device driver for special reset type if required
> + * merge the device driver responses. Cumulative response
> + * passed back in "userdata".
> + */
> +
> +static int eeh_query_reset_type(struct pci_dev *dev, void *userdata)
> +{
> + enum pci_ers_result rc, *res = userdata;
> + struct pci_driver *driver = dev->driver;
> +
> + if (!driver)
> + return 0;
> +
> + if (!driver->err_handler ||
> + !driver->err_handler->reset_type)
> + return 0;
> +
> + rc = driver->err_handler->reset_type (dev);
> +
> + /* A driver that needs a special reset trumps all others */
> + if (rc == PCI_ERS_RESULT_FUNDAMENTAL_RESET ) *res = rc;
> +
> + return 0;
> +}
> +
> +/**
> * eeh_report_error - report pci error to each device driver
> * * Report an EEH error to each device driver, collect up and @@ -282,9
> +310,12 @@ static int eeh_report_failure(struct pci
> * @pe_dn: pointer to a "Partionable Endpoint" device node.
> * This is the top-level structure on which pci
> * bus resets can be performed.
> + *
> + * reset_type: some devices may require type other than default hot reset.
> */
>
> -static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
> +static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus,
> + int reset_type)
> {
> struct device_node *dn;
> int cnt, rc;
> @@ -298,7 +329,7 @@ static int eeh_reset_device (struct pci_
> /* Reset the pci controller. (Asserts RST#; resets config space).
> * Reconfigure bridges and devices. Don't try to bring the system
> * up if the reset failed for some reason. */
> - rc = rtas_set_slot_reset(pe_dn);
> + rc = rtas_set_slot_reset(pe_dn, reset_type);
> if (rc)
> return rc;
>
> @@ -343,6 +374,7 @@ struct pci_dn * handle_eeh_events (struc
> struct pci_dn *frozen_pdn;
> struct pci_bus *frozen_bus;
> int rc = 0;
> + int reset_type = HOT_RESET;
> enum pci_ers_result result = PCI_ERS_RESULT_NONE;
> const char *location, *pci_str, *drv_str;
>
> @@ -400,10 +432,16 @@ struct pci_dn * handle_eeh_events (struc
>
> /* Walk the various device drivers attached to this slot through
> * a reset sequence, giving each an opportunity to do what it needs
> - * to accomplish the reset. Each child gets a report of the
> - * status ... if any child can't handle the reset, then the entire
> - * slot is dlpar removed and added.
> + * to accomplish the reset. Query device driver for special reset
> + * requiements. Report eeh error to each child with cumulative
> + * result status... if any child can't handle the reset,
> + * then the entire slot is dlpar removed and added.
> */
> + pci_walk_bus(frozen_bus, eeh_query_reset_type, &result);
> + if ( result == PCI_ERS_RESULT_FUNDAMENTAL_RESET )
> + reset_type = FUNDAMENTAL_RESET;
> +
> + result = PCI_ERS_RESULT_NONE;
> pci_walk_bus(frozen_bus, eeh_report_error, &result);
>
> /* Get the current PCI slot state. This can take a long time,
> @@ -425,7 +463,8 @@ struct pci_dn * handle_eeh_events (struc
> * go down willingly, without panicing the system.
> */
> if (result == PCI_ERS_RESULT_NONE) {
> - rc = eeh_reset_device(frozen_pdn, frozen_bus);
> + rc = eeh_reset_device(frozen_pdn, frozen_bus, reset_type);
> +
> if (rc) {
> printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
> goto hard_fail;
> @@ -466,7 +505,7 @@ struct pci_dn * handle_eeh_events (struc
>
> /* If any device called out for a reset, then reset the slot */
> if (result == PCI_ERS_RESULT_NEED_RESET) {
> - rc = eeh_reset_device(frozen_pdn, NULL);
> + rc = eeh_reset_device(frozen_pdn, NULL, reset_type);
> if (rc) {
> printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
> goto hard_fail;
> --- a/include/linux/pci.h 2009-07-13 14:25:37.000000000 -0700
> +++ b/include/linux/pci.h 2009-07-13 16:12:31.000000000 -0700
> @@ -446,6 +446,9 @@ enum pci_ers_result {
>
> /* Device driver is fully recovered and operational */
> PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5,
> +
> + /* Device driver requires fundamental reset to recover */
> + PCI_ERS_RESULT_FUNDAMENTAL_RESET = (__force pci_ers_result_t) 6,
> };
>
> /* PCI bus error event callbacks */
> @@ -465,6 +468,9 @@ struct pci_error_handlers {
>
> /* Device driver may resume normal operations */
> void (*resume)(struct pci_dev *dev);
> +
> + /* PCI slot requires special reset type for recovery */
> + pci_ers_result_t (*reset_type)(struct pci_dev *dev);
> };
>
> /* ---------------------------------------------------------------- */
>
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH] Support for PCI Express reset type in EEH
2009-07-14 18:35 Support for PCI Express reset type in EEH Mike Mason
2009-07-15 18:32 ` Mike Mason
@ 2009-07-15 18:45 ` Mike Mason
2009-07-23 14:44 ` Linas Vepstas
1 sibling, 1 reply; 8+ messages in thread
From: Mike Mason @ 2009-07-15 18:45 UTC (permalink / raw)
To: linuxppc-dev, linux-pci, Paul Mackerras, benh, linasvepstas; +Cc: Richard Lary
By default, EEH does what's known as a "hot reset" during error recovery of a PCI Express device. We've found a case where the device needs a "fundamental reset" to recover properly. The current PCI error recovery and EEH frameworks do not support this distinction.
The attached patch (courtesy of Richard Lary) adds a bit field to pci_dev that indicates whether the device requires a fundamental reset during error recovery. This bit can be checked by EEH to determine which reset type is required.
This patch supersedes the previously submitted patch that implemented a reset type callback.
Please review and let me know of any concerns.
Signed-off-by: Mike Mason <mmlnx@us.ibm.com>
diff -uNrp a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
--- a/arch/powerpc/kernel/pci_64.c 2009-07-13 14:25:24.000000000 -0700
+++ b/arch/powerpc/kernel/pci_64.c 2009-07-15 10:26:26.000000000 -0700
@@ -143,6 +143,7 @@ struct pci_dev *of_create_pci_dev(struct
dev->dev.bus = &pci_bus_type;
dev->devfn = devfn;
dev->multifunction = 0; /* maybe a lie? */
+ dev->fndmntl_rst_rqd = 0; /* pcie fundamental reset required */
dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
dev->device = get_int_prop(node, "device-id", 0xffff);
diff -uNrp a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
--- a/arch/powerpc/platforms/pseries/eeh.c 2009-06-09 20:05:27.000000000 -0700
+++ b/arch/powerpc/platforms/pseries/eeh.c 2009-07-15 10:29:04.000000000 -0700
@@ -744,7 +744,15 @@ int pcibios_set_pcie_reset_state(struct
static void __rtas_set_slot_reset(struct pci_dn *pdn)
{
- rtas_pci_slot_reset (pdn, 1);
+ struct pci_dev *dev = pdn->pcidev;
+
+ /* Determine type of EEH reset required by device,
+ * default hot reset or fundamental reset
+ */
+ if (dev->fndmntl_rst_rqd)
+ rtas_pci_slot_reset(pdn, 3);
+ else
+ rtas_pci_slot_reset(pdn, 1);
/* The PCI bus requires that the reset be held high for at least
* a 100 milliseconds. We wait a bit longer 'just in case'. */
diff -uNrp a/include/linux/pci.h b/include/linux/pci.h
--- a/include/linux/pci.h 2009-07-13 14:25:37.000000000 -0700
+++ b/include/linux/pci.h 2009-07-15 10:25:37.000000000 -0700
@@ -273,6 +273,7 @@ struct pci_dev {
unsigned int ari_enabled:1; /* ARI forwarding */
unsigned int is_managed:1;
unsigned int is_pcie:1;
+ unsigned int fndmntl_rst_rqd:1; /* Dev requires fundamental reset */
unsigned int state_saved:1;
unsigned int is_physfn:1;
unsigned int is_virtfn:1;
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Support for PCI Express reset type in EEH
2009-07-15 18:45 ` [PATCH] " Mike Mason
@ 2009-07-23 14:44 ` Linas Vepstas
2009-07-23 15:03 ` Richard Lary
2009-07-24 21:36 ` Richard Lary
0 siblings, 2 replies; 8+ messages in thread
From: Linas Vepstas @ 2009-07-23 14:44 UTC (permalink / raw)
To: Mike Mason; +Cc: linuxppc-dev, Paul Mackerras, Richard Lary, linux-pci
2009/7/15 Mike Mason <mmlnx@us.ibm.com>:
> By default, EEH does what's known as a "hot reset" during error recovery =
of
> a PCI Express device. =C2=A0We've found a case where the device needs a
> "fundamental reset" to recover properly. =C2=A0The current PCI error reco=
very and
> EEH frameworks do not support this distinction.
>
> The attached patch (courtesy of Richard Lary) adds a bit field to pci_dev
> that indicates whether the device requires a fundamental reset during err=
or
> recovery. =C2=A0This bit can be checked by EEH to determine which reset t=
ype is
> required.
>
> This patch supersedes the previously submitted patch that implemented a
> reset type callback.
>
> Please review and let me know of any concerns.
I like this patch a *lot* better .. it is vastly simpler, more direct.
> diff -uNrp a/include/linux/pci.h b/include/linux/pci.h
> --- a/include/linux/pci.h =C2=A0 =C2=A0 =C2=A0 2009-07-13 14:25:37.000000=
000 -0700
> +++ b/include/linux/pci.h =C2=A0 =C2=A0 =C2=A0 2009-07-15 10:25:37.000000=
000 -0700
> @@ -273,6 +273,7 @@ struct pci_dev {
> =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0ari_enabled:1; =C2=
=A0/* ARI forwarding */
> =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0is_managed:1;
> =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0is_pcie:1;
> + =C2=A0 =C2=A0 =C2=A0 unsigned int =C2=A0 =C2=A0fndmntl_rst_rqd:1; /* De=
v requires fundamental reset
> */
> =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0state_saved:1;
> =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0is_physfn:1;
> =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0is_virtfn:1;
As Ben points out, the name is awkward. How about needs_freset ?
Since this affects the entire pci subsystem, it should be documented
properly. The "pci error recovery" subsystem was designed to be
usable in other architectures, and so the error recovery docs should
take at least a paragraph to describe what this flag means, and when
its supposed to be used.
Providing the docs patch together with the pci.h patch *only* would
probably simplify acceptance by the PCI community.
--linas
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Support for PCI Express reset type in EEH
2009-07-23 14:44 ` Linas Vepstas
@ 2009-07-23 15:03 ` Richard Lary
2009-07-24 21:36 ` Richard Lary
1 sibling, 0 replies; 8+ messages in thread
From: Richard Lary @ 2009-07-23 15:03 UTC (permalink / raw)
To: linasvepstas; +Cc: linuxppc-dev, mmlnx, Paul Mackerras, linux-pci
[-- Attachment #1: Type: text/plain, Size: 2169 bytes --]
Linas Vepstas <linasvepstas@gmail.com> wrote on 07/23/2009 07:44:33 AM:
> 2009/7/15 Mike Mason <mmlnx@us.ibm.com>:
> > By default, EEH does what's known as a "hot reset" during error
recovery of
> > a PCI Express device. We've found a case where the device needs a
> > "fundamental reset" to recover properly. The current PCI error
recovery and
> > EEH frameworks do not support this distinction.
> >
> > The attached patch (courtesy of Richard Lary) adds a bit field to
pci_dev
> > that indicates whether the device requires a fundamental reset during
error
> > recovery. This bit can be checked by EEH to determine which reset type
is
> > required.
> >
> > This patch supersedes the previously submitted patch that implemented a
> > reset type callback.
> >
> > Please review and let me know of any concerns.
>
> I like this patch a *lot* better .. it is vastly simpler, more direct.
>
>
> > diff -uNrp a/include/linux/pci.h b/include/linux/pci.h
> > --- a/include/linux/pci.h 2009-07-13 14:25:37.000000000 -0700
> > +++ b/include/linux/pci.h 2009-07-15 10:25:37.000000000 -0700
> > @@ -273,6 +273,7 @@ struct pci_dev {
> > unsigned int ari_enabled:1; /* ARI forwarding */
> > unsigned int is_managed:1;
> > unsigned int is_pcie:1;
> > + unsigned int fndmntl_rst_rqd:1; /* Dev requires fundamental
reset
> > */
> > unsigned int state_saved:1;
> > unsigned int is_physfn:1;
> > unsigned int is_virtfn:1;
>
> As Ben points out, the name is awkward. How about needs_freset ?
I have no problem changing the name.
> Since this affects the entire pci subsystem, it should be documented
> properly. The "pci error recovery" subsystem was designed to be
> usable in other architectures, and so the error recovery docs should
> take at least a paragraph to describe what this flag means, and when
> its supposed to be used.
I will take a stab at updating the docs and post here for comment.
> Providing the docs patch together with the pci.h patch *only* would
> probably simplify acceptance by the PCI community.
>
> --linas
[-- Attachment #2: Type: text/html, Size: 2710 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Support for PCI Express reset type in EEH
2009-07-23 14:44 ` Linas Vepstas
2009-07-23 15:03 ` Richard Lary
@ 2009-07-24 21:36 ` Richard Lary
2009-07-25 0:30 ` Linas Vepstas
1 sibling, 1 reply; 8+ messages in thread
From: Richard Lary @ 2009-07-24 21:36 UTC (permalink / raw)
To: linasvepstas; +Cc: linuxppc-dev, mmlnx, Paul Mackerras, linux-pci
[-- Attachment #1: Type: text/plain, Size: 2225 bytes --]
Linas Vepstas <linasvepstas@gmail.com> wrote on 07/23/2009 07:44:33 AM:
> 2009/7/15 Mike Mason <mmlnx@us.ibm.com>:
> > By default, EEH does what's known as a "hot reset" during error
recovery of
> > a PCI Express device. We've found a case where the device needs a
> > "fundamental reset" to recover properly. The current PCI error
recovery and
> > EEH frameworks do not support this distinction.
> >
> > The attached patch (courtesy of Richard Lary) adds a bit field to
pci_dev
> > that indicates whether the device requires a fundamental reset during
error
> > recovery. This bit can be checked by EEH to determine which reset type
is
> > required.
> >
> > This patch supersedes the previously submitted patch that implemented a
> > reset type callback.
> >
> > Please review and let me know of any concerns.
>
> I like this patch a *lot* better .. it is vastly simpler, more direct.
>
>
> > diff -uNrp a/include/linux/pci.h b/include/linux/pci.h
> > --- a/include/linux/pci.h 2009-07-13 14:25:37.000000000 -0700
> > +++ b/include/linux/pci.h 2009-07-15 10:25:37.000000000 -0700
> > @@ -273,6 +273,7 @@ struct pci_dev {
> > unsigned int ari_enabled:1; /* ARI forwarding */
> > unsigned int is_managed:1;
> > unsigned int is_pcie:1;
> > + unsigned int fndmntl_rst_rqd:1; /* Dev requires fundamental
reset
> > */
> > unsigned int state_saved:1;
> > unsigned int is_physfn:1;
> > unsigned int is_virtfn:1;
>
> As Ben points out, the name is awkward. How about needs_freset ?
I am OK with name change.
> Since this affects the entire pci subsystem, it should be documented
> properly. The "pci error recovery" subsystem was designed to be
> usable in other architectures, and so the error recovery docs should
> take at least a paragraph to describe what this flag means, and when
> its supposed to be used.
I will update the documentation, are you referring to
Documentation/powerpc/eeh-pci-error-recovery.txt
or some other documentation?
> Providing the docs patch together with the pci.h patch *only* would
> probably simplify acceptance by the PCI community.
>
> --linas
[-- Attachment #2: Type: text/html, Size: 2792 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Support for PCI Express reset type in EEH
2009-07-24 21:36 ` Richard Lary
@ 2009-07-25 0:30 ` Linas Vepstas
2009-07-27 14:29 ` Richard Lary
0 siblings, 1 reply; 8+ messages in thread
From: Linas Vepstas @ 2009-07-25 0:30 UTC (permalink / raw)
To: Richard Lary; +Cc: linuxppc-dev, mmlnx, Paul Mackerras, linux-pci
2009/7/24 Richard Lary <rlary@us.ibm.com>:
> Linas Vepstas <linasvepstas@gmail.com> wrote on 07/23/2009 07:44:33 AM:
>
>> 2009/7/15 Mike Mason <mmlnx@us.ibm.com>:
>> > By default, EEH does what's known as a "hot reset" during error recove=
ry
>> > of
>> > a PCI Express device. =C2=A0We've found a case where the device needs =
a
>> > "fundamental reset" to recover properly. =C2=A0The current PCI error r=
ecovery
>> > and
>> > EEH frameworks do not support this distinction.
>> >
>> > The attached patch (courtesy of Richard Lary) adds a bit field to
>> > pci_dev
>> > that indicates whether the device requires a fundamental reset during
>> > error
>> > recovery. =C2=A0This bit can be checked by EEH to determine which rese=
t type
>> > is
>> > required.
>> >
>> > This patch supersedes the previously submitted patch that implemented =
a
>> > reset type callback.
>> >
>> > Please review and let me know of any concerns.
>>
>> I like this patch a *lot* better .. it is vastly simpler, more direct.
>>
>>
>> > diff -uNrp a/include/linux/pci.h b/include/linux/pci.h
>> > --- a/include/linux/pci.h =C2=A0 =C2=A0 =C2=A0 2009-07-13 14:25:37.000=
000000 -0700
>> > +++ b/include/linux/pci.h =C2=A0 =C2=A0 =C2=A0 2009-07-15 10:25:37.000=
000000 -0700
>> > @@ -273,6 +273,7 @@ struct pci_dev {
>> > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0ari_enabled:1; =
=C2=A0/* ARI forwarding */
>> > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0is_managed:1;
>> > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0is_pcie:1;
>> > + =C2=A0 =C2=A0 =C2=A0 unsigned int =C2=A0 =C2=A0fndmntl_rst_rqd:1; /*=
Dev requires fundamental
>> > reset
>> > */
>> > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0state_saved:1;
>> > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0is_physfn:1;
>> > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0is_virtfn:1;
>>
>> As Ben points out, the name is awkward. =C2=A0How about needs_freset ?
>
> I am OK with name change.
>
>
>> Since this affects the entire pci subsystem, it should be documented
>> properly. =C2=A0The "pci error recovery" subsystem was designed to be
>> usable in other architectures, and so the error recovery docs should
>> take at least a paragraph to describe what this flag means, and when
>> its supposed to be used.
>
> I will update the documentation, are you referring to
> Documentation/powerpc/eeh-pci-error-recovery.txt
> or some other documentation?
No, I'm thinking
Documentation/PCI/pci-error-recovery.txt
because the flag is not powerpc-specific.
--linas
>
>> Providing the docs patch together with the pci.h patch *only* would
>> probably simplify acceptance by the PCI community.
>>
>> --linas
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Support for PCI Express reset type in EEH
2009-07-25 0:30 ` Linas Vepstas
@ 2009-07-27 14:29 ` Richard Lary
0 siblings, 0 replies; 8+ messages in thread
From: Richard Lary @ 2009-07-27 14:29 UTC (permalink / raw)
To: linasvepstas; +Cc: linuxppc-dev, mmlnx, Paul Mackerras, linux-pci
[-- Attachment #1: Type: text/plain, Size: 2720 bytes --]
Linas Vepstas <linasvepstas@gmail.com> wrote on 07/24/2009 05:30:09 PM:
> 2009/7/24 Richard Lary <rlary@us.ibm.com>:
> > Linas Vepstas <linasvepstas@gmail.com> wrote on 07/23/2009 07:44:33 AM:
> >
> >> 2009/7/15 Mike Mason <mmlnx@us.ibm.com>:
> >> > By default, EEH does what's known as a "hot reset" during error
recovery
> >> > of
> >> > a PCI Express device. We've found a case where the device needs a
> >> > "fundamental reset" to recover properly. The current PCI error
recovery
> >> > and
> >> > EEH frameworks do not support this distinction.
> >> >
> >> > The attached patch (courtesy of Richard Lary) adds a bit field to
> >> > pci_dev
> >> > that indicates whether the device requires a fundamental reset
during
> >> > error
> >> > recovery. This bit can be checked by EEH to determine which reset
type
> >> > is
> >> > required.
> >> >
> >> > This patch supersedes the previously submitted patch that
implemented a
> >> > reset type callback.
> >> >
> >> > Please review and let me know of any concerns.
> >>
> >> I like this patch a *lot* better .. it is vastly simpler, more direct.
> >>
> >>
> >> > diff -uNrp a/include/linux/pci.h b/include/linux/pci.h
> >> > --- a/include/linux/pci.h 2009-07-13 14:25:37.000000000 -0700
> >> > +++ b/include/linux/pci.h 2009-07-15 10:25:37.000000000 -0700
> >> > @@ -273,6 +273,7 @@ struct pci_dev {
> >> > unsigned int ari_enabled:1; /* ARI forwarding */
> >> > unsigned int is_managed:1;
> >> > unsigned int is_pcie:1;
> >> > + unsigned int fndmntl_rst_rqd:1; /* Dev requires
fundamental
> >> > reset
> >> > */
> >> > unsigned int state_saved:1;
> >> > unsigned int is_physfn:1;
> >> > unsigned int is_virtfn:1;
> >>
> >> As Ben points out, the name is awkward. How about needs_freset ?
> >
> > I am OK with name change.
> >
> >
> >> Since this affects the entire pci subsystem, it should be documented
> >> properly. The "pci error recovery" subsystem was designed to be
> >> usable in other architectures, and so the error recovery docs should
> >> take at least a paragraph to describe what this flag means, and when
> >> its supposed to be used.
> >
> > I will update the documentation, are you referring to
> > Documentation/powerpc/eeh-pci-error-recovery.txt
> > or some other documentation?
>
> No, I'm thinking
> Documentation/PCI/pci-error-recovery.txt
>
> because the flag is not powerpc-specific.
Got it, glad I asked...
-rich
> >
> >> Providing the docs patch together with the pci.h patch *only* would
> >> probably simplify acceptance by the PCI community.
> >>
> >> --linas
> >
[-- Attachment #2: Type: text/html, Size: 3743 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2009-07-27 14:29 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-07-14 18:35 Support for PCI Express reset type in EEH Mike Mason
2009-07-15 18:32 ` Mike Mason
2009-07-15 18:45 ` [PATCH] " Mike Mason
2009-07-23 14:44 ` Linas Vepstas
2009-07-23 15:03 ` Richard Lary
2009-07-24 21:36 ` Richard Lary
2009-07-25 0:30 ` Linas Vepstas
2009-07-27 14:29 ` Richard Lary
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).