* [PATCH 1/2] powerpc/eeh: Introduce flag EEH_PE_REMOVED
@ 2014-12-11 3:28 Gavin Shan
2014-12-11 3:28 ` [PATCH 2/2] powerpc/eeh: Allow to set maximal frozen times Gavin Shan
0 siblings, 1 reply; 2+ messages in thread
From: Gavin Shan @ 2014-12-11 3:28 UTC (permalink / raw)
To: linuxppc-dev; +Cc: grimm, Gavin Shan
When a PE's frozen count exceeds the maximal allowed times
(EEH_MAX_ALLOWED_FREEZES) and the PE is in isolated or recovery state,
that implicitly indicates the PE has been removed permanently. The patch
introduces flag EEH_PE_REMOVED to indicate that explicitly so that we
don't depend on the fixed maximal allowed times, which can be varied as
we do in the subsequent patch.
Flag EEH_PE_REMOVED is expected to be marked for the PE whose frozen
count exceeds the maximal allowed times, or which has just failed to recover.
Requested-by: Ryan Grimm <grimm@linux.vnet.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh.h | 1 +
arch/powerpc/kernel/eeh_driver.c | 2 +-
arch/powerpc/kernel/eeh_pe.c | 6 ++----
3 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 9c11d1e..3e4dd34 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -78,6 +78,7 @@ struct device_node;
#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */
#define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */
+#define EEH_PE_REMOVED (1 << 10) /* Removed permanently */
struct eeh_pe {
int type; /* PE type: PHB/Bus/Device */
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index b17e793..ac00672 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -806,7 +806,7 @@ perm_error:
eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
/* Mark the PE to be removed permanently */
- pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1;
+ eeh_pe_state_mark(pe, EEH_PE_REMOVED);
/*
* Shut down the device drivers for good. We mark
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index fa950fb..1e4946c 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -526,8 +526,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
struct pci_dev *pdev;
/* Keep the state of permanently removed PE intact */
- if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
- (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
+ if (pe->state & EEH_PE_REMOVED)
return NULL;
pe->state |= state;
@@ -600,8 +599,7 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
struct pci_dev *pdev;
/* Keep the state of permanently removed PE intact */
- if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
- (state & EEH_PE_ISOLATED))
+ if (pe->state & EEH_PE_REMOVED)
return NULL;
pe->state &= ~state;
--
1.8.3.2
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH 2/2] powerpc/eeh: Allow to set maximal frozen times
2014-12-11 3:28 [PATCH 1/2] powerpc/eeh: Introduce flag EEH_PE_REMOVED Gavin Shan
@ 2014-12-11 3:28 ` Gavin Shan
0 siblings, 0 replies; 2+ messages in thread
From: Gavin Shan @ 2014-12-11 3:28 UTC (permalink / raw)
To: linuxppc-dev; +Cc: grimm, Gavin Shan
When a PE's frozen count exceeds the maximal allowed frozen times, which
is currently 5, it will be forced offline permanently. Once the
PE is removed permanently, rebooting the machine is required to bring
the PE back. That's not convenient when testing EEH functionality.
The patch exports the maximal allowed frozen times through a debugfs
entry (/sys/kernel/debug/powerpc/eeh_max_freezes).
Requested-by: Ryan Grimm <grimm@linux.vnet.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh.h | 7 +------
arch/powerpc/kernel/eeh.c | 24 ++++++++++++++++++++++++
arch/powerpc/kernel/eeh_driver.c | 2 +-
3 files changed, 26 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 3e4dd34..55abfd0 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -218,6 +218,7 @@ struct eeh_ops {
};
extern int eeh_subsystem_flags;
+extern int eeh_max_freezes;
extern struct eeh_ops *eeh_ops;
extern raw_spinlock_t confirm_error_lock;
@@ -255,12 +256,6 @@ static inline void eeh_serialize_unlock(unsigned long flags)
raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
}
-/*
- * Max number of EEH freezes allowed before we consider the device
- * to be permanently disabled.
- */
-#define EEH_MAX_ALLOWED_FREEZES 5
-
typedef void *(*eeh_traverse_func)(void *data, void *flag);
void eeh_set_pe_aux_size(int size);
int eeh_phb_pe_create(struct pci_controller *phb);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 05be77d..cada1aa 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -104,6 +104,13 @@
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);
+/*
+ * EEH allowed maximal frozen times. If one particular PE's
+ * frozen count in last hour exceeds this limit, the PE will
+ * be forced to be offline permanently.
+ */
+int eeh_max_freezes = 5;
+
/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;
@@ -1655,8 +1662,22 @@ static int eeh_enable_dbgfs_get(void *data, u64 *val)
return 0;
}
+static int eeh_freeze_dbgfs_set(void *data, u64 val)
+{
+ eeh_max_freezes = val;
+ return 0;
+}
+
+static int eeh_freeze_dbgfs_get(void *data, u64 *val)
+{
+ *val = eeh_max_freezes;
+ return 0;
+}
+
DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
eeh_enable_dbgfs_set, "0x%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
+ eeh_freeze_dbgfs_set, "0x%llx\n");
#endif
static int __init eeh_init_proc(void)
@@ -1667,6 +1688,9 @@ static int __init eeh_init_proc(void)
debugfs_create_file("eeh_enable", 0600,
powerpc_debugfs_root, NULL,
&eeh_enable_dbgfs_ops);
+ debugfs_create_file("eeh_max_freezes", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_freeze_dbgfs_ops);
#endif
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index ac00672..d099540 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -667,7 +667,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
- if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
+ if (pe->freeze_count > eeh_max_freezes)
goto excess_failures;
pr_warn("EEH: This PCI device has failed %d times in the last hour\n",
pe->freeze_count);
--
1.8.3.2
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2014-12-11 3:29 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2014-12-11 3:28 [PATCH 1/2] powerpc/eeh: Introduce flag EEH_PE_REMOVED Gavin Shan
2014-12-11 3:28 ` [PATCH 2/2] powerpc/eeh: Allow to set maximal frozen times Gavin Shan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).