From: linas <linas@austin.ibm.com>
To: paulus@samba.org
Cc: linuxppc64-dev@ozlabs.org, linux-kernel@vger.kernel.org,
linux-pci@atrey.karlin.mff.cuni.cz
Subject: [PATCH 8/22] ppc64: Slot Marking Bugfix
Date: Thu, 6 Oct 2005 18:33:20 -0500 [thread overview]
Message-ID: <20051006233320.GI29826@austin.ibm.com> (raw)
In-Reply-To: <20051006232032.GA29826@austin.ibm.com>
08-eeh-slot-marking-bug.patch
A device that experiences a PCI outage may be just one deivce out
of many that was affected. In order to avoid repeated reports of
a failure, the entire tree of affected devices should be marked
as failed. This patch marks up the entire tree.
Signed-off-by: Linas Vepstas <linas@linas.org>
Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh.c 2005-10-06 17:52:37.399078590 -0500
+++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c 2005-10-06 17:53:02.164603746 -0500
@@ -480,32 +480,47 @@
* an interrupt context, which is bad.
*/
-static inline void __eeh_mark_slot (struct device_node *dn)
+static inline void __eeh_mark_slot (struct device_node *dn, int mode_flag)
{
while (dn) {
- PCI_DN(dn)->eeh_mode |= EEH_MODE_ISOLATED;
+ if (PCI_DN(dn)) {
+ PCI_DN(dn)->eeh_mode |= mode_flag;
- if (dn->child)
- __eeh_mark_slot (dn->child);
+ if (dn->child)
+ __eeh_mark_slot (dn->child, mode_flag);
+ }
dn = dn->sibling;
}
}
-static inline void __eeh_clear_slot (struct device_node *dn)
+void eeh_mark_slot (struct device_node *dn, int mode_flag)
+{
+ dn = find_device_pe (dn);
+ PCI_DN(dn)->eeh_mode |= mode_flag;
+ __eeh_mark_slot (dn->child, mode_flag);
+}
+
+static inline void __eeh_clear_slot (struct device_node *dn, int mode_flag)
{
while (dn) {
- PCI_DN(dn)->eeh_mode &= ~EEH_MODE_ISOLATED;
- if (dn->child)
- __eeh_clear_slot (dn->child);
+ if (PCI_DN(dn)) {
+ PCI_DN(dn)->eeh_mode &= ~mode_flag;
+ PCI_DN(dn)->eeh_check_count = 0;
+ if (dn->child)
+ __eeh_clear_slot (dn->child, mode_flag);
+ }
dn = dn->sibling;
}
}
-static inline void eeh_clear_slot (struct device_node *dn)
+void eeh_clear_slot (struct device_node *dn, int mode_flag)
{
unsigned long flags;
spin_lock_irqsave(&confirm_error_lock, flags);
- __eeh_clear_slot (dn);
+ dn = find_device_pe (dn);
+ PCI_DN(dn)->eeh_mode &= ~mode_flag;
+ PCI_DN(dn)->eeh_check_count = 0;
+ __eeh_clear_slot (dn->child, mode_flag);
spin_unlock_irqrestore(&confirm_error_lock, flags);
}
@@ -530,7 +545,6 @@
int rets[3];
unsigned long flags;
struct pci_dn *pdn;
- struct device_node *pe_dn;
int rc = 0;
__get_cpu_var(total_mmio_ffs)++;
@@ -632,8 +646,7 @@
/* Avoid repeated reports of this failure, including problems
* with other functions on this device, and functions under
* bridges. */
- pe_dn = find_device_pe (dn);
- __eeh_mark_slot (pe_dn);
+ eeh_mark_slot (dn, EEH_MODE_ISOLATED);
spin_unlock_irqrestore(&confirm_error_lock, flags);
eeh_send_failure_event (dn, dev, rets[0], rets[2]);
@@ -745,9 +758,6 @@
rc, state, pdn->node->full_name);
return;
}
-
- if (state == 0)
- eeh_clear_slot (pdn->node->parent->child);
}
/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
@@ -766,6 +776,12 @@
#define PCI_BUS_RST_HOLD_TIME_MSEC 250
msleep (PCI_BUS_RST_HOLD_TIME_MSEC);
+
+ /* We might get hit with another EEH freeze as soon as the
+ * pci slot reset line is dropped. Make sure we don't miss
+ * these, and clear the flag now. */
+ eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED);
+
rtas_pci_slot_reset (pdn, 0);
/* After a PCI slot has been reset, the PCI Express spec requires
Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci.h 2005-10-06 17:52:37.399078590 -0500
+++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h 2005-10-06 17:53:02.165603605 -0500
@@ -86,6 +86,13 @@
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
+/**
+ * mark and clear slots: find "partition endpoint" PE and set or
+ * clear the flags for each subnode of the PE.
+ */
+void eeh_mark_slot (struct device_node *dn, int mode_flag);
+void eeh_clear_slot (struct device_node *dn, int mode_flag);
+
#endif
#endif /* __PPC_KERNEL_PCI_H__ */
next prev parent reply other threads:[~2005-10-06 23:33 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-10-06 23:20 [PATCH 0/22] ppc64: Full sequence of PCI Error recovery patches linas
2005-10-06 23:23 ` [PATCH 1/22] ppc64: Dynamic LPAR bugfix linas
2005-10-06 23:25 ` [PATCH 2/22] ppc64: Enable detection bugfix linas
2005-10-06 23:26 ` [PATCH 3/22] ppc64: EEH Recovery dispatcher thread linas
2005-10-06 23:28 ` [PATCH 4/22] ppc64: EEH Recovery support routines linas
2005-10-06 23:29 ` [PATCH 5/22] ppc64: Device BAR save and restore linas
2005-10-06 23:31 ` [PATCH 6/22] ppc64: PCI Error Recovery: documentation patch linas
2005-10-06 23:32 ` [PATCH 7/22] PCI Error Recovery: header file patch linas
2005-10-06 23:33 ` linas [this message]
2005-10-06 23:35 ` [PATCH 9/22] ppc64: DLPAR slot add and remove bugfixes linas
2005-10-06 23:36 ` [PATCH 10/22] ppc64: Crash on DLPAR PHB add linas
2005-10-06 23:39 ` [PATCH 11/22] ppc64: RPA PHP and EEH common code linas
2005-10-06 23:40 ` [PATCH 12/22] ppc64: RPA PHP cleanup linas
2005-10-06 23:44 ` [PATCH 13/22] ppc64: RPAPHP duplicated code removal linas
2005-10-06 23:46 ` [PATCH 14/22] ppc64: RPA PHP to EEH code movement linas
2006-01-07 21:28 ` Olaf Hering
2006-01-09 19:58 ` [PATCH]: ppowerpc: fix compile-time failure when EEH disabled linas
2005-10-06 23:47 ` [PATCH 15/22] ppc64: PCI Error Recovery: PPC64 core recovery routines linas
2005-10-12 9:49 ` Paul Mackerras
2005-10-13 16:03 ` linas
2005-10-06 23:53 ` [PATCH 16/22] PCI Address cache lookup code linas
2005-10-06 23:54 ` [PATCH 17/22] ppc64: New Partition Endpoin support linas
2005-10-06 23:55 ` [PATCH 18/22] PCI Error Recovery: IPR SCSI device driver linas
2005-10-06 23:56 ` [PATCH 19/22] PCI Error Recovery: Symbios " linas
2005-10-06 23:57 ` [PATCH 20/22] PCI Error Recovery: e100 network " linas
2005-10-11 0:10 ` Greg KH
2005-10-11 23:04 ` linas
2005-10-11 23:41 ` Paul Mackerras
2005-10-06 23:58 ` [PATCH 21/22] PCI Error Recovery: e1000 " linas
2005-10-06 23:59 ` [PATCH 22/22] PCI Error Recovery: ixgb " linas
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051006233320.GI29826@austin.ibm.com \
--to=linas@austin.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@atrey.karlin.mff.cuni.cz \
--cc=linuxppc64-dev@ozlabs.org \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox