From: linas <linas@austin.ibm.com>
To: paulus@samba.org
Cc: linuxppc64-dev@ozlabs.org, linux-kernel@vger.kernel.org,
linux-pci@atrey.karlin.mff.cuni.cz
Subject: [PATCH 8/22] ppc64: Slot Marking Bugfix
Date: Thu, 6 Oct 2005 18:33:20 -0500 [thread overview]
Message-ID: <20051006233320.GI29826@austin.ibm.com> (raw)
In-Reply-To: <20051006232032.GA29826@austin.ibm.com>
08-eeh-slot-marking-bug.patch
A device that experiences a PCI outage may be just one device out
of many that were affected. In order to avoid repeated reports of
a failure, the entire tree of affected devices should be marked
as failed. This patch marks up the entire tree.
Signed-off-by: Linas Vepstas <linas@linas.org>
Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh.c 2005-10-06 17:52:37.399078590 -0500
+++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c 2005-10-06 17:53:02.164603746 -0500
@@ -480,32 +480,47 @@
* an interrupt context, which is bad.
*/
-static inline void __eeh_mark_slot (struct device_node *dn)
+static inline void __eeh_mark_slot (struct device_node *dn, int mode_flag)
{
while (dn) {
- PCI_DN(dn)->eeh_mode |= EEH_MODE_ISOLATED;
+ if (PCI_DN(dn)) {
+ PCI_DN(dn)->eeh_mode |= mode_flag;
- if (dn->child)
- __eeh_mark_slot (dn->child);
+ if (dn->child)
+ __eeh_mark_slot (dn->child, mode_flag);
+ }
dn = dn->sibling;
}
}
-static inline void __eeh_clear_slot (struct device_node *dn)
+void eeh_mark_slot (struct device_node *dn, int mode_flag)
+{
+ dn = find_device_pe (dn);
+ PCI_DN(dn)->eeh_mode |= mode_flag;
+ __eeh_mark_slot (dn->child, mode_flag);
+}
+
+static inline void __eeh_clear_slot (struct device_node *dn, int mode_flag)
{
while (dn) {
- PCI_DN(dn)->eeh_mode &= ~EEH_MODE_ISOLATED;
- if (dn->child)
- __eeh_clear_slot (dn->child);
+ if (PCI_DN(dn)) {
+ PCI_DN(dn)->eeh_mode &= ~mode_flag;
+ PCI_DN(dn)->eeh_check_count = 0;
+ if (dn->child)
+ __eeh_clear_slot (dn->child, mode_flag);
+ }
dn = dn->sibling;
}
}
-static inline void eeh_clear_slot (struct device_node *dn)
+void eeh_clear_slot (struct device_node *dn, int mode_flag)
{
unsigned long flags;
spin_lock_irqsave(&confirm_error_lock, flags);
- __eeh_clear_slot (dn);
+ dn = find_device_pe (dn);
+ PCI_DN(dn)->eeh_mode &= ~mode_flag;
+ PCI_DN(dn)->eeh_check_count = 0;
+ __eeh_clear_slot (dn->child, mode_flag);
spin_unlock_irqrestore(&confirm_error_lock, flags);
}
@@ -530,7 +545,6 @@
int rets[3];
unsigned long flags;
struct pci_dn *pdn;
- struct device_node *pe_dn;
int rc = 0;
__get_cpu_var(total_mmio_ffs)++;
@@ -632,8 +646,7 @@
/* Avoid repeated reports of this failure, including problems
* with other functions on this device, and functions under
* bridges. */
- pe_dn = find_device_pe (dn);
- __eeh_mark_slot (pe_dn);
+ eeh_mark_slot (dn, EEH_MODE_ISOLATED);
spin_unlock_irqrestore(&confirm_error_lock, flags);
eeh_send_failure_event (dn, dev, rets[0], rets[2]);
@@ -745,9 +758,6 @@
rc, state, pdn->node->full_name);
return;
}
-
- if (state == 0)
- eeh_clear_slot (pdn->node->parent->child);
}
/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
@@ -766,6 +776,12 @@
#define PCI_BUS_RST_HOLD_TIME_MSEC 250
msleep (PCI_BUS_RST_HOLD_TIME_MSEC);
+
+ /* We might get hit with another EEH freeze as soon as the
+ * pci slot reset line is dropped. Make sure we don't miss
+ * these, and clear the flag now. */
+ eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED);
+
rtas_pci_slot_reset (pdn, 0);
/* After a PCI slot has been reset, the PCI Express spec requires
Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci.h 2005-10-06 17:52:37.399078590 -0500
+++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h 2005-10-06 17:53:02.165603605 -0500
@@ -86,6 +86,13 @@
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
+/**
+ * mark and clear slots: find "partition endpoint" PE and set or
+ * clear the flags for each subnode of the PE.
+ */
+void eeh_mark_slot (struct device_node *dn, int mode_flag);
+void eeh_clear_slot (struct device_node *dn, int mode_flag);
+
#endif
#endif /* __PPC_KERNEL_PCI_H__ */
next prev parent reply other threads:[~2005-10-06 23:33 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-10-06 23:20 [PATCH 0/22] ppc64: Full sequence of PCI Error recovery patches linas
2005-10-06 23:23 ` [PATCH 1/22] ppc64: Dynamic LPAR bugfix linas
2005-10-06 23:25 ` [PATCH 2/22] ppc64: Enable detection bugfix linas
2005-10-06 23:26 ` [PATCH 3/22] ppc64: EEH Recovery dispatcher thread linas
2005-10-06 23:28 ` [PATCH 4/22] ppc64: EEH Recovery support routines linas
2005-10-06 23:29 ` [PATCH 5/22] ppc64: Device BAR save and restore linas
2005-10-06 23:31 ` [PATCH 6/22] ppc64: PCI Error Recovery: documentation patch linas
2005-10-06 23:32 ` [PATCH 7/22] PCI Error Recovery: header file patch linas
2005-10-06 23:33 ` linas [this message]
2005-10-06 23:35 ` [PATCH 9/22] ppc64: DLPAR slot add and remove bugfixes linas
2005-10-06 23:36 ` [PATCH 10/22] ppc64: Crash on DLPAR PHB add linas
2005-10-06 23:39 ` [PATCH 11/22] ppc64: RPA PHP and EEH common code linas
2005-10-06 23:40 ` [PATCH 12/22] ppc64: RPA PHP cleanup linas
2005-10-06 23:44 ` [PATCH 13/22] ppc64: RPAPHP duplicated code removal linas
2005-10-06 23:46 ` [PATCH 14/22] ppc64: RPA PHP to EEH code movement linas
2006-01-07 21:28 ` Olaf Hering
2006-01-09 19:58 ` [PATCH]: ppowerpc: fix compile-time failure when EEH disabled linas
2005-10-06 23:47 ` [PATCH 15/22] ppc64: PCI Error Recovery: PPC64 core recovery routines linas
2005-10-12 9:49 ` Paul Mackerras
2005-10-13 16:03 ` linas
2005-10-06 23:53 ` [PATCH 16/22] PCI Address cache lookup code linas
2005-10-06 23:54 ` [PATCH 17/22] ppc64: New Partition Endpoin support linas
2005-10-06 23:55 ` [PATCH 18/22] PCI Error Recovery: IPR SCSI device driver linas
2005-10-06 23:56 ` [PATCH 19/22] PCI Error Recovery: Symbios " linas
2005-10-06 23:57 ` [PATCH 20/22] PCI Error Recovery: e100 network " linas
2005-10-11 0:10 ` Greg KH
2005-10-11 23:04 ` linas
2005-10-11 23:41 ` Paul Mackerras
2005-10-06 23:58 ` [PATCH 21/22] PCI Error Recovery: e1000 " linas
2005-10-06 23:59 ` [PATCH 22/22] PCI Error Recovery: ixgb " linas
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051006233320.GI29826@austin.ibm.com \
--to=linas@austin.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@atrey.karlin.mff.cuni.cz \
--cc=linuxppc64-dev@ozlabs.org \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.