[PATCH v2 0/1] Parallel EEH recovery between PHBs

linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v2 0/1] Parallel EEH recovery between PHBs
@ 2024-06-17  7:36 Ganesh Goudar
  2024-06-17  7:36 ` [PATCH v2 1/1] powerpc/eeh: Enable PHBs to recovery in parallel Ganesh Goudar
  0 siblings, 1 reply; 2+ messages in thread
From: Ganesh Goudar @ 2024-06-17  7:36 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Ganesh Goudar, mahesh

This change is based on Sam Bobroff's patches which aimed
to allow recovery to happen in parallel between PHBs and PEs,
Due to various reasons the patches did not get in.

But having parallel recovery between PHBs is fairly simple and
gives significant improvement on powervm, Since powervm maintains
flat hierarchy for PCI devices.
This patch enables PHBs to have separate event queues and shorten
the time taken for EEH recovery by making the recovery to run in
parallel between PHBs.

On powervm with 64 VFs from same PHB,  I see approximately 48%
reduction in time taken in EEH recovery. On powernv the improvement
is not so significant.

Ganesh Goudar (1):
  powerpc/eeh: Enable PHBs to recovery in parallel

 arch/powerpc/include/asm/eeh_event.h  |  7 ++++
 arch/powerpc/include/asm/pci-bridge.h |  4 ++
 arch/powerpc/kernel/eeh_driver.c      | 27 +++++++++++-
 arch/powerpc/kernel/eeh_event.c       | 59 +++++++++++++++++----------
 arch/powerpc/kernel/eeh_pe.c          |  4 ++
 5 files changed, 78 insertions(+), 23 deletions(-)

-- 
2.44.0

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [PATCH v2 1/1] powerpc/eeh: Enable PHBs to recovery in parallel
  2024-06-17  7:36 [PATCH v2 0/1] Parallel EEH recovery between PHBs Ganesh Goudar
@ 2024-06-17  7:36 ` Ganesh Goudar
  0 siblings, 0 replies; 2+ messages in thread
From: Ganesh Goudar @ 2024-06-17  7:36 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Ganesh Goudar, mahesh

Currnetly, with a single event queue EEH recovery is entirely
serialized and takes place within a single kernel thread. This
can cause recovery to take a long time when there are many
devices.

Have the recovery event queue per PHB and allow the recovery to
happen in parallel for all the PHBs.

Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
v2: Include missing hunk, which modifies __eeh_send_failure_event.
---
 arch/powerpc/include/asm/eeh_event.h  |  7 ++++
 arch/powerpc/include/asm/pci-bridge.h |  4 ++
 arch/powerpc/kernel/eeh_driver.c      | 27 +++++++++++-
 arch/powerpc/kernel/eeh_event.c       | 59 +++++++++++++++++----------
 arch/powerpc/kernel/eeh_pe.c          |  4 ++
 5 files changed, 78 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index dadde7d52f46..6af1b5bb6103 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -8,6 +8,8 @@
 #define ASM_POWERPC_EEH_EVENT_H
 #ifdef __KERNEL__
 
+#include <linux/workqueue.h>
+
 /*
  * structure holding pci controller data that describes a
  * change in the isolation status of a PCI slot.  A pointer
@@ -15,15 +17,20 @@
  * callback.
  */
 struct eeh_event {
+	struct work_struct	work;
 	struct list_head	list;	/* to form event queue	*/
 	struct eeh_pe		*pe;	/* EEH PE		*/
 };
 
+extern spinlock_t eeh_eventlist_lock;
+
 int eeh_event_init(void);
+int eeh_phb_event(struct eeh_pe *pe);
 int eeh_send_failure_event(struct eeh_pe *pe);
 int __eeh_send_failure_event(struct eeh_pe *pe);
 void eeh_remove_event(struct eeh_pe *pe, bool force);
 void eeh_handle_normal_event(struct eeh_pe *pe);
+void eeh_handle_normal_event_work(struct work_struct *work);
 void eeh_handle_special_event(void);
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 2aa3a091ef20..61884d9398bf 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -138,6 +138,10 @@ struct pci_controller {
 
 	/* iommu_ops support */
 	struct iommu_device	iommu;
+
+	bool eeh_in_progress;
+	struct list_head eeh_eventlist;
+	spinlock_t eeh_eventlist_lock;
 };
 
 /* These are used for config access before all the PCI probing
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 7efe04c68f0f..4cf5fd409369 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -1116,6 +1116,30 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
 }
 
+void eeh_handle_normal_event_work(struct work_struct *work)
+{
+	unsigned long flags;
+	struct eeh_event *event = container_of(work, struct eeh_event, work);
+	struct pci_controller *phb = event->pe->phb;
+
+	eeh_handle_normal_event(event->pe);
+
+	kfree(event);
+	spin_lock_irqsave(&phb->eeh_eventlist_lock, flags);
+	WARN_ON_ONCE(!phb->eeh_in_progress);
+	if (list_empty(&phb->eeh_eventlist)) {
+		phb->eeh_in_progress = false;
+		pr_debug("EEH: No more work to do\n");
+	} else {
+		pr_warn("EEH: More work to do\n");
+		event = list_entry(phb->eeh_eventlist.next,
+				   struct eeh_event, list);
+		list_del(&event->list);
+		queue_work(system_unbound_wq, &event->work);
+	}
+	spin_unlock_irqrestore(&phb->eeh_eventlist_lock, flags);
+}
+
 /**
  * eeh_handle_special_event - Handle EEH events without a specific failing PE
  *
@@ -1185,8 +1209,7 @@ void eeh_handle_special_event(void)
 		 */
 		if (rc == EEH_NEXT_ERR_FROZEN_PE ||
 		    rc == EEH_NEXT_ERR_FENCED_PHB) {
-			eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
-			eeh_handle_normal_event(pe);
+			eeh_phb_event(pe);
 		} else {
 			eeh_for_each_pe(pe, tmp_pe)
 				eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index c23a454af08a..8a9d6358d39f 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -22,7 +22,7 @@
  *  work-queue, where a worker thread can drive recovery.
  */
 
-static DEFINE_SPINLOCK(eeh_eventlist_lock);
+DEFINE_SPINLOCK(eeh_eventlist_lock);
 static DECLARE_COMPLETION(eeh_eventlist_event);
 static LIST_HEAD(eeh_eventlist);
 
@@ -91,6 +91,42 @@ int eeh_event_init(void)
 	return 0;
 }
 
+int eeh_phb_event(struct eeh_pe *pe)
+{
+	struct eeh_event *event;
+	unsigned long flags;
+	struct pci_controller *phb;
+
+	event = kzalloc(sizeof(*event), GFP_ATOMIC);
+	if (!event)
+		return -ENOMEM;
+
+	if (pe) {
+		phb = pe->phb;
+		event->pe = pe;
+		INIT_WORK(&event->work, eeh_handle_normal_event_work);
+		eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+		pr_err("EEH: EVENT=ERROR_DETECTED PHB=%#x PE=%#x\n",
+		       phb->global_number, pe->addr);
+		spin_lock_irqsave(&phb->eeh_eventlist_lock, flags);
+		if (phb->eeh_in_progress) {
+			pr_info("EEH: EEH already in progress on this PHB, queueing.\n");
+			list_add(&event->list, &phb->eeh_eventlist);
+		} else {
+			pr_info("EEH: Beginning recovery on this PHB.\n");
+			WARN_ON_ONCE(!list_empty(&phb->eeh_eventlist));
+			phb->eeh_in_progress = true;
+			queue_work(system_unbound_wq, &event->work);
+		}
+		spin_unlock_irqrestore(&phb->eeh_eventlist_lock, flags);
+	} else {
+		spin_lock_irqsave(&eeh_eventlist_lock, flags);
+		list_add(&event->list, &eeh_eventlist);
+		complete(&eeh_eventlist_event);
+		spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+	}
+	return 0;
+}
 /**
  * eeh_send_failure_event - Generate a PCI error event
  * @pe: EEH PE
@@ -101,16 +137,6 @@ int eeh_event_init(void)
  */
 int __eeh_send_failure_event(struct eeh_pe *pe)
 {
-	unsigned long flags;
-	struct eeh_event *event;
-
-	event = kzalloc(sizeof(*event), GFP_ATOMIC);
-	if (!event) {
-		pr_err("EEH: out of memory, event not handled\n");
-		return -ENOMEM;
-	}
-	event->pe = pe;
-
 	/*
 	 * Mark the PE as recovering before inserting it in the queue.
 	 * This prevents the PE from being free()ed by a hotplug driver
@@ -128,16 +154,7 @@ int __eeh_send_failure_event(struct eeh_pe *pe)
 
 		eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
 	}
-
-	/* We may or may not be called in an interrupt context */
-	spin_lock_irqsave(&eeh_eventlist_lock, flags);
-	list_add(&event->list, &eeh_eventlist);
-	spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
-
-	/* For EEH deamon to knick in */
-	complete(&eeh_eventlist_event);
-
-	return 0;
+	return eeh_phb_event(pe);
 }
 
 int eeh_send_failure_event(struct eeh_pe *pe)
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index d1030bc52564..67c6e6621b0d 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -81,6 +81,10 @@ int eeh_phb_pe_create(struct pci_controller *phb)
 {
 	struct eeh_pe *pe;
 
+	phb->eeh_in_progress = false;
+	INIT_LIST_HEAD(&phb->eeh_eventlist);
+	spin_lock_init(&phb->eeh_eventlist_lock);
+
 	/* Allocate PHB PE */
 	pe = eeh_pe_alloc(phb, EEH_PE_PHB);
 	if (!pe) {
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-06-17  7:38 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-17  7:36 [PATCH v2 0/1] Parallel EEH recovery between PHBs Ganesh Goudar
2024-06-17  7:36 ` [PATCH v2 1/1] powerpc/eeh: Enable PHBs to recovery in parallel Ganesh Goudar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).