LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 07/13] powerpc/eeh: Clean up pci_ers_result handling
From: Sam Bobroff @ 2018-05-25  3:11 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>

As EEH event handling progresses, a cumulative result of type
pci_ers_result is built up by (some of) the eeh_report_*() functions
using either:
	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
or:
	if ((*res == PCI_ERS_RESULT_NONE) ||
	    (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
	if (*res == PCI_ERS_RESULT_DISCONNECT &&
	    rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
(Where *res is the accumulator.)

However, the intent is not immediately clear and the result in some
situations is order dependent.

Address this by assigning a priority to each result value, and always
merging to the highest priority. This renders the intent clear, and
provides a stable value for all orderings.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
====== v1 -> v2: ======
* Added the value, and missing newline, to some WARN()s.
* Improved name of merge_result() to pci_ers_merge_result().
* Adjusted the result priorities so that unknown doesn't overlap with _NONE.

 arch/powerpc/kernel/eeh_driver.c | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 188d15c4fe3a..2d3cac584899 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -39,6 +39,29 @@ struct eeh_rmv_data {
 	int removed;
 };
 
+static int eeh_result_priority(enum pci_ers_result result)
+{
+	switch (result) {
+	case PCI_ERS_RESULT_NONE: return 1;
+	case PCI_ERS_RESULT_NO_AER_DRIVER: return 2;
+	case PCI_ERS_RESULT_RECOVERED: return 3;
+	case PCI_ERS_RESULT_CAN_RECOVER: return 4;
+	case PCI_ERS_RESULT_DISCONNECT: return 5;
+	case PCI_ERS_RESULT_NEED_RESET: return 6;
+	default:
+		WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result);
+		return 0;
+	}
+};
+
+static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
+						enum pci_ers_result new)
+{
+	if (eeh_result_priority(new) > eeh_result_priority(old))
+		return new;
+	return old;
+}
+
 /**
  * eeh_pcid_get - Get the PCI device driver
  * @pdev: PCI device
@@ -206,9 +229,7 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
 
 	rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
 
-	/* A driver that needs a reset trumps all others */
-	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
-	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+	*res = pci_ers_merge_result(*res, rc);
 
 	edev->in_error = true;
 	pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
@@ -249,9 +270,7 @@ static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata)
 
 	rc = driver->err_handler->mmio_enabled(dev);
 
-	/* A driver that needs a reset trumps all others */
-	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
-	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+	*res = pci_ers_merge_result(*res, rc);
 
 out:
 	eeh_pcid_put(dev);
@@ -294,10 +313,7 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
 		goto out;
 
 	rc = driver->err_handler->slot_reset(dev);
-	if ((*res == PCI_ERS_RESULT_NONE) ||
-	    (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
-	if (*res == PCI_ERS_RESULT_DISCONNECT &&
-	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+	*res = pci_ers_merge_result(*res, rc);
 
 out:
 	eeh_pcid_put(dev);
-- 
2.16.1.74.g9b0b1f47b

^ permalink raw reply related

* [PATCH v2 12/13] powerpc/eeh: Cleaner handling of EEH_DEV_NO_HANDLER
From: Sam Bobroff @ 2018-05-25  3:11 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>

If a device without a driver is recovered via EEH, the flag
EEH_DEV_NO_HANDLER is incorrectly left set on the device after
recovery, because the test in eeh_report_resume() for the existence of
a bound driver is done before the flag is cleared. If a driver is
later bound, and EEH experienced again, some of the drivers EEH
handers are not called.

To correct this, clear the flag unconditionally after EEH processing
is complete.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
 arch/powerpc/kernel/eeh_driver.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 8dd2a33424a1..42fe80ed6f59 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -405,7 +405,6 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
 	if (!driver->err_handler ||
 	    !driver->err_handler->resume ||
 	    (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
-		edev->mode &= ~EEH_DEV_NO_HANDLER;
 		goto out;
 	}
 
@@ -780,6 +779,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 {
 	struct pci_bus *bus;
 	struct eeh_dev *edev, *tmp;
+	struct eeh_pe *tmp_pe;
 	int rc = 0;
 	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
 	struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
@@ -934,6 +934,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	eeh_set_irq_state(pe, true);
 	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 
+	eeh_for_each_pe(pe, tmp_pe)
+		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+			edev->mode &= ~EEH_DEV_NO_HANDLER;
+
 	pr_info("EEH: Recovery successful.\n");
 	goto final;
 
-- 
2.16.1.74.g9b0b1f47b

^ permalink raw reply related

* [PATCH v2 06/13] powerpc/eeh: Add message when PE processing at parent
From: Sam Bobroff @ 2018-05-25  3:11 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>

To aid debugging, add a message to show when EEH processing for a PE
will be done at the device's parent, rather than directly at the
device.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f82dade4fb9a..1139821a9aec 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -541,8 +541,12 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 
 		/* Frozen parent PE ? */
 		ret = eeh_ops->get_state(parent_pe, NULL);
-		if (ret > 0 && !eeh_state_active(ret))
+		if (ret > 0 && !eeh_state_active(ret)) {
 			pe = parent_pe;
+			pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n",
+			       pe->phb->global_number, pe->addr,
+			       pe->phb->global_number, parent_pe->addr);
+		}
 
 		/* Next parent level */
 		parent_pe = parent_pe->parent;
-- 
2.16.1.74.g9b0b1f47b

^ permalink raw reply related

* [PATCH v2 10/13] powerpc/eeh: Introduce eeh_set_channel_state()
From: Sam Bobroff @ 2018-05-25  3:11 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>

To ease future refactoring, extract setting of the channel state
from the report functions out into their own functions. This increases
the amount of code that is identical across all of the report
functions.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
 arch/powerpc/kernel/eeh_driver.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 30898866418b..76951ca701b2 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -199,6 +199,17 @@ static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
 	return NULL;
 }
 
+static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
+{
+	struct eeh_pe *pe;
+	struct eeh_dev *edev, *tmp;
+
+	eeh_for_each_pe(root, pe)
+		eeh_pe_for_each_dev(pe, edev, tmp)
+			if (eeh_edev_actionable(edev))
+				edev->pdev->error_state = s;
+}
+
 /**
  * eeh_report_error - Report pci error to each device driver
  * @data: eeh device
@@ -218,7 +229,6 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
 		return NULL;
 
 	device_lock(&dev->dev);
-	dev->error_state = pci_channel_io_frozen;
 
 	driver = eeh_pcid_get(dev);
 	if (!driver) goto out_no_dev;
@@ -301,7 +311,6 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
 		return NULL;
 
 	device_lock(&dev->dev);
-	dev->error_state = pci_channel_io_normal;
 
 	driver = eeh_pcid_get(dev);
 	if (!driver) goto out_no_dev;
@@ -371,7 +380,6 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
 		return NULL;
 
 	device_lock(&dev->dev);
-	dev->error_state = pci_channel_io_normal;
 
 	driver = eeh_pcid_get(dev);
 	if (!driver) goto out_no_dev;
@@ -795,6 +803,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	 * hotplug for this case.
 	 */
 	pr_info("EEH: Notify device drivers to shutdown\n");
+	eeh_set_channel_state(pe, pci_channel_io_frozen);
 	eeh_pe_dev_traverse(pe, eeh_report_error, &result);
 	if ((pe->type & EEH_PE_PHB) &&
 	    result != PCI_ERS_RESULT_NONE &&
@@ -885,6 +894,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 		pr_info("EEH: Notify device drivers "
 			"the completion of reset\n");
 		result = PCI_ERS_RESULT_NONE;
+		eeh_set_channel_state(pe, pci_channel_io_normal);
 		eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
 	}
 
@@ -906,6 +916,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 
 	/* Tell all device drivers that they can resume operations */
 	pr_info("EEH: Notify device driver to resume\n");
+	eeh_set_channel_state(pe, pci_channel_io_normal);
 	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 
 	pr_info("EEH: Recovery successful.\n");
@@ -924,6 +935,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	eeh_slot_error_detail(pe, EEH_LOG_PERM);
 
 	/* Notify all devices that they're about to go down. */
+	eeh_set_channel_state(pe, pci_channel_io_perm_failure);
 	eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
 
 	/* Mark the PE to be removed permanently */
@@ -1033,6 +1045,7 @@ void eeh_handle_special_event(void)
 
 				/* Notify all devices to be down */
 				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+				eeh_set_channel_state(pe, pci_channel_io_perm_failure);
 				eeh_pe_dev_traverse(pe,
 					eeh_report_failure, NULL);
 				bus = eeh_pe_bus_get(phb_pe);
-- 
2.16.1.74.g9b0b1f47b

^ permalink raw reply related

* [PATCH v2 09/13] powerpc/eeh: Introduce eeh_edev_actionable()
From: Sam Bobroff @ 2018-05-25  3:11 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>

The same test is done in every EEH report function, so factor it out.

Since eeh_dev_removed() needs to be moved higher up in the file,
simplify it a little while we're at it.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
 arch/powerpc/kernel/eeh_driver.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 2d3cac584899..30898866418b 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -62,6 +62,17 @@ static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
 	return old;
 }
 
+static bool eeh_dev_removed(struct eeh_dev *edev)
+{
+	return !edev || (edev->mode & EEH_DEV_REMOVED);
+}
+
+static bool eeh_edev_actionable(struct eeh_dev *edev)
+{
+	return (edev->pdev && !eeh_dev_removed(edev) &&
+		!eeh_pe_passed(edev->pe));
+}
+
 /**
  * eeh_pcid_get - Get the PCI device driver
  * @pdev: PCI device
@@ -163,15 +174,6 @@ static void eeh_enable_irq(struct pci_dev *dev)
 	}
 }
 
-static bool eeh_dev_removed(struct eeh_dev *edev)
-{
-	/* EEH device removed ? */
-	if (!edev || (edev->mode & EEH_DEV_REMOVED))
-		return true;
-
-	return false;
-}
-
 static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
 {
 	struct pci_dev *pdev;
@@ -212,7 +214,7 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
 
-	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+	if (!eeh_edev_actionable(edev))
 		return NULL;
 
 	device_lock(&dev->dev);
@@ -256,7 +258,7 @@ static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata)
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
 
-	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+	if (!eeh_edev_actionable(edev))
 		return NULL;
 
 	device_lock(&dev->dev);
@@ -295,7 +297,7 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
 
-	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+	if (!eeh_edev_actionable(edev))
 		return NULL;
 
 	device_lock(&dev->dev);
@@ -365,7 +367,7 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
 	bool was_in_error;
 	struct pci_driver *driver;
 
-	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+	if (!eeh_edev_actionable(edev))
 		return NULL;
 
 	device_lock(&dev->dev);
@@ -412,7 +414,7 @@ static void *eeh_report_failure(struct eeh_dev *edev, void *userdata)
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	struct pci_driver *driver;
 
-	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+	if (!eeh_edev_actionable(edev))
 		return NULL;
 
 	device_lock(&dev->dev);
-- 
2.16.1.74.g9b0b1f47b

^ permalink raw reply related

* [PATCH v2 11/13] powerpc/eeh: Introduce eeh_set_irq_state()
From: Sam Bobroff @ 2018-05-25  3:11 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>

To ease future refactoring, extract calls to eeh_enable_irq() and
eeh_disable_irq() from the various report functions. This makes
the report functions initial sequences more similar, as well as making
the IRQ changes visible when reading eeh_handle_normal_event().

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
====== v1 -> v2: ======
* Reorganised eeh_set_irq_state() to reduce nesting depth.

 arch/powerpc/kernel/eeh_driver.c | 52 ++++++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 76951ca701b2..8dd2a33424a1 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -118,22 +118,20 @@ static inline void eeh_pcid_put(struct pci_dev *pdev)
  * do real work because EEH should freeze DMA transfers for those PCI
  * devices encountering EEH errors, which includes MSI or MSI-X.
  */
-static void eeh_disable_irq(struct pci_dev *dev)
+static void eeh_disable_irq(struct eeh_dev *edev)
 {
-	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
-
 	/* Don't disable MSI and MSI-X interrupts. They are
 	 * effectively disabled by the DMA Stopped state
 	 * when an EEH error occurs.
 	 */
-	if (dev->msi_enabled || dev->msix_enabled)
+	if (edev->pdev->msi_enabled || edev->pdev->msix_enabled)
 		return;
 
-	if (!irq_has_action(dev->irq))
+	if (!irq_has_action(edev->pdev->irq))
 		return;
 
 	edev->mode |= EEH_DEV_IRQ_DISABLED;
-	disable_irq_nosync(dev->irq);
+	disable_irq_nosync(edev->pdev->irq);
 }
 
 /**
@@ -143,10 +141,8 @@ static void eeh_disable_irq(struct pci_dev *dev)
  * This routine must be called to enable interrupt while failed
  * device could be resumed.
  */
-static void eeh_enable_irq(struct pci_dev *dev)
+static void eeh_enable_irq(struct eeh_dev *edev)
 {
-	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
-
 	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
 		edev->mode &= ~EEH_DEV_IRQ_DISABLED;
 		/*
@@ -169,8 +165,8 @@ static void eeh_enable_irq(struct pci_dev *dev)
 		 *
 		 *	tglx
 		 */
-		if (irqd_irq_disabled(irq_get_irq_data(dev->irq)))
-			enable_irq(dev->irq);
+		if (irqd_irq_disabled(irq_get_irq_data(edev->pdev->irq)))
+			enable_irq(edev->pdev->irq);
 	}
 }
 
@@ -210,6 +206,29 @@ static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
 				edev->pdev->error_state = s;
 }
 
+static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
+{
+	struct eeh_pe *pe;
+	struct eeh_dev *edev, *tmp;
+
+	eeh_for_each_pe(root, pe) {
+		eeh_pe_for_each_dev(pe, edev, tmp) {
+			if (!eeh_edev_actionable(edev))
+				continue;
+
+			if (!eeh_pcid_get(edev->pdev))
+				continue;
+
+			if (enable)
+				eeh_enable_irq(edev);
+			else
+				eeh_disable_irq(edev);
+
+			eeh_pcid_put(edev->pdev);
+		}
+	}
+}
+
 /**
  * eeh_report_error - Report pci error to each device driver
  * @data: eeh device
@@ -233,8 +252,6 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
 	driver = eeh_pcid_get(dev);
 	if (!driver) goto out_no_dev;
 
-	eeh_disable_irq(dev);
-
 	if (!driver->err_handler ||
 	    !driver->err_handler->error_detected)
 		goto out;
@@ -315,8 +332,6 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
 	driver = eeh_pcid_get(dev);
 	if (!driver) goto out_no_dev;
 
-	eeh_enable_irq(dev);
-
 	if (!driver->err_handler ||
 	    !driver->err_handler->slot_reset ||
 	    (edev->mode & EEH_DEV_NO_HANDLER) ||
@@ -386,7 +401,6 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
 
 	was_in_error = edev->in_error;
 	edev->in_error = false;
-	eeh_enable_irq(dev);
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->resume ||
@@ -431,8 +445,6 @@ static void *eeh_report_failure(struct eeh_dev *edev, void *userdata)
 	driver = eeh_pcid_get(dev);
 	if (!driver) goto out_no_dev;
 
-	eeh_disable_irq(dev);
-
 	if (!driver->err_handler ||
 	    !driver->err_handler->error_detected)
 		goto out;
@@ -804,6 +816,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	 */
 	pr_info("EEH: Notify device drivers to shutdown\n");
 	eeh_set_channel_state(pe, pci_channel_io_frozen);
+	eeh_set_irq_state(pe, false);
 	eeh_pe_dev_traverse(pe, eeh_report_error, &result);
 	if ((pe->type & EEH_PE_PHB) &&
 	    result != PCI_ERS_RESULT_NONE &&
@@ -895,6 +908,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 			"the completion of reset\n");
 		result = PCI_ERS_RESULT_NONE;
 		eeh_set_channel_state(pe, pci_channel_io_normal);
+		eeh_set_irq_state(pe, true);
 		eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
 	}
 
@@ -917,6 +931,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	/* Tell all device drivers that they can resume operations */
 	pr_info("EEH: Notify device driver to resume\n");
 	eeh_set_channel_state(pe, pci_channel_io_normal);
+	eeh_set_irq_state(pe, true);
 	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 
 	pr_info("EEH: Recovery successful.\n");
@@ -936,6 +951,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 
 	/* Notify all devices that they're about to go down. */
 	eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+	eeh_set_irq_state(pe, false);
 	eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
 
 	/* Mark the PE to be removed permanently */
-- 
2.16.1.74.g9b0b1f47b

^ permalink raw reply related

* [PATCH v2 05/13] powerpc/eeh: Strengthen types of eeh traversal functions
From: Sam Bobroff @ 2018-05-25  3:11 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>

The traversal functions eeh_pe_traverse() and eeh_pe_dev_traverse()
both provide their first argument as void * but every single user casts
it to the expected type.

Change the type of the first parameter from void * to the appropriate
type, and clean up all uses.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
 arch/powerpc/include/asm/eeh.h   |  7 ++++---
 arch/powerpc/kernel/eeh.c        | 13 +++++--------
 arch/powerpc/kernel/eeh_driver.c | 30 ++++++++++--------------------
 arch/powerpc/kernel/eeh_pe.c     | 19 +++++++------------
 4 files changed, 26 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index c2266ca61853..f02e0400e6f2 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -262,7 +262,8 @@ static inline bool eeh_state_active(int state)
 	== (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 }
 
-typedef void *(*eeh_traverse_func)(void *data, void *flag);
+typedef void *(*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
+typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
 void eeh_set_pe_aux_size(int size);
 int eeh_phb_pe_create(struct pci_controller *phb);
 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
@@ -272,9 +273,9 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev);
 int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
 void eeh_pe_update_time_stamp(struct eeh_pe *pe);
 void *eeh_pe_traverse(struct eeh_pe *root,
-		eeh_traverse_func fn, void *flag);
+		      eeh_pe_traverse_func fn, void *flag);
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
-		eeh_traverse_func fn, void *flag);
+			  eeh_edev_traverse_func fn, void *flag);
 void eeh_pe_restore_bars(struct eeh_pe *pe);
 const char *eeh_pe_loc_get(struct eeh_pe *pe);
 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index bc640e4c5ca5..f82dade4fb9a 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -263,9 +263,8 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 	return n;
 }
 
-static void *eeh_dump_pe_log(void *data, void *flag)
+static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag)
 {
-	struct eeh_pe *pe = data;
 	struct eeh_dev *edev, *tmp;
 	size_t *plen = flag;
 
@@ -686,9 +685,9 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
 	return rc;
 }
 
-static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
+static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
+					    void *userdata)
 {
-	struct eeh_dev *edev = data;
 	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
 	struct pci_dev *dev = userdata;
 
@@ -714,9 +713,8 @@ static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
 	return NULL;
 }
 
-static void *eeh_restore_dev_state(void *data, void *userdata)
+static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = data;
 	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
 	struct pci_dev *dev = userdata;
@@ -856,11 +854,10 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
  * the indicated device and its children so that the bunch of the
  * devices could be reset properly.
  */
-static void *eeh_set_dev_freset(void *data, void *flag)
+static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
 {
 	struct pci_dev *dev;
 	unsigned int *freset = (unsigned int *)flag;
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 
 	dev = eeh_dev_to_pci_dev(edev);
 	if (dev)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index ca9a73fe9cc5..188d15c4fe3a 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -149,9 +149,8 @@ static bool eeh_dev_removed(struct eeh_dev *edev)
 	return false;
 }
 
-static void *eeh_dev_save_state(void *data, void *userdata)
+static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = data;
 	struct pci_dev *pdev;
 
 	if (!edev)
@@ -184,9 +183,8 @@ static void *eeh_dev_save_state(void *data, void *userdata)
  * merge the device driver responses. Cumulative response
  * passed back in "userdata".
  */
-static void *eeh_report_error(void *data, void *userdata)
+static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
@@ -231,9 +229,8 @@ static void *eeh_report_error(void *data, void *userdata)
  * are now enabled. Collects up and merges the device driver responses.
  * Cumulative response passed back in "userdata".
  */
-static void *eeh_report_mmio_enabled(void *data, void *userdata)
+static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
@@ -273,9 +270,8 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
  * some actions, usually to save data the driver needs so that the
  * driver can work again while the device is recovered.
  */
-static void *eeh_report_reset(void *data, void *userdata)
+static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
@@ -310,9 +306,8 @@ static void *eeh_report_reset(void *data, void *userdata)
 	return NULL;
 }
 
-static void *eeh_dev_restore_state(void *data, void *userdata)
+static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = data;
 	struct pci_dev *pdev;
 
 	if (!edev)
@@ -348,9 +343,8 @@ static void *eeh_dev_restore_state(void *data, void *userdata)
  * could resume so that the device driver can do some initialization
  * to make the recovered device work again.
  */
-static void *eeh_report_resume(void *data, void *userdata)
+static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	bool was_in_error;
 	struct pci_driver *driver;
@@ -397,9 +391,8 @@ static void *eeh_report_resume(void *data, void *userdata)
  * This informs the device driver that the device is permanently
  * dead, and that no further recovery attempts will be made on it.
  */
-static void *eeh_report_failure(void *data, void *userdata)
+static void *eeh_report_failure(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	struct pci_driver *driver;
 
@@ -457,10 +450,9 @@ static void *eeh_add_virt_device(void *data, void *userdata)
 	return NULL;
 }
 
-static void *eeh_rmv_device(void *data, void *userdata)
+static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
 {
 	struct pci_driver *driver;
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
 	int *removed = rmv_data ? &rmv_data->removed : NULL;
@@ -532,9 +524,8 @@ static void *eeh_rmv_device(void *data, void *userdata)
 	return NULL;
 }
 
-static void *eeh_pe_detach_dev(void *data, void *userdata)
+static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
 {
-	struct eeh_pe *pe = (struct eeh_pe *)data;
 	struct eeh_dev *edev, *tmp;
 
 	eeh_pe_for_each_dev(pe, edev, tmp) {
@@ -555,9 +546,8 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
  * PE reset (for 3 times), we try to clear the frozen state
  * for 3 times as well.
  */
-static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
+static void *__eeh_clear_pe_frozen_state(struct eeh_pe *pe, void *flag)
 {
-	struct eeh_pe *pe = (struct eeh_pe *)data;
 	bool clear_sw_state = *(bool *)flag;
 	int i, rc = 1;
 
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index ee5a67d57aab..38a4bcd8ed13 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -173,7 +173,7 @@ static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
  * to be traversed.
  */
 void *eeh_pe_traverse(struct eeh_pe *root,
-		      eeh_traverse_func fn, void *flag)
+		      eeh_pe_traverse_func fn, void *flag)
 {
 	struct eeh_pe *pe;
 	void *ret;
@@ -196,7 +196,7 @@ void *eeh_pe_traverse(struct eeh_pe *root,
  * PE and its child PEs.
  */
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
-		eeh_traverse_func fn, void *flag)
+			  eeh_edev_traverse_func fn, void *flag)
 {
 	struct eeh_pe *pe;
 	struct eeh_dev *edev, *tmp;
@@ -235,9 +235,8 @@ struct eeh_pe_get_flag {
 	int config_addr;
 };
 
-static void *__eeh_pe_get(void *data, void *flag)
+static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
 {
-	struct eeh_pe *pe = (struct eeh_pe *)data;
 	struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag;
 
 	/* Unexpected PHB PE */
@@ -551,9 +550,8 @@ void eeh_pe_update_time_stamp(struct eeh_pe *pe)
  * PE. Also, the associated PCI devices will be put into IO frozen
  * state as well.
  */
-static void *__eeh_pe_state_mark(void *data, void *flag)
+static void *__eeh_pe_state_mark(struct eeh_pe *pe, void *flag)
 {
-	struct eeh_pe *pe = (struct eeh_pe *)data;
 	int state = *((int *)flag);
 	struct eeh_dev *edev, *tmp;
 	struct pci_dev *pdev;
@@ -595,9 +593,8 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state)
 }
 EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
 
-static void *__eeh_pe_dev_mode_mark(void *data, void *flag)
+static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
 {
-	struct eeh_dev *edev = data;
 	int mode = *((int *)flag);
 
 	edev->mode |= mode;
@@ -625,9 +622,8 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
  * given PE. Besides, we also clear the check count of the PE
  * as well.
  */
-static void *__eeh_pe_state_clear(void *data, void *flag)
+static void *__eeh_pe_state_clear(struct eeh_pe *pe, void *flag)
 {
-	struct eeh_pe *pe = (struct eeh_pe *)data;
 	int state = *((int *)flag);
 	struct eeh_dev *edev, *tmp;
 	struct pci_dev *pdev;
@@ -858,9 +854,8 @@ static void eeh_restore_device_bars(struct eeh_dev *edev)
  * the expansion ROM base address, the latency timer, and etc.
  * from the saved values in the device node.
  */
-static void *eeh_restore_one_device_bars(void *data, void *flag)
+static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
 {
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 
 	/* Do special restore for bridges */
-- 
2.16.1.74.g9b0b1f47b

^ permalink raw reply related

* [PATCH v2 13/13] powerpc/eeh: Refactor report functions
From: Sam Bobroff @ 2018-05-25  3:11 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>

The EEH report functions now share a fair bit of code around the start
and end of each function.

So factor out as much as possible, and move the traversal into a
custom function. This also allows accurate debug to be generated more
easily.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
====== v1 -> v2: ======
* Better name for eeh_infoline() and implement using a function.
* Move the core of eeh_pe_report() into a new function to improve readability.
* pci_ers_result_name(): match parameter name to eeh_result_priority(), correct and improve warning message.

 arch/powerpc/kernel/eeh_driver.c | 310 ++++++++++++++++++++-------------------
 1 file changed, 159 insertions(+), 151 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 42fe80ed6f59..7cab58abbec9 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -54,6 +54,40 @@ static int eeh_result_priority(enum pci_ers_result result)
 	}
 };
 
+const char *pci_ers_result_name(enum pci_ers_result result)
+{
+	switch (result) {
+	case PCI_ERS_RESULT_NONE: return "none";
+	case PCI_ERS_RESULT_CAN_RECOVER: return "can recover";
+	case PCI_ERS_RESULT_NEED_RESET: return "need reset";
+	case PCI_ERS_RESULT_DISCONNECT: return "disconnect";
+	case PCI_ERS_RESULT_RECOVERED: return "recovered";
+	case PCI_ERS_RESULT_NO_AER_DRIVER: return "no AER driver";
+	default:
+		WARN_ONCE(1, "Unknown result type: %d\n", (int)result);
+		return "unknown";
+	}
+};
+
+static __printf(2, 3)
+void eeh_edev_info(const struct eeh_dev *edev, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n",
+			edev->pe_config_addr,
+			edev->pdev ? dev_name(&edev->pdev->dev) : "none",
+			&vaf);
+
+	va_end(args);
+}
+
 static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
 						enum pci_ers_result new)
 {
@@ -229,123 +263,124 @@ static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
 	}
 }
 
-/**
- * eeh_report_error - Report pci error to each device driver
- * @data: eeh device
- * @userdata: return value
- *
- * Report an EEH error to each device driver, collect up and
- * merge the device driver responses. Cumulative response
- * passed back in "userdata".
- */
-static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
+typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
+					     struct pci_driver *);
+static void eeh_pe_report_edev(struct eeh_dev *edev,
+					      eeh_report_fn fn,
+					      enum pci_ers_result *result)
 {
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
+	enum pci_ers_result new_result;
+
+	device_lock(&edev->pdev->dev);
+	if (eeh_edev_actionable(edev)) {
+		driver = eeh_pcid_get(edev->pdev);
+
+		if (!driver)
+			eeh_edev_info(edev, "no driver");
+		else if (!driver->err_handler)
+			eeh_edev_info(edev,
+				     "driver not EEH aware");
+		else if (edev->mode & EEH_DEV_NO_HANDLER)
+			eeh_edev_info(edev,
+				     "driver bound too late");
+		else {
+			new_result = fn(edev, driver);
+			eeh_edev_info(edev,
+				"%s driver reports: '%s'",
+				driver->name,
+				pci_ers_result_name(new_result));
+			if (result)
+				*result = pci_ers_merge_result(*result,
+							       new_result);
+		}
+		if (driver)
+			eeh_pcid_put(edev->pdev);
+	} else {
+		eeh_edev_info(edev, "not actionable (%d,%d,%d)",
+			     !!edev->pdev,
+			     !eeh_dev_removed(edev),
+			     !eeh_pe_passed(edev->pe));
+	}
+	device_unlock(&edev->pdev->dev);
+}
 
-	if (!eeh_edev_actionable(edev))
-		return NULL;
+static void eeh_pe_report(const char *name, struct eeh_pe *root,
+			  eeh_report_fn fn,
+			  enum pci_ers_result *result)
+{
+	struct eeh_pe *pe;
+	struct eeh_dev *edev, *tmp;
 
-	device_lock(&dev->dev);
+	pr_info("EEH: Beginning: '%s'\n", name);
+	eeh_for_each_pe(root, pe)
+		eeh_pe_for_each_dev(pe, edev, tmp)
+			eeh_pe_report_edev(edev, fn, result);
+	if (result)
+		pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n",
+			name, pci_ers_result_name(*result));
+	else
+		pr_info("EEH: Finished:'%s'", name);
+}
 
-	driver = eeh_pcid_get(dev);
-	if (!driver) goto out_no_dev;
+/**
+ * eeh_report_error - Report pci error to each device driver
+ * @edev: eeh device
+ * @driver: device's PCI driver
+ *
+ * Report an EEH error to each device driver.
+ */
+static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
+			      struct pci_driver *driver)
+{
+	enum pci_ers_result rc;
+	struct pci_dev *dev = edev->pdev;
 
-	if (!driver->err_handler ||
-	    !driver->err_handler->error_detected)
-		goto out;
+	if (!driver->err_handler->error_detected)
+		return PCI_ERS_RESULT_NONE;
 
+	eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)", driver->name);
 	rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
 
-	*res = pci_ers_merge_result(*res, rc);
-
 	edev->in_error = true;
 	pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
-
-out:
-	eeh_pcid_put(dev);
-out_no_dev:
-	device_unlock(&dev->dev);
-	return NULL;
+	return rc;
 }
 
 /**
  * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
- * @data: eeh device
- * @userdata: return value
+ * @edev: eeh device
+ * @driver: device's PCI driver
  *
  * Tells each device driver that IO ports, MMIO and config space I/O
- * are now enabled. Collects up and merges the device driver responses.
- * Cumulative response passed back in "userdata".
+ * are now enabled.
  */
-static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata)
+static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
+						   struct pci_driver *driver)
 {
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	enum pci_ers_result rc, *res = userdata;
-	struct pci_driver *driver;
-
-	if (!eeh_edev_actionable(edev))
-		return NULL;
-
-	device_lock(&dev->dev);
-	driver = eeh_pcid_get(dev);
-	if (!driver) goto out_no_dev;
-
-	if (!driver->err_handler ||
-	    !driver->err_handler->mmio_enabled ||
-	    (edev->mode & EEH_DEV_NO_HANDLER))
-		goto out;
-
-	rc = driver->err_handler->mmio_enabled(dev);
-
-	*res = pci_ers_merge_result(*res, rc);
-
-out:
-	eeh_pcid_put(dev);
-out_no_dev:
-	device_unlock(&dev->dev);
-	return NULL;
+	if (!driver->err_handler->mmio_enabled)
+		return PCI_ERS_RESULT_NONE;
+	eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
+	return driver->err_handler->mmio_enabled(edev->pdev);
 }
 
 /**
  * eeh_report_reset - Tell device that slot has been reset
- * @data: eeh device
- * @userdata: return value
+ * @edev: eeh device
+ * @driver: device's PCI driver
  *
  * This routine must be called while EEH tries to reset particular
  * PCI device so that the associated PCI device driver could take
  * some actions, usually to save data the driver needs so that the
  * driver can work again while the device is recovered.
  */
-static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
+static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
+					    struct pci_driver *driver)
 {
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	enum pci_ers_result rc, *res = userdata;
-	struct pci_driver *driver;
-
-	if (!eeh_edev_actionable(edev))
-		return NULL;
-
-	device_lock(&dev->dev);
-
-	driver = eeh_pcid_get(dev);
-	if (!driver) goto out_no_dev;
-
-	if (!driver->err_handler ||
-	    !driver->err_handler->slot_reset ||
-	    (edev->mode & EEH_DEV_NO_HANDLER) ||
-	    (!edev->in_error))
-		goto out;
-
-	rc = driver->err_handler->slot_reset(dev);
-	*res = pci_ers_merge_result(*res, rc);
-
-out:
-	eeh_pcid_put(dev);
-out_no_dev:
-	device_unlock(&dev->dev);
-	return NULL;
+	if (!driver->err_handler->slot_reset || !edev->in_error)
+		return PCI_ERS_RESULT_NONE;
+	eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
+	return driver->err_handler->slot_reset(edev->pdev);
 }
 
 static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
@@ -378,84 +413,52 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
 
 /**
  * eeh_report_resume - Tell device to resume normal operations
- * @data: eeh device
- * @userdata: return value
+ * @edev: eeh device
+ * @driver: device's PCI driver
  *
  * This routine must be called to notify the device driver that it
  * could resume so that the device driver can do some initialization
  * to make the recovered device work again.
  */
-static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
+static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
+					     struct pci_driver *driver)
 {
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	bool was_in_error;
-	struct pci_driver *driver;
+	if (!driver->err_handler->resume || !edev->in_error)
+		return PCI_ERS_RESULT_NONE;
 
-	if (!eeh_edev_actionable(edev))
-		return NULL;
-
-	device_lock(&dev->dev);
-
-	driver = eeh_pcid_get(dev);
-	if (!driver) goto out_no_dev;
-
-	was_in_error = edev->in_error;
-	edev->in_error = false;
+	eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
+	driver->err_handler->resume(edev->pdev);
 
-	if (!driver->err_handler ||
-	    !driver->err_handler->resume ||
-	    (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
-		goto out;
-	}
-
-	driver->err_handler->resume(dev);
-
-	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
-out:
-	eeh_pcid_put(dev);
+	pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
 #ifdef CONFIG_PCI_IOV
 	if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
 		eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
 #endif
-out_no_dev:
-	device_unlock(&dev->dev);
-	return NULL;
+	return PCI_ERS_RESULT_NONE;
 }
 
 /**
  * eeh_report_failure - Tell device driver that device is dead.
- * @data: eeh device
- * @userdata: return value
+ * @edev: eeh device
+ * @driver: device's PCI driver
  *
  * This informs the device driver that the device is permanently
  * dead, and that no further recovery attempts will be made on it.
  */
-static void *eeh_report_failure(struct eeh_dev *edev, void *userdata)
+static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
+					      struct pci_driver *driver)
 {
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	struct pci_driver *driver;
-
-	if (!eeh_edev_actionable(edev))
-		return NULL;
-
-	device_lock(&dev->dev);
-	dev->error_state = pci_channel_io_perm_failure;
-
-	driver = eeh_pcid_get(dev);
-	if (!driver) goto out_no_dev;
+	enum pci_ers_result rc;
 
-	if (!driver->err_handler ||
-	    !driver->err_handler->error_detected)
-		goto out;
+	if (!driver->err_handler->error_detected)
+		return PCI_ERS_RESULT_NONE;
 
-	driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
+	eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
+		     driver->name);
+	rc = driver->err_handler->error_detected(edev->pdev, pci_channel_io_perm_failure);
 
-	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
-out:
-	eeh_pcid_put(dev);
-out_no_dev:
-	device_unlock(&dev->dev);
-	return NULL;
+	pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT);
+	return rc;
 }
 
 static void *eeh_add_virt_device(void *data, void *userdata)
@@ -817,7 +820,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	pr_info("EEH: Notify device drivers to shutdown\n");
 	eeh_set_channel_state(pe, pci_channel_io_frozen);
 	eeh_set_irq_state(pe, false);
-	eeh_pe_dev_traverse(pe, eeh_report_error, &result);
+	eeh_pe_report("error_detected(IO frozen)", pe,
+		      eeh_report_error, &result);
 	if ((pe->type & EEH_PE_PHB) &&
 	    result != PCI_ERS_RESULT_NONE &&
 	    result != PCI_ERS_RESULT_NEED_RESET)
@@ -864,7 +868,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 			result = PCI_ERS_RESULT_NEED_RESET;
 		} else {
 			pr_info("EEH: Notify device drivers to resume I/O\n");
-			eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
+			eeh_pe_report("mmio_enabled", pe,
+				      eeh_report_mmio_enabled, &result);
 		}
 	}
 
@@ -909,7 +914,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 		result = PCI_ERS_RESULT_NONE;
 		eeh_set_channel_state(pe, pci_channel_io_normal);
 		eeh_set_irq_state(pe, true);
-		eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
+		eeh_pe_report("slot_reset", pe, eeh_report_reset, &result);
 	}
 
 	/* All devices should claim they have recovered by now. */
@@ -932,11 +937,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	pr_info("EEH: Notify device driver to resume\n");
 	eeh_set_channel_state(pe, pci_channel_io_normal);
 	eeh_set_irq_state(pe, true);
-	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
-
-	eeh_for_each_pe(pe, tmp_pe)
-		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+	eeh_pe_report("resume", pe, eeh_report_resume, NULL);
+	eeh_for_each_pe(pe, tmp_pe) {
+		eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
 			edev->mode &= ~EEH_DEV_NO_HANDLER;
+			edev->in_error = false;
+		}
+	}
 
 	pr_info("EEH: Recovery successful.\n");
 	goto final;
@@ -956,7 +963,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	/* Notify all devices that they're about to go down. */
 	eeh_set_channel_state(pe, pci_channel_io_perm_failure);
 	eeh_set_irq_state(pe, false);
-	eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
+	eeh_pe_report("error_detected(permanent failure)", pe,
+		      eeh_report_failure, NULL);
 
 	/* Mark the PE to be removed permanently */
 	eeh_pe_state_mark(pe, EEH_PE_REMOVED);
@@ -1066,8 +1074,8 @@ void eeh_handle_special_event(void)
 				/* Notify all devices to be down */
 				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 				eeh_set_channel_state(pe, pci_channel_io_perm_failure);
-				eeh_pe_dev_traverse(pe,
-					eeh_report_failure, NULL);
+				eeh_pe_report("error_detected(permanent failure)",
+					       pe, eeh_report_failure, NULL);
 				bus = eeh_pe_bus_get(phb_pe);
 				if (!bus) {
 					pr_err("%s: Cannot find PCI bus for "
-- 
2.16.1.74.g9b0b1f47b

^ permalink raw reply related

* [PATCH] powerpc/modules: remove unused mod_arch_specific.toc field
From: Josh Poimboeuf @ 2018-05-25  3:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

The toc field in the mod_arch_specific struct isn't actually used
anywhere, so remove it.

Also the ftrace-specific fields are now common between 32-bit and
64-bit, so simplify the struct definition a bit by moving them out of
the __powerpc64__ #ifdef.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/powerpc/include/asm/module.h | 13 +++++--------
 arch/powerpc/kernel/module_64.c   |  1 -
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 18f7214d68b7..d8374f984f39 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -50,13 +50,6 @@ struct mod_arch_specific {
 	unsigned int stubs_section;	/* Index of stubs section in module */
 	unsigned int toc_section;	/* What section is the TOC? */
 	bool toc_fixed;			/* Have we fixed up .TOC.? */
-#ifdef CONFIG_DYNAMIC_FTRACE
-	unsigned long toc;
-	unsigned long tramp;
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-	unsigned long tramp_regs;
-#endif
-#endif
 
 	/* For module function descriptor dereference */
 	unsigned long start_opd;
@@ -65,10 +58,14 @@ struct mod_arch_specific {
 	/* Indices of PLT sections within module. */
 	unsigned int core_plt_section;
 	unsigned int init_plt_section;
+#endif /* powerpc64 */
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 	unsigned long tramp;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	unsigned long tramp_regs;
+#endif
 #endif
-#endif /* powerpc64 */
 
 	/* List of BUG addresses, source line numbers and filenames */
 	struct list_head bug_list;
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index f7667e2ebfcb..1b7419579820 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -823,7 +823,6 @@ static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs,
 
 int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
 {
-	mod->arch.toc = my_r2(sechdrs, mod);
 	mod->arch.tramp = create_ftrace_stub(sechdrs, mod,
 					(unsigned long)ftrace_caller);
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-- 
2.17.0

^ permalink raw reply related

* [PATCH v6 0/4] powerpc/64: memcmp() optimization
From: wei.guo.simon @ 2018-05-25  4:07 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Paul Mackerras, Michael Ellerman, Naveen N.  Rao, Cyril Bur,
	Simon Guo

From: Simon Guo <wei.guo.simon@gmail.com>

There is some room to optimize memcmp() in powerpc 64 bits version for
following 2 cases:
(1) Even src/dst addresses are not aligned with 8 bytes at the beginning,
memcmp() can align them and go with .Llong comparision mode without
fallback to .Lshort comparision mode do compare buffer byte by byte.
(2) VMX instructions can be used to speed up for large size comparision,
currently the threshold is set for 4K bytes. Notes the VMX instructions
will lead to VMX regs save/load penalty. This patch set includes a
patch to add a 32 bytes pre-checking to minimize the penalty.

It did the similar with glibc commit dec4a7105e (powerpc: Improve memcmp 
performance for POWER8). Thanks Cyril Bur's information.
This patch set also updates memcmp selftest case to make it compiled and
incorporate large size comparison case.

v5 -> v6:
- correct some comments/commit messsage.
- rename VMX_OPS_THRES to VMX_THRESH

v4 -> v5:
- Expand 32 bytes prechk to src/dst different offset case, and remove
KSM specific label/comment.

v3 -> v4:
- Add 32 bytes pre-checking before using VMX instructions.

v2 -> v3:
- add optimization for src/dst with different offset against 8 bytes
boundary.
- renamed some label names.
- reworked some comments from Cyril Bur, such as fill the pipeline, 
and use VMX when size == 4K.
- fix a bug of enter/exit_vmx_ops pairness issue. And revised test 
case to test whether enter/exit_vmx_ops are paired.

v1 -> v2:
- update 8bytes unaligned bytes comparison method.
- fix a VMX comparision bug.
- enhanced the original memcmp() selftest.
- add powerpc/64 to subject/commit message.

Simon Guo (4):
  powerpc/64: Align bytes before fall back to .Lshort in powerpc64
    memcmp()
  powerpc/64: enhance memcmp() with VMX instruction for long bytes
    comparision
  powerpc/64: add 32 bytes prechecking before using VMX optimization on
    memcmp()
  powerpc:selftest update memcmp_64 selftest for VMX implementation

 arch/powerpc/include/asm/asm-prototypes.h          |   4 +-
 arch/powerpc/lib/copypage_power7.S                 |   4 +-
 arch/powerpc/lib/memcmp_64.S                       | 408 ++++++++++++++++++++-
 arch/powerpc/lib/memcpy_power7.S                   |   6 +-
 arch/powerpc/lib/vmx-helper.c                      |   4 +-
 .../selftests/powerpc/copyloops/asm/ppc_asm.h      |   4 +-
 .../selftests/powerpc/stringloops/asm/ppc_asm.h    |  22 ++
 .../testing/selftests/powerpc/stringloops/memcmp.c |  98 +++--
 8 files changed, 510 insertions(+), 40 deletions(-)

-- 
1.8.3.1

^ permalink raw reply

* [PATCH v6 1/4] powerpc/64: Align bytes before fall back to .Lshort in powerpc64 memcmp()
From: wei.guo.simon @ 2018-05-25  4:07 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Paul Mackerras, Michael Ellerman, Naveen N.  Rao, Cyril Bur,
	Simon Guo
In-Reply-To: <1527221256-17029-1-git-send-email-wei.guo.simon@gmail.com>

From: Simon Guo <wei.guo.simon@gmail.com>

Currently memcmp() 64bytes version in powerpc will fall back to .Lshort
(compare per byte mode) if either src or dst address is not 8 bytes aligned.
It can be opmitized in 2 situations:

1) if both addresses are with the same offset with 8 bytes boundary:
memcmp() can compare the unaligned bytes within 8 bytes boundary firstly
and then compare the rest 8-bytes-aligned content with .Llong mode.

2)  If src/dst addrs are not with the same offset of 8 bytes boundary:
memcmp() can align src addr with 8 bytes, increment dst addr accordingly,
 then load src with aligned mode and load dst with unaligned mode.

This patch optmizes memcmp() behavior in the above 2 situations.

Tested with both little/big endian. Performance result below is based on
little endian.

Following is the test result with src/dst having the same offset case:
(a similar result was observed when src/dst having different offset):
(1) 256 bytes
Test with the existing tools/testing/selftests/powerpc/stringloops/memcmp:
- without patch
	29.773018302 seconds time elapsed                                          ( +- 0.09% )
- with patch
	16.485568173 seconds time elapsed                                          ( +-  0.02% )
		-> There is ~+80% percent improvement

(2) 32 bytes
To observe performance impact on < 32 bytes, modify
tools/testing/selftests/powerpc/stringloops/memcmp.c with following:
-------
 #include <string.h>
 #include "utils.h"

-#define SIZE 256
+#define SIZE 32
 #define ITERATIONS 10000

 int test_memcmp(const void *s1, const void *s2, size_t n);
--------

- Without patch
	0.244746482 seconds time elapsed                                          ( +-  0.36%)
- with patch
	0.215069477 seconds time elapsed                                          ( +-  0.51%)
		-> There is ~+13% improvement

(3) 0~8 bytes
To observe <8 bytes performance impact, modify
tools/testing/selftests/powerpc/stringloops/memcmp.c with following:
-------
 #include <string.h>
 #include "utils.h"

-#define SIZE 256
-#define ITERATIONS 10000
+#define SIZE 8
+#define ITERATIONS 1000000

 int test_memcmp(const void *s1, const void *s2, size_t n);
-------
- Without patch
       1.845642503 seconds time elapsed                                          ( +- 0.12% )
- With patch
       1.849767135 seconds time elapsed                                          ( +- 0.26% )
		-> They are nearly the same. (-0.2%)

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
---
 arch/powerpc/lib/memcmp_64.S | 143 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 136 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
index d75d18b..f20e883 100644
--- a/arch/powerpc/lib/memcmp_64.S
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -24,28 +24,41 @@
 #define rH	r31
 
 #ifdef __LITTLE_ENDIAN__
+#define LH	lhbrx
+#define LW	lwbrx
 #define LD	ldbrx
 #else
+#define LH	lhzx
+#define LW	lwzx
 #define LD	ldx
 #endif
 
+/*
+ * There are 2 categories for memcmp:
+ * 1) src/dst has the same offset to the 8 bytes boundary. The handlers
+ * are named like .Lsameoffset_xxxx
+ * 2) src/dst has different offset to the 8 bytes boundary. The handlers
+ * are named like .Ldiffoffset_xxxx
+ */
 _GLOBAL(memcmp)
 	cmpdi	cr1,r5,0
 
-	/* Use the short loop if both strings are not 8B aligned */
-	or	r6,r3,r4
+	/* Use the short loop if the src/dst addresses are not
+	 * with the same offset of 8 bytes align boundary.
+	 */
+	xor	r6,r3,r4
 	andi.	r6,r6,7
 
-	/* Use the short loop if length is less than 32B */
-	cmpdi	cr6,r5,31
+	/* Fall back to short loop if compare at aligned addrs
+	 * with less than 8 bytes.
+	 */
+	cmpdi   cr6,r5,7
 
 	beq	cr1,.Lzero
-	bne	.Lshort
-	bgt	cr6,.Llong
+	bgt	cr6,.Lno_short
 
 .Lshort:
 	mtctr	r5
-
 1:	lbz	rA,0(r3)
 	lbz	rB,0(r4)
 	subf.	rC,rB,rA
@@ -78,11 +91,90 @@ _GLOBAL(memcmp)
 	li	r3,0
 	blr
 
+.Lno_short:
+	dcbt	0,r3
+	dcbt	0,r4
+	bne	.Ldiffoffset_8bytes_make_align_start
+
+
+.Lsameoffset_8bytes_make_align_start:
+	/* attempt to compare bytes not aligned with 8 bytes so that
+	 * rest comparison can run based on 8 bytes alignment.
+	 */
+	andi.   r6,r3,7
+
+	/* Try to compare the first double word which is not 8 bytes aligned:
+	 * load the first double word at (src & ~7UL) and shift left appropriate
+	 * bits before comparision.
+	 */
+	clrlwi  r6,r3,29
+	rlwinm  r6,r6,3,0,28
+	beq     .Lsameoffset_8bytes_aligned
+	clrrdi	r3,r3,3
+	clrrdi	r4,r4,3
+	LD	rA,0,r3
+	LD	rB,0,r4
+	sld	rA,rA,r6
+	sld	rB,rB,r6
+	cmpld	cr0,rA,rB
+	srwi	r6,r6,3
+	bne	cr0,.LcmpAB_lightweight
+	subfic  r6,r6,8
+	subfc.	r5,r6,r5
+	addi	r3,r3,8
+	addi	r4,r4,8
+	beq	.Lzero
+
+.Lsameoffset_8bytes_aligned:
+	/* now we are aligned with 8 bytes.
+	 * Use .Llong loop if left cmp bytes are equal or greater than 32B.
+	 */
+	cmpdi   cr6,r5,31
+	bgt	cr6,.Llong
+
+.Lcmp_lt32bytes:
+	/* compare 1 ~ 32 bytes, at least r3 addr is 8 bytes aligned now */
+	cmpdi   cr5,r5,7
+	srdi    r0,r5,3
+	ble	cr5,.Lcmp_rest_lt8bytes
+
+	/* handle 8 ~ 31 bytes */
+	clrldi  r5,r5,61
+	mtctr   r0
+2:
+	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr0,rA,rB
+	addi	r3,r3,8
+	addi	r4,r4,8
+	bne	cr0,.LcmpAB_lightweight
+	bdnz	2b
+
+	cmpwi   r5,0
+	beq	.Lzero
+
+.Lcmp_rest_lt8bytes:
+	/* Here we have only less than 8 bytes to compare with. at least s1
+	 * Address is aligned with 8 bytes.
+	 * The next double words are load and shift right with appropriate
+	 * bits.
+	 */
+	subfic  r6,r5,8
+	rlwinm  r6,r6,3,0,28
+	LD	rA,0,r3
+	LD	rB,0,r4
+	srd	rA,rA,r6
+	srd	rB,rB,r6
+	cmpld	cr0,rA,rB
+	bne	cr0,.LcmpAB_lightweight
+	b	.Lzero
+
 .Lnon_zero:
 	mr	r3,rC
 	blr
 
 .Llong:
+	/* At least s1 addr is aligned with 8 bytes */
 	li	off8,8
 	li	off16,16
 	li	off24,24
@@ -232,4 +324,41 @@ _GLOBAL(memcmp)
 	ld	r28,-32(r1)
 	ld	r27,-40(r1)
 	blr
+
+.LcmpAB_lightweight:   /* skip NV GPRS restore */
+	li	r3,1
+	bgt	cr0,8f
+	li	r3,-1
+8:
+	blr
+
+.Ldiffoffset_8bytes_make_align_start:
+	/* now try to align s1 with 8 bytes */
+	andi.   r6,r3,0x7
+	rlwinm  r6,r6,3,0,28
+	beq     .Ldiffoffset_align_s1_8bytes
+
+	clrrdi	r3,r3,3
+	LD	rA,0,r3
+	LD	rB,0,r4  /* unaligned load */
+	sld	rA,rA,r6
+	srd	rA,rA,r6
+	srd	rB,rB,r6
+	cmpld	cr0,rA,rB
+	srwi	r6,r6,3
+	bne	cr0,.LcmpAB_lightweight
+
+	subfic  r6,r6,8
+	subfc.	r5,r6,r5
+	addi	r3,r3,8
+	add	r4,r4,r6
+
+	beq	.Lzero
+
+.Ldiffoffset_align_s1_8bytes:
+	/* now s1 is aligned with 8 bytes. */
+	cmpdi   cr5,r5,31
+	ble	cr5,.Lcmp_lt32bytes
+	b	.Llong
+
 EXPORT_SYMBOL(memcmp)
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v6 2/4] powerpc/64: enhance memcmp() with VMX instruction for long bytes comparision
From: wei.guo.simon @ 2018-05-25  4:07 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Paul Mackerras, Michael Ellerman, Naveen N.  Rao, Cyril Bur,
	Simon Guo
In-Reply-To: <1527221256-17029-1-git-send-email-wei.guo.simon@gmail.com>

From: Simon Guo <wei.guo.simon@gmail.com>

This patch add VMX primitives to do memcmp() in case the compare size
is equal or greater than 4K bytes. KSM feature can benefit from this.

Test result with following test program(replace the "^>" with ""):
------
># cat tools/testing/selftests/powerpc/stringloops/memcmp.c
>#include <malloc.h>
>#include <stdlib.h>
>#include <string.h>
>#include <time.h>
>#include "utils.h"
>#define SIZE (1024 * 1024 * 900)
>#define ITERATIONS 40

int test_memcmp(const void *s1, const void *s2, size_t n);

static int testcase(void)
{
        char *s1;
        char *s2;
        unsigned long i;

        s1 = memalign(128, SIZE);
        if (!s1) {
                perror("memalign");
                exit(1);
        }

        s2 = memalign(128, SIZE);
        if (!s2) {
                perror("memalign");
                exit(1);
        }

        for (i = 0; i < SIZE; i++)  {
                s1[i] = i & 0xff;
                s2[i] = i & 0xff;
        }
        for (i = 0; i < ITERATIONS; i++) {
		int ret = test_memcmp(s1, s2, SIZE);

		if (ret) {
			printf("return %d at[%ld]! should have returned zero\n", ret, i);
			abort();
		}
	}

        return 0;
}

int main(void)
{
        return test_harness(testcase, "memcmp");
}
------
Without this patch (but with the first patch "powerpc/64: Align bytes
before fall back to .Lshort in powerpc64 memcmp()." in the series):
	4.726728762 seconds time elapsed                                          ( +-  3.54%)
With VMX patch:
	4.234335473 seconds time elapsed                                          ( +-  2.63%)
		There is ~+10% improvement.

Testing with unaligned and different offset version (make s1 and s2 shift
random offset within 16 bytes) can archieve higher improvement than 10%..

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
---
 arch/powerpc/include/asm/asm-prototypes.h |   4 +-
 arch/powerpc/lib/copypage_power7.S        |   4 +-
 arch/powerpc/lib/memcmp_64.S              | 233 +++++++++++++++++++++++++++++-
 arch/powerpc/lib/memcpy_power7.S          |   6 +-
 arch/powerpc/lib/vmx-helper.c             |   4 +-
 5 files changed, 241 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index d9713ad..31fdcee 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -49,8 +49,8 @@ void __trace_hcall_exit(long opcode, unsigned long retval,
 /* VMX copying */
 int enter_vmx_usercopy(void);
 int exit_vmx_usercopy(void);
-int enter_vmx_copy(void);
-void * exit_vmx_copy(void *dest);
+int enter_vmx_ops(void);
+void *exit_vmx_ops(void *dest);
 
 /* Traps */
 long machine_check_early(struct pt_regs *regs);
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index 8fa73b7..e38f956 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -57,7 +57,7 @@ _GLOBAL(copypage_power7)
 	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
 	std	r0,16(r1)
 	stdu	r1,-STACKFRAMESIZE(r1)
-	bl	enter_vmx_copy
+	bl	enter_vmx_ops
 	cmpwi	r3,0
 	ld	r0,STACKFRAMESIZE+16(r1)
 	ld	r3,STK_REG(R31)(r1)
@@ -100,7 +100,7 @@ _GLOBAL(copypage_power7)
 	addi	r3,r3,128
 	bdnz	1b
 
-	b	exit_vmx_copy		/* tail call optimise */
+	b	exit_vmx_ops		/* tail call optimise */
 
 #else
 	li	r0,(PAGE_SIZE/128)
diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
index f20e883..4ba7bb6 100644
--- a/arch/powerpc/lib/memcmp_64.S
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -27,12 +27,73 @@
 #define LH	lhbrx
 #define LW	lwbrx
 #define LD	ldbrx
+#define LVS	lvsr
+#define VPERM(_VRT,_VRA,_VRB,_VRC) \
+	vperm _VRT,_VRB,_VRA,_VRC
 #else
 #define LH	lhzx
 #define LW	lwzx
 #define LD	ldx
+#define LVS	lvsl
+#define VPERM(_VRT,_VRA,_VRB,_VRC) \
+	vperm _VRT,_VRA,_VRB,_VRC
 #endif
 
+#define VMX_THRESH 4096
+#define ENTER_VMX_OPS	\
+	mflr    r0;	\
+	std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \
+	std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \
+	std     r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
+	std     r0,16(r1); \
+	stdu    r1,-STACKFRAMESIZE(r1); \
+	bl      enter_vmx_ops; \
+	cmpwi   cr1,r3,0; \
+	ld      r0,STACKFRAMESIZE+16(r1); \
+	ld      r3,STK_REG(R31)(r1); \
+	ld      r4,STK_REG(R30)(r1); \
+	ld      r5,STK_REG(R29)(r1); \
+	addi	r1,r1,STACKFRAMESIZE; \
+	mtlr    r0
+
+#define EXIT_VMX_OPS \
+	mflr    r0; \
+	std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \
+	std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \
+	std     r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
+	std     r0,16(r1); \
+	stdu    r1,-STACKFRAMESIZE(r1); \
+	bl      exit_vmx_ops; \
+	ld      r0,STACKFRAMESIZE+16(r1); \
+	ld      r3,STK_REG(R31)(r1); \
+	ld      r4,STK_REG(R30)(r1); \
+	ld      r5,STK_REG(R29)(r1); \
+	addi	r1,r1,STACKFRAMESIZE; \
+	mtlr    r0
+
+/*
+ * LD_VSR_CROSS16B load the 2nd 16 bytes for _vaddr which is unaligned with
+ * 16 bytes boundary and permute the result with the 1st 16 bytes.
+
+ *    |  y y y y y y y y y y y y y 0 1 2 | 3 4 5 6 7 8 9 a b c d e f z z z |
+ *    ^                                  ^                                 ^
+ * 0xbbbb10                          0xbbbb20                          0xbbb30
+ *                                 ^
+ *                                _vaddr
+ *
+ *
+ * _vmask is the mask generated by LVS
+ * _v1st_qw is the 1st aligned QW of current addr which is already loaded.
+ *   for example: 0xyyyyyyyyyyyyy012 for big endian
+ * _v2nd_qw is the 2nd aligned QW of cur _vaddr to be loaded.
+ *   for example: 0x3456789abcdefzzz for big endian
+ * The permute result is saved in _v_res.
+ *   for example: 0x0123456789abcdef for big endian.
+ */
+#define LD_VSR_CROSS16B(_vaddr,_vmask,_v1st_qw,_v2nd_qw,_v_res) \
+        lvx     _v2nd_qw,_vaddr,off16; \
+        VPERM(_v_res,_v1st_qw,_v2nd_qw,_vmask)
+
 /*
  * There are 2 categories for memcmp:
  * 1) src/dst has the same offset to the 8 bytes boundary. The handlers
@@ -133,7 +194,7 @@ _GLOBAL(memcmp)
 	bgt	cr6,.Llong
 
 .Lcmp_lt32bytes:
-	/* compare 1 ~ 32 bytes, at least r3 addr is 8 bytes aligned now */
+	/* compare 1 ~ 31 bytes, at least r3 addr is 8 bytes aligned now */
 	cmpdi   cr5,r5,7
 	srdi    r0,r5,3
 	ble	cr5,.Lcmp_rest_lt8bytes
@@ -174,6 +235,13 @@ _GLOBAL(memcmp)
 	blr
 
 .Llong:
+#ifdef CONFIG_ALTIVEC
+	/* Try to use vmx loop if length is equal or greater than 4K */
+	cmpldi  cr6,r5,VMX_THRESH
+	bge	cr6,.Lsameoffset_vmx_cmp
+
+.Llong_novmx_cmp:
+#endif
 	/* At least s1 addr is aligned with 8 bytes */
 	li	off8,8
 	li	off16,16
@@ -332,7 +400,94 @@ _GLOBAL(memcmp)
 8:
 	blr
 
+#ifdef CONFIG_ALTIVEC
+.Lsameoffset_vmx_cmp:
+	/* Enter with src/dst addrs has the same offset with 8 bytes
+	 * align boundary
+	 */
+	ENTER_VMX_OPS
+	beq     cr1,.Llong_novmx_cmp
+
+3:
+	/* need to check whether r4 has the same offset with r3
+	 * for 16 bytes boundary.
+	 */
+	xor	r0,r3,r4
+	andi.	r0,r0,0xf
+	bne	.Ldiffoffset_vmx_cmp_start
+
+	/* len is no less than 4KB. Need to align with 16 bytes further.
+	 */
+	andi.	rA,r3,8
+	LD	rA,0,r3
+	beq	4f
+	LD	rB,0,r4
+	cmpld	cr0,rA,rB
+	addi	r3,r3,8
+	addi	r4,r4,8
+	addi	r5,r5,-8
+
+	beq	cr0,4f
+	/* save and restore cr0 */
+	mfocrf  r5,64
+	EXIT_VMX_OPS
+	mtocrf	64,r5
+	b	.LcmpAB_lightweight
+
+4:
+	/* compare 32 bytes for each loop */
+	srdi	r0,r5,5
+	mtctr	r0
+	clrldi  r5,r5,59
+	li	off16,16
+
+.balign 16
+5:
+	lvx 	v0,0,r3
+	lvx 	v1,0,r4
+	vcmpequd. v0,v0,v1
+	bf	24,7f
+	lvx 	v0,off16,r3
+	lvx 	v1,off16,r4
+	vcmpequd. v0,v0,v1
+	bf	24,6f
+	addi	r3,r3,32
+	addi	r4,r4,32
+	bdnz	5b
+
+	EXIT_VMX_OPS
+	cmpdi	r5,0
+	beq	.Lzero
+	b	.Lcmp_lt32bytes
+
+6:
+	addi	r3,r3,16
+	addi	r4,r4,16
+
+7:
+	/* diff the last 16 bytes */
+	EXIT_VMX_OPS
+	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr0,rA,rB
+	li	off8,8
+	bne	cr0,.LcmpAB_lightweight
+
+	LD	rA,off8,r3
+	LD	rB,off8,r4
+	cmpld	cr0,rA,rB
+	bne	cr0,.LcmpAB_lightweight
+	b	.Lzero
+#endif
+
 .Ldiffoffset_8bytes_make_align_start:
+#ifdef CONFIG_ALTIVEC
+	/* only do vmx ops when the size equal or greater than 4K bytes */
+	cmpdi	cr5,r5,VMX_THRESH
+	bge	cr5,.Ldiffoffset_vmx_cmp
+.Ldiffoffset_novmx_cmp:
+#endif
+
 	/* now try to align s1 with 8 bytes */
 	andi.   r6,r3,0x7
 	rlwinm  r6,r6,3,0,28
@@ -359,6 +514,82 @@ _GLOBAL(memcmp)
 	/* now s1 is aligned with 8 bytes. */
 	cmpdi   cr5,r5,31
 	ble	cr5,.Lcmp_lt32bytes
+
+#ifdef CONFIG_ALTIVEC
+	b	.Llong_novmx_cmp
+#else
 	b	.Llong
+#endif
+
+#ifdef CONFIG_ALTIVEC
+.Ldiffoffset_vmx_cmp:
+	ENTER_VMX_OPS
+	beq     cr1,.Ldiffoffset_novmx_cmp
+
+.Ldiffoffset_vmx_cmp_start:
+	/* Firstly try to align r3 with 16 bytes */
+	andi.   r6,r3,0xf
+	li	off16,16
+	beq     .Ldiffoffset_vmx_s1_16bytes_align
 
+	LVS	v3,0,r3
+	LVS	v4,0,r4
+
+	lvx     v5,0,r3
+	lvx     v6,0,r4
+	LD_VSR_CROSS16B(r3,v3,v5,v7,v9)
+	LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
+
+	vcmpequb.  v7,v9,v10
+	bnl	cr6,.Ldiffoffset_vmx_diff_found
+
+	subfic  r6,r6,16
+	subf    r5,r6,r5
+	add     r3,r3,r6
+	add     r4,r4,r6
+
+.Ldiffoffset_vmx_s1_16bytes_align:
+	/* now s1 is aligned with 16 bytes */
+	lvx     v6,0,r4
+	LVS	v4,0,r4
+	srdi	r6,r5,5  /* loop for 32 bytes each */
+	clrldi  r5,r5,59
+	mtctr	r6
+
+.balign	16
+.Ldiffoffset_vmx_32bytesloop:
+	/* the first qw of r4 was saved in v6 */
+	lvx	v9,0,r3
+	LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
+	vcmpequb.	v7,v9,v10
+	vor	v6,v8,v8
+	bnl	cr6,.Ldiffoffset_vmx_diff_found
+
+	addi	r3,r3,16
+	addi	r4,r4,16
+
+	lvx	v9,0,r3
+	LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
+	vcmpequb.	v7,v9,v10
+	vor	v6,v8,v8
+	bnl	cr6,.Ldiffoffset_vmx_diff_found
+
+	addi	r3,r3,16
+	addi	r4,r4,16
+
+	bdnz	.Ldiffoffset_vmx_32bytesloop
+
+	EXIT_VMX_OPS
+
+	cmpdi	r5,0
+	beq	.Lzero
+	b	.Lcmp_lt32bytes
+
+.Ldiffoffset_vmx_diff_found:
+	EXIT_VMX_OPS
+	/* anyway, the diff will appear in next 16 bytes */
+	li	r5,16
+	b	.Lcmp_lt32bytes
+
+#endif
 EXPORT_SYMBOL(memcmp)
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index df7de9d..070cdf6 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -230,7 +230,7 @@ _GLOBAL(memcpy_power7)
 	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
 	std	r0,16(r1)
 	stdu	r1,-STACKFRAMESIZE(r1)
-	bl	enter_vmx_copy
+	bl	enter_vmx_ops
 	cmpwi	cr1,r3,0
 	ld	r0,STACKFRAMESIZE+16(r1)
 	ld	r3,STK_REG(R31)(r1)
@@ -445,7 +445,7 @@ _GLOBAL(memcpy_power7)
 
 15:	addi	r1,r1,STACKFRAMESIZE
 	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
-	b	exit_vmx_copy		/* tail call optimise */
+	b	exit_vmx_ops		/* tail call optimise */
 
 .Lvmx_unaligned_copy:
 	/* Get the destination 16B aligned */
@@ -649,5 +649,5 @@ _GLOBAL(memcpy_power7)
 
 15:	addi	r1,r1,STACKFRAMESIZE
 	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
-	b	exit_vmx_copy		/* tail call optimise */
+	b	exit_vmx_ops		/* tail call optimise */
 #endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
index bf925cd..9f34049 100644
--- a/arch/powerpc/lib/vmx-helper.c
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -53,7 +53,7 @@ int exit_vmx_usercopy(void)
 	return 0;
 }
 
-int enter_vmx_copy(void)
+int enter_vmx_ops(void)
 {
 	if (in_interrupt())
 		return 0;
@@ -70,7 +70,7 @@ int enter_vmx_copy(void)
  * passed a pointer to the destination which we return as required by a
  * memcpy implementation.
  */
-void *exit_vmx_copy(void *dest)
+void *exit_vmx_ops(void *dest)
 {
 	disable_kernel_altivec();
 	preempt_enable();
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v6 3/4] powerpc/64: add 32 bytes prechecking before using VMX optimization on memcmp()
From: wei.guo.simon @ 2018-05-25  4:07 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Paul Mackerras, Michael Ellerman, Naveen N.  Rao, Cyril Bur,
	Simon Guo
In-Reply-To: <1527221256-17029-1-git-send-email-wei.guo.simon@gmail.com>

From: Simon Guo <wei.guo.simon@gmail.com>

This patch is based on the previous VMX patch on memcmp().

To optimize ppc64 memcmp() with VMX instruction, we need to think about
the VMX penalty brought with: If kernel uses VMX instruction, it needs
to save/restore current thread's VMX registers. There are 32 x 128 bits
VMX registers in PPC, which means 32 x 16 = 512 bytes for load and store.

The major concern regarding the memcmp() performance in kernel is KSM,
who will use memcmp() frequently to merge identical pages. So it will
make sense to take some measures/enhancement on KSM to see whether any
improvement can be done here.  Cyril Bur indicates that the memcmp() for
KSM has a higher possibility to fail (unmatch) early in previous bytes
in following mail.
	https://patchwork.ozlabs.org/patch/817322/#1773629
And I am taking a follow-up on this with this patch.

Per some testing, it shows KSM memcmp() will fail early at previous 32
bytes.  More specifically:
    - 76% cases will fail/unmatch before 16 bytes;
    - 83% cases will fail/unmatch before 32 bytes;
    - 84% cases will fail/unmatch before 64 bytes;
So 32 bytes looks a better choice than other bytes for pre-checking.

The early failure is also true for memcmp() for non-KSM case. With a
non-typical call load, it shows ~73% cases fail before first 32 bytes.

This patch adds a 32 bytes pre-checking firstly before jumping into VMX
operations, to avoid the unnecessary VMX penalty. It is not limited to
KSM case. And the testing shows ~20% improvement on memcmp() average
execution time with this patch.

And note the 32B pre-checking is only performed when the compare size
is long enough (>=4K currently) to allow VMX operation.

The detail data and analysis is at:
https://github.com/justdoitqd/publicFiles/blob/master/memcmp/README.md

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
---
 arch/powerpc/lib/memcmp_64.S | 50 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
index 4ba7bb6..96eb08b 100644
--- a/arch/powerpc/lib/memcmp_64.S
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -403,8 +403,27 @@ _GLOBAL(memcmp)
 #ifdef CONFIG_ALTIVEC
 .Lsameoffset_vmx_cmp:
 	/* Enter with src/dst addrs has the same offset with 8 bytes
-	 * align boundary
+	 * align boundary.
+	 *
+	 * There is an optimization based on following fact: memcmp()
+	 * prones to fail early at the first 32 bytes.
+	 * Before applying VMX instructions which will lead to 32x128bits
+	 * VMX regs load/restore penalty, we compare the first 32 bytes
+	 * so that we can catch the ~80% fail cases.
 	 */
+
+	li	r0,4
+	mtctr	r0
+.Lsameoffset_prechk_32B_loop:
+	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr0,rA,rB
+	addi	r3,r3,8
+	addi	r4,r4,8
+	bne     cr0,.LcmpAB_lightweight
+	addi	r5,r5,-8
+	bdnz	.Lsameoffset_prechk_32B_loop
+
 	ENTER_VMX_OPS
 	beq     cr1,.Llong_novmx_cmp
 
@@ -481,13 +500,6 @@ _GLOBAL(memcmp)
 #endif
 
 .Ldiffoffset_8bytes_make_align_start:
-#ifdef CONFIG_ALTIVEC
-	/* only do vmx ops when the size equal or greater than 4K bytes */
-	cmpdi	cr5,r5,VMX_THRESH
-	bge	cr5,.Ldiffoffset_vmx_cmp
-.Ldiffoffset_novmx_cmp:
-#endif
-
 	/* now try to align s1 with 8 bytes */
 	andi.   r6,r3,0x7
 	rlwinm  r6,r6,3,0,28
@@ -512,6 +524,13 @@ _GLOBAL(memcmp)
 
 .Ldiffoffset_align_s1_8bytes:
 	/* now s1 is aligned with 8 bytes. */
+#ifdef CONFIG_ALTIVEC
+	/* only do vmx ops when the size is equal or greater than 4K bytes */
+	cmpdi	cr5,r5,VMX_THRESH
+	bge	cr5,.Ldiffoffset_vmx_cmp
+.Ldiffoffset_novmx_cmp:
+#endif
+
 	cmpdi   cr5,r5,31
 	ble	cr5,.Lcmp_lt32bytes
 
@@ -523,6 +542,21 @@ _GLOBAL(memcmp)
 
 #ifdef CONFIG_ALTIVEC
 .Ldiffoffset_vmx_cmp:
+	/* perform a 32 bytes pre-checking before
+	 * enable VMX operations.
+	 */
+	li	r0,4
+	mtctr	r0
+.Ldiffoffset_prechk_32B_loop:
+	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr0,rA,rB
+	addi	r3,r3,8
+	addi	r4,r4,8
+	bne     cr0,.LcmpAB_lightweight
+	addi	r5,r5,-8
+	bdnz	.Ldiffoffset_prechk_32B_loop
+
 	ENTER_VMX_OPS
 	beq     cr1,.Ldiffoffset_novmx_cmp
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v6 4/4] powerpc:selftest update memcmp_64 selftest for VMX implementation
From: wei.guo.simon @ 2018-05-25  4:07 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Paul Mackerras, Michael Ellerman, Naveen N.  Rao, Cyril Bur,
	Simon Guo
In-Reply-To: <1527221256-17029-1-git-send-email-wei.guo.simon@gmail.com>

From: Simon Guo <wei.guo.simon@gmail.com>

This patch reworked selftest memcmp_64 so that memcmp selftest can
cover more test cases.

It adds testcases for:
- memcmp over 4K bytes size.
- s1/s2 with different/random offset on 16 bytes boundary.
- enter/exit_vmx_ops pairness.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
---
 .../selftests/powerpc/copyloops/asm/ppc_asm.h      |  4 +-
 .../selftests/powerpc/stringloops/asm/ppc_asm.h    | 22 +++++
 .../testing/selftests/powerpc/stringloops/memcmp.c | 98 +++++++++++++++++-----
 3 files changed, 100 insertions(+), 24 deletions(-)

diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
index 5ffe04d..dfce161 100644
--- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
+++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
@@ -36,11 +36,11 @@
 	li	r3,0
 	blr
 
-FUNC_START(enter_vmx_copy)
+FUNC_START(enter_vmx_ops)
 	li	r3,1
 	blr
 
-FUNC_START(exit_vmx_copy)
+FUNC_START(exit_vmx_ops)
 	blr
 
 FUNC_START(memcpy_power7)
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
index 136242e..185d257 100644
--- a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
+++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
@@ -1,4 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_ASM_H
+#define __PPC_ASM_H
 #include <ppc-asm.h>
 
 #ifndef r1
@@ -6,3 +8,23 @@
 #endif
 
 #define _GLOBAL(A) FUNC_START(test_ ## A)
+
+#define CONFIG_ALTIVEC
+
+#define R14 r14
+#define R15 r15
+#define R16 r16
+#define R17 r17
+#define R18 r18
+#define R19 r19
+#define R20 r20
+#define R21 r21
+#define R22 r22
+#define R29 r29
+#define R30 r30
+#define R31 r31
+
+#define STACKFRAMESIZE	256
+#define STK_REG(i)	(112 + ((i)-14)*8)
+
+#endif
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index 8250db2..b5cf717 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -2,20 +2,40 @@
 #include <malloc.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 #include "utils.h"
 
 #define SIZE 256
 #define ITERATIONS 10000
 
+#define LARGE_SIZE (5 * 1024)
+#define LARGE_ITERATIONS 1000
+#define LARGE_MAX_OFFSET 32
+#define LARGE_SIZE_START 4096
+
+#define MAX_OFFSET_DIFF_S1_S2 48
+
+int vmx_count;
+int enter_vmx_ops(void)
+{
+	vmx_count++;
+	return 1;
+}
+
+void exit_vmx_ops(void)
+{
+	vmx_count--;
+}
 int test_memcmp(const void *s1, const void *s2, size_t n);
 
 /* test all offsets and lengths */
-static void test_one(char *s1, char *s2)
+static void test_one(char *s1, char *s2, unsigned long max_offset,
+		unsigned long size_start, unsigned long max_size)
 {
 	unsigned long offset, size;
 
-	for (offset = 0; offset < SIZE; offset++) {
-		for (size = 0; size < (SIZE-offset); size++) {
+	for (offset = 0; offset < max_offset; offset++) {
+		for (size = size_start; size < (max_size - offset); size++) {
 			int x, y;
 			unsigned long i;
 
@@ -35,70 +55,104 @@ static void test_one(char *s1, char *s2)
 				printf("\n");
 				abort();
 			}
+
+			if (vmx_count != 0) {
+				printf("vmx enter/exit not paired.(offset:%ld size:%ld s1:%p s2:%p vc:%d\n",
+					offset, size, s1, s2, vmx_count);
+				printf("\n");
+				abort();
+			}
 		}
 	}
 }
 
-static int testcase(void)
+static int testcase(bool islarge)
 {
 	char *s1;
 	char *s2;
 	unsigned long i;
 
-	s1 = memalign(128, SIZE);
+	unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE);
+	unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
+	int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+
+	s1 = memalign(128, alloc_size);
 	if (!s1) {
 		perror("memalign");
 		exit(1);
 	}
 
-	s2 = memalign(128, SIZE);
+	s2 = memalign(128, alloc_size);
 	if (!s2) {
 		perror("memalign");
 		exit(1);
 	}
 
-	srandom(1);
+	srandom(time(0));
 
-	for (i = 0; i < ITERATIONS; i++) {
+	for (i = 0; i < iterations; i++) {
 		unsigned long j;
 		unsigned long change;
+		char *rand_s1 = s1;
+		char *rand_s2 = s2;
 
-		for (j = 0; j < SIZE; j++)
+		for (j = 0; j < alloc_size; j++)
 			s1[j] = random();
 
-		memcpy(s2, s1, SIZE);
+		rand_s1 += random() % MAX_OFFSET_DIFF_S1_S2;
+		rand_s2 += random() % MAX_OFFSET_DIFF_S1_S2;
+		memcpy(rand_s2, rand_s1, comp_size);
 
 		/* change one byte */
-		change = random() % SIZE;
-		s2[change] = random() & 0xff;
-
-		test_one(s1, s2);
+		change = random() % comp_size;
+		rand_s2[change] = random() & 0xff;
+
+		if (islarge)
+			test_one(rand_s1, rand_s2, LARGE_MAX_OFFSET,
+					LARGE_SIZE_START, comp_size);
+		else
+			test_one(rand_s1, rand_s2, SIZE, 0, comp_size);
 	}
 
-	srandom(1);
+	srandom(time(0));
 
-	for (i = 0; i < ITERATIONS; i++) {
+	for (i = 0; i < iterations; i++) {
 		unsigned long j;
 		unsigned long change;
+		char *rand_s1 = s1;
+		char *rand_s2 = s2;
 
-		for (j = 0; j < SIZE; j++)
+		for (j = 0; j < alloc_size; j++)
 			s1[j] = random();
 
-		memcpy(s2, s1, SIZE);
+		rand_s1 += random() % MAX_OFFSET_DIFF_S1_S2;
+		rand_s2 += random() % MAX_OFFSET_DIFF_S1_S2;
+		memcpy(rand_s2, rand_s1, comp_size);
 
 		/* change multiple bytes, 1/8 of total */
-		for (j = 0; j < SIZE / 8; j++) {
-			change = random() % SIZE;
+		for (j = 0; j < comp_size / 8; j++) {
+			change = random() % comp_size;
 			s2[change] = random() & 0xff;
 		}
 
-		test_one(s1, s2);
+		if (islarge)
+			test_one(rand_s1, rand_s2, LARGE_MAX_OFFSET,
+					LARGE_SIZE_START, comp_size);
+		else
+			test_one(rand_s1, rand_s2, SIZE, 0, comp_size);
 	}
 
 	return 0;
 }
 
+static int testcases(void)
+{
+	testcase(0);
+	testcase(1);
+	return 0;
+}
+
 int main(void)
 {
-	return test_harness(testcase, "memcmp");
+	return test_harness(testcases, "memcmp");
 }
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH v4 3/3] powerpc/lib: optimise PPC32 memcmp
From: Christophe LEROY @ 2018-05-25  5:55 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	linuxppc-dev, linux-kernel
In-Reply-To: <20180524172458.GA17342@gate.crashing.org>



Le 24/05/2018 à 19:24, Segher Boessenkool a écrit :
> On Wed, May 23, 2018 at 09:47:32AM +0200, Christophe Leroy wrote:
>> At the time being, memcmp() compares two chunks of memory
>> byte per byte.
>>
>> This patch optimises the comparison by comparing word by word.
>>
>> A small benchmark performed on an 8xx comparing two chuncks
>> of 512 bytes performed 100000 times gives:
>>
>> Before : 5852274 TB ticks
>> After:   1488638 TB ticks
> 
>> diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
>> index 40a576d56ac7..542e6cecbcaf 100644
>> --- a/arch/powerpc/lib/string_32.S
>> +++ b/arch/powerpc/lib/string_32.S
>> @@ -16,17 +16,45 @@
>>   	.text
>>
>>   _GLOBAL(memcmp)
>> -	cmpwi	cr0, r5, 0
>> -	beq-	2f
>> -	mtctr	r5
>> -	addi	r6,r3,-1
>> -	addi	r4,r4,-1
>> -1:	lbzu	r3,1(r6)
>> -	lbzu	r0,1(r4)
>> -	subf.	r3,r0,r3
>> -	bdnzt	2,1b
>> +	srawi.	r7, r5, 2		/* Divide len by 4 */
>> +	mr	r6, r3
>> +	beq-	3f
>> +	mtctr	r7
>> +	li	r7, 0
>> +1:
>> +#ifdef __LITTLE_ENDIAN__
>> +	lwbrx	r3, r6, r7
>> +	lwbrx	r0, r4, r7
>> +#else
>> +	lwzx	r3, r6, r7
>> +	lwzx	r0, r4, r7
>> +#endif
> 
> You don't test whether the pointers are word-aligned.  Does that work?

copy_tofrom_user() word-aligns the store address and doesn't take care 
of the load address, so I believe it works.

Now, I just read in the MPC885 Ref Manual that unaligned access 
generates alignment exception when the processor is running in LE mode.

Ref. made to the discussion on patch "powerpc/32be: use stmw/lmw for 
registers save/restore in asm" 
(https://patchwork.ozlabs.org/patch/899465/), I will drop the handling 
for LE mode.

Christophe

> Say, when a load is crossing a page boundary, or segment boundary.
> 
> 
> Segher
> 

^ permalink raw reply

* Re: [PATCH 2/2] selftests/powerpc: Add perf breakpoint test
From: Michael Neuling @ 2018-05-25  6:00 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev
In-Reply-To: <87tvqx4a7q.fsf@concordia.ellerman.id.au>

On Thu, 2018-05-24 at 20:30 +1000, Michael Ellerman wrote:
> Michael Neuling <mikey@neuling.org> writes:
>=20
> > This tests perf hardware breakpoints (ie PERF_TYPE_BREAKPOINT) on
> > powerpc.
>=20
> This doesn't work for me on a P8 guest:
>=20
>   test: perf-hwbreak
>   tags: git_version:bb5602e
>   !! killing perf-hwbreak
>   !! child died by signal 15
>   failure: perf-hwbreak
>=20
>=20
> That means the harness killed it after 2 minutes.
>=20
> Sometimes it gets further:
>=20
>   test: perf-hwbreak
>   tags: git_version:v4.17-rc3-109-gbb20240fb508
>   threads=3D16 loops=3D1048576 scalar test
>   !! killing perf-hwbreak
>   !! child died by signal 15
>   failure: perf-hwbreak
>=20
>=20
> Do we just need to bump the timeout up, or is something going wrong
> here?

Yeah, sorry my bad.  The test was trying to randomly test all the combinati=
ons.
=20
I've rewritten it now to explicitly test all the combinations. This gives i=
t a
more consistent runtime. I do randomly vary the loop count but this is only=
 a
small variation (~10%) to avoid using a count from a previous test.

Mikey

^ permalink raw reply

* Re: [PATCH] powerpc/modules: remove unused mod_arch_specific.toc field
From: Kamalesh Babulal @ 2018-05-25  6:53 UTC (permalink / raw)
  To: Josh Poimboeuf, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linux-kernel, linuxppc-dev
In-Reply-To: <2892e4f57d03caf1b3d6c14bc456b2f9a116b32a.1527220066.git.jpoimboe@redhat.com>

On Friday 25 May 2018 09:18 AM, Josh Poimboeuf wrote:
> The toc field in the mod_arch_specific struct isn't actually used
> anywhere, so remove it.
> 
> Also the ftrace-specific fields are now common between 32-bit and
> 64-bit, so simplify the struct definition a bit by moving them out of
> the __powerpc64__ #ifdef.
> 
> Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>

Reviewed-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>

^ permalink raw reply

* [PATCH stable 4.14 00/23] powerpc backports for 4.14
From: Michael Ellerman @ 2018-05-25 10:09 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev

Hi Greg,

Please queue up this series of patches for 4.14 if you have no objections.

cheers



Mauricio Faria de Oliveira (4):
  powerpc/rfi-flush: Differentiate enabled and patched flush types
  powerpc/pseries: Fix clearing of security feature flags
  powerpc: Move default security feature flags
  powerpc/pseries: Restore default security feature flags on setup

Michael Ellerman (17):
  powerpc/pseries: Support firmware disable of RFI flush
  powerpc/powernv: Support firmware disable of RFI flush
  powerpc/rfi-flush: Move the logic to avoid a redo into the debugfs
    code
  powerpc/rfi-flush: Make it possible to call setup_rfi_flush() again
  powerpc/rfi-flush: Always enable fallback flush on pseries
  powerpc/rfi-flush: Call setup_rfi_flush() after LPM migration
  powerpc/pseries: Add new H_GET_CPU_CHARACTERISTICS flags
  powerpc: Add security feature flags for Spectre/Meltdown
  powerpc/pseries: Set or clear security feature flags
  powerpc/powernv: Set or clear security feature flags
  powerpc/64s: Move cpu_show_meltdown()
  powerpc/64s: Enhance the information in cpu_show_meltdown()
  powerpc/powernv: Use the security flags in pnv_setup_rfi_flush()
  powerpc/pseries: Use the security flags in pseries_setup_rfi_flush()
  powerpc/64s: Wire up cpu_show_spectre_v1()
  powerpc/64s: Wire up cpu_show_spectre_v2()
  powerpc/64s: Fix section mismatch warnings from setup_rfi_flush()

Nicholas Piggin (2):
  powerpc/64s: Improve RFI L1-D cache flush fallback
  powerpc/64s: Add support for a store forwarding barrier at kernel
    entry/exit

 arch/powerpc/include/asm/exception-64s.h     |  29 ++++
 arch/powerpc/include/asm/feature-fixups.h    |  19 +++
 arch/powerpc/include/asm/hvcall.h            |   3 +
 arch/powerpc/include/asm/paca.h              |   3 +-
 arch/powerpc/include/asm/security_features.h |  85 ++++++++++
 arch/powerpc/include/asm/setup.h             |   2 +-
 arch/powerpc/kernel/Makefile                 |   2 +-
 arch/powerpc/kernel/asm-offsets.c            |   3 +-
 arch/powerpc/kernel/exceptions-64s.S         |  95 ++++++-----
 arch/powerpc/kernel/security.c               | 237 +++++++++++++++++++++++++++
 arch/powerpc/kernel/setup_64.c               |  48 ++----
 arch/powerpc/kernel/vmlinux.lds.S            |  14 ++
 arch/powerpc/lib/feature-fixups.c            | 124 +++++++++++++-
 arch/powerpc/platforms/powernv/setup.c       |  92 ++++++++---
 arch/powerpc/platforms/pseries/mobility.c    |   3 +
 arch/powerpc/platforms/pseries/pseries.h     |   2 +
 arch/powerpc/platforms/pseries/setup.c       |  81 +++++++--
 arch/powerpc/xmon/xmon.c                     |   2 +
 18 files changed, 721 insertions(+), 123 deletions(-)
 create mode 100644 arch/powerpc/include/asm/security_features.h
 create mode 100644 arch/powerpc/kernel/security.c

-- 
2.14.1

^ permalink raw reply

* [PATCH stable 4.14 01/23] powerpc/64s: Improve RFI L1-D cache flush fallback
From: Michael Ellerman @ 2018-05-25 10:09 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev
In-Reply-To: <20180525100954.31599-1-mpe@ellerman.id.au>

From: Nicholas Piggin <npiggin@gmail.com>

The fallback RFI flush is used when firmware does not provide a way
to flush the cache. It's a "displacement flush" that evicts useful
data by displacing it with an uninteresting buffer.

The flush has to take care to work with implementation specific cache
replacment policies, so the recipe has been in flux. The initial
slow but conservative approach is to touch all lines of a congruence
class, with dependencies between each load. It has since been
determined that a linear pattern of loads without dependencies is
sufficient, and is significantly faster.

Measuring the speed of a null syscall with RFI fallback flush enabled
gives the relative improvement:

P8 - 1.83x
P9 - 1.75x

The flush also becomes simpler and more adaptable to different cache
geometries.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
(cherry picked from commit bdcb1aefc5b3f7d0f1dc8b02673602bca2ff7a4b)
---
 arch/powerpc/include/asm/paca.h      |  3 +-
 arch/powerpc/kernel/asm-offsets.c    |  3 +-
 arch/powerpc/kernel/exceptions-64s.S | 76 +++++++++++++++++-------------------
 arch/powerpc/kernel/setup_64.c       | 13 +-----
 arch/powerpc/xmon/xmon.c             |  2 +
 5 files changed, 41 insertions(+), 56 deletions(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index b8366df50d19..e6bd59353e40 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -238,8 +238,7 @@ struct paca_struct {
 	 */
 	u64 exrfi[EX_SIZE] __aligned(0x80);
 	void *rfi_flush_fallback_area;
-	u64 l1d_flush_congruence;
-	u64 l1d_flush_sets;
+	u64 l1d_flush_size;
 #endif
 };
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 748cdc4bb89a..2e5ea300258a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -239,8 +239,7 @@ int main(void)
 	OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
 	OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
 	OFFSET(PACA_EXRFI, paca_struct, exrfi);
-	OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
-	OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+	OFFSET(PACA_L1D_FLUSH_SIZE, paca_struct, l1d_flush_size);
 
 #endif
 	OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f9ca4bb3d48e..feba0a8d040e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1440,39 +1440,37 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
 	std	r9,PACA_EXRFI+EX_R9(r13)
 	std	r10,PACA_EXRFI+EX_R10(r13)
 	std	r11,PACA_EXRFI+EX_R11(r13)
-	std	r12,PACA_EXRFI+EX_R12(r13)
-	std	r8,PACA_EXRFI+EX_R13(r13)
 	mfctr	r9
 	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
-	ld	r11,PACA_L1D_FLUSH_SETS(r13)
-	ld	r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
-	/*
-	 * The load adresses are at staggered offsets within cachelines,
-	 * which suits some pipelines better (on others it should not
-	 * hurt).
-	 */
-	addi	r12,r12,8
+	ld	r11,PACA_L1D_FLUSH_SIZE(r13)
+	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
 	mtctr	r11
 	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
 
 	/* order ld/st prior to dcbt stop all streams with flushing */
 	sync
-1:	li	r8,0
-	.rept	8 /* 8-way set associative */
-	ldx	r11,r10,r8
-	add	r8,r8,r12
-	xor	r11,r11,r11	// Ensure r11 is 0 even if fallback area is not
-	add	r8,r8,r11	// Add 0, this creates a dependency on the ldx
-	.endr
-	addi	r10,r10,128 /* 128 byte cache line */
+
+	/*
+	 * The load adresses are at staggered offsets within cachelines,
+	 * which suits some pipelines better (on others it should not
+	 * hurt).
+	 */
+1:
+	ld	r11,(0x80 + 8)*0(r10)
+	ld	r11,(0x80 + 8)*1(r10)
+	ld	r11,(0x80 + 8)*2(r10)
+	ld	r11,(0x80 + 8)*3(r10)
+	ld	r11,(0x80 + 8)*4(r10)
+	ld	r11,(0x80 + 8)*5(r10)
+	ld	r11,(0x80 + 8)*6(r10)
+	ld	r11,(0x80 + 8)*7(r10)
+	addi	r10,r10,0x80*8
 	bdnz	1b
 
 	mtctr	r9
 	ld	r9,PACA_EXRFI+EX_R9(r13)
 	ld	r10,PACA_EXRFI+EX_R10(r13)
 	ld	r11,PACA_EXRFI+EX_R11(r13)
-	ld	r12,PACA_EXRFI+EX_R12(r13)
-	ld	r8,PACA_EXRFI+EX_R13(r13)
 	GET_SCRATCH0(r13);
 	rfid
 
@@ -1482,39 +1480,37 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
 	std	r9,PACA_EXRFI+EX_R9(r13)
 	std	r10,PACA_EXRFI+EX_R10(r13)
 	std	r11,PACA_EXRFI+EX_R11(r13)
-	std	r12,PACA_EXRFI+EX_R12(r13)
-	std	r8,PACA_EXRFI+EX_R13(r13)
 	mfctr	r9
 	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
-	ld	r11,PACA_L1D_FLUSH_SETS(r13)
-	ld	r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
-	/*
-	 * The load adresses are at staggered offsets within cachelines,
-	 * which suits some pipelines better (on others it should not
-	 * hurt).
-	 */
-	addi	r12,r12,8
+	ld	r11,PACA_L1D_FLUSH_SIZE(r13)
+	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
 	mtctr	r11
 	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
 
 	/* order ld/st prior to dcbt stop all streams with flushing */
 	sync
-1:	li	r8,0
-	.rept	8 /* 8-way set associative */
-	ldx	r11,r10,r8
-	add	r8,r8,r12
-	xor	r11,r11,r11	// Ensure r11 is 0 even if fallback area is not
-	add	r8,r8,r11	// Add 0, this creates a dependency on the ldx
-	.endr
-	addi	r10,r10,128 /* 128 byte cache line */
+
+	/*
+	 * The load adresses are at staggered offsets within cachelines,
+	 * which suits some pipelines better (on others it should not
+	 * hurt).
+	 */
+1:
+	ld	r11,(0x80 + 8)*0(r10)
+	ld	r11,(0x80 + 8)*1(r10)
+	ld	r11,(0x80 + 8)*2(r10)
+	ld	r11,(0x80 + 8)*3(r10)
+	ld	r11,(0x80 + 8)*4(r10)
+	ld	r11,(0x80 + 8)*5(r10)
+	ld	r11,(0x80 + 8)*6(r10)
+	ld	r11,(0x80 + 8)*7(r10)
+	addi	r10,r10,0x80*8
 	bdnz	1b
 
 	mtctr	r9
 	ld	r9,PACA_EXRFI+EX_R9(r13)
 	ld	r10,PACA_EXRFI+EX_R10(r13)
 	ld	r11,PACA_EXRFI+EX_R11(r13)
-	ld	r12,PACA_EXRFI+EX_R12(r13)
-	ld	r8,PACA_EXRFI+EX_R13(r13)
 	GET_SCRATCH0(r13);
 	hrfid
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 9527a4c6cbc2..333c64a794eb 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -851,19 +851,8 @@ static void init_fallback_flush(void)
 	memset(l1d_flush_fallback_area, 0, l1d_size * 2);
 
 	for_each_possible_cpu(cpu) {
-		/*
-		 * The fallback flush is currently coded for 8-way
-		 * associativity. Different associativity is possible, but it
-		 * will be treated as 8-way and may not evict the lines as
-		 * effectively.
-		 *
-		 * 128 byte lines are mandatory.
-		 */
-		u64 c = l1d_size / 8;
-
 		paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
-		paca[cpu].l1d_flush_congruence = c;
-		paca[cpu].l1d_flush_sets = c / 128;
+		paca[cpu].l1d_flush_size = l1d_size;
 	}
 }
 
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 2c8b325591cc..a5938fadd031 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2348,6 +2348,8 @@ static void dump_one_paca(int cpu)
 	DUMP(p, slb_cache_ptr, "x");
 	for (i = 0; i < SLB_CACHE_ENTRIES; i++)
 		printf(" slb_cache[%d]:        = 0x%016lx\n", i, p->slb_cache[i]);
+
+	DUMP(p, rfi_flush_fallback_area, "px");
 #endif
 	DUMP(p, dscr_default, "llx");
 #ifdef CONFIG_PPC_BOOK3E
-- 
2.14.1

^ permalink raw reply related

* [PATCH stable 4.14 02/23] powerpc/pseries: Support firmware disable of RFI flush
From: Michael Ellerman @ 2018-05-25 10:09 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev
In-Reply-To: <20180525100954.31599-1-mpe@ellerman.id.au>

Some versions of firmware will have a setting that can be configured
to disable the RFI flush, add support for it.

Fixes: 8989d56878a7 ("powerpc/pseries: Query hypervisor for RFI flush settings")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
(cherry picked from commit 582605a429e20ae68fd0b041b2e840af296edd08)
---
 arch/powerpc/platforms/pseries/setup.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index ae4f596273b5..8bbbb4e753b5 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -482,7 +482,8 @@ static void pseries_setup_rfi_flush(void)
 		if (types == L1D_FLUSH_NONE)
 			types = L1D_FLUSH_FALLBACK;
 
-		if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+		if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
+		    (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
 			enable = false;
 	} else {
 		/* Default to fallback if case hcall is not available */
-- 
2.14.1

^ permalink raw reply related

* [PATCH stable 4.14 03/23] powerpc/powernv: Support firmware disable of RFI flush
From: Michael Ellerman @ 2018-05-25 10:09 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev
In-Reply-To: <20180525100954.31599-1-mpe@ellerman.id.au>

Some versions of firmware will have a setting that can be configured
to disable the RFI flush, add support for it.

Fixes: 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
(cherry picked from commit eb0a2d2620ae431c543963c8c7f08f597366fc60)
---
 arch/powerpc/platforms/powernv/setup.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 7966a314d93a..37a7f5ef00b7 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -79,6 +79,10 @@ static void pnv_setup_rfi_flush(void)
 		if (np && of_property_read_bool(np, "disabled"))
 			enable--;
 
+		np = of_get_child_by_name(fw_features, "speculation-policy-favor-security");
+		if (np && of_property_read_bool(np, "disabled"))
+			enable = 0;
+
 		of_node_put(np);
 		of_node_put(fw_features);
 	}
-- 
2.14.1

^ permalink raw reply related

* [PATCH stable 4.14 04/23] powerpc/rfi-flush: Move the logic to avoid a redo into the debugfs code
From: Michael Ellerman @ 2018-05-25 10:09 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev
In-Reply-To: <20180525100954.31599-1-mpe@ellerman.id.au>

rfi_flush_enable() includes a check to see if we're already
enabled (or disabled), and in that case does nothing.

But that means calling setup_rfi_flush() a 2nd time doesn't actually
work, which is a bit confusing.

Move that check into the debugfs code, where it really belongs.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
(cherry picked from commit 1e2a9fc7496955faacbbed49461d611b704a7505)
---
 arch/powerpc/kernel/setup_64.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 333c64a794eb..cbb3fb1820ce 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -822,9 +822,6 @@ static void do_nothing(void *unused)
 
 void rfi_flush_enable(bool enable)
 {
-	if (rfi_flush == enable)
-		return;
-
 	if (enable) {
 		do_rfi_flush_fixups(enabled_flush_types);
 		on_each_cpu(do_nothing, NULL, 1);
@@ -878,13 +875,19 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
 #ifdef CONFIG_DEBUG_FS
 static int rfi_flush_set(void *data, u64 val)
 {
+	bool enable;
+
 	if (val == 1)
-		rfi_flush_enable(true);
+		enable = true;
 	else if (val == 0)
-		rfi_flush_enable(false);
+		enable = false;
 	else
 		return -EINVAL;
 
+	/* Only do anything if we're changing state */
+	if (enable != rfi_flush)
+		rfi_flush_enable(enable);
+
 	return 0;
 }
 
-- 
2.14.1

^ permalink raw reply related

* [PATCH stable 4.14 05/23] powerpc/rfi-flush: Make it possible to call setup_rfi_flush() again
From: Michael Ellerman @ 2018-05-25 10:09 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev
In-Reply-To: <20180525100954.31599-1-mpe@ellerman.id.au>

For PowerVM migration we want to be able to call setup_rfi_flush()
again after we've migrated the partition.

To support that we need to check that we're not trying to allocate the
fallback flush area after memblock has gone away (i.e., boot-time only).

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
(cherry picked from commit abf110f3e1cea40f5ea15e85f5d67c39c14568a7)
---
 arch/powerpc/include/asm/setup.h | 2 +-
 arch/powerpc/kernel/setup_64.c   | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 469b7fdc9be4..bbcdf929be54 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -49,7 +49,7 @@ enum l1d_flush_type {
 	L1D_FLUSH_MTTRIG	= 0x8,
 };
 
-void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void setup_rfi_flush(enum l1d_flush_type, bool enable);
 void do_rfi_flush_fixups(enum l1d_flush_type types);
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index cbb3fb1820ce..ace6a10a242f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -836,6 +836,10 @@ static void init_fallback_flush(void)
 	u64 l1d_size, limit;
 	int cpu;
 
+	/* Only allocate the fallback flush area once (at boot time). */
+	if (l1d_flush_fallback_area)
+		return;
+
 	l1d_size = ppc64_caches.l1d.size;
 	limit = min(safe_stack_limit(), ppc64_rma_size);
 
@@ -853,7 +857,7 @@ static void init_fallback_flush(void)
 	}
 }
 
-void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+void setup_rfi_flush(enum l1d_flush_type types, bool enable)
 {
 	if (types & L1D_FLUSH_FALLBACK) {
 		pr_info("rfi-flush: Using fallback displacement flush\n");
-- 
2.14.1

^ permalink raw reply related

* [PATCH stable 4.14 06/23] powerpc/rfi-flush: Always enable fallback flush on pseries
From: Michael Ellerman @ 2018-05-25 10:09 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev
In-Reply-To: <20180525100954.31599-1-mpe@ellerman.id.au>

This ensures the fallback flush area is always allocated on pseries,
so in case a LPAR is migrated from a patched to an unpatched system,
it is possible to enable the fallback flush in the target system.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
(cherry picked from commit 84749a58b6e382f109abf1e734bc4dd43c2c25bb)
---
 arch/powerpc/platforms/pseries/setup.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 8bbbb4e753b5..2708ddab209b 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -468,26 +468,18 @@ static void pseries_setup_rfi_flush(void)
 
 	/* Enable by default */
 	enable = true;
+	types = L1D_FLUSH_FALLBACK;
 
 	rc = plpar_get_cpu_characteristics(&result);
 	if (rc == H_SUCCESS) {
-		types = L1D_FLUSH_NONE;
-
 		if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
 			types |= L1D_FLUSH_MTTRIG;
 		if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
 			types |= L1D_FLUSH_ORI;
 
-		/* Use fallback if nothing set in hcall */
-		if (types == L1D_FLUSH_NONE)
-			types = L1D_FLUSH_FALLBACK;
-
 		if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
 		    (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
 			enable = false;
-	} else {
-		/* Default to fallback if case hcall is not available */
-		types = L1D_FLUSH_FALLBACK;
 	}
 
 	setup_rfi_flush(types, enable);
-- 
2.14.1

^ permalink raw reply related

* [PATCH stable 4.14 07/23] powerpc/rfi-flush: Differentiate enabled and patched flush types
From: Michael Ellerman @ 2018-05-25 10:09 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev
In-Reply-To: <20180525100954.31599-1-mpe@ellerman.id.au>

From: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>

Currently the rfi-flush messages print 'Using <type> flush' for all
enabled_flush_types, but that is not necessarily true -- as now the
fallback flush is always enabled on pseries, but the fixup function
overwrites its nop/branch slot with other flush types, if available.

So, replace the 'Using <type> flush' messages with '<type> flush is
available'.

Also, print the patched flush types in the fixup function, so users
can know what is (not) being used (e.g., the slower, fallback flush,
or no flush type at all if flush is disabled via the debugfs switch).

Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
(cherry picked from commit 0063d61ccfc011f379a31acaeba6de7c926fed2c)
---
 arch/powerpc/kernel/setup_64.c    | 6 +++---
 arch/powerpc/lib/feature-fixups.c | 9 ++++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index ace6a10a242f..da12b54cbe5c 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -860,15 +860,15 @@ static void init_fallback_flush(void)
 void setup_rfi_flush(enum l1d_flush_type types, bool enable)
 {
 	if (types & L1D_FLUSH_FALLBACK) {
-		pr_info("rfi-flush: Using fallback displacement flush\n");
+		pr_info("rfi-flush: fallback displacement flush available\n");
 		init_fallback_flush();
 	}
 
 	if (types & L1D_FLUSH_ORI)
-		pr_info("rfi-flush: Using ori type flush\n");
+		pr_info("rfi-flush: ori type flush available\n");
 
 	if (types & L1D_FLUSH_MTTRIG)
-		pr_info("rfi-flush: Using mttrig type flush\n");
+		pr_info("rfi-flush: mttrig type flush available\n");
 
 	enabled_flush_types = types;
 
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index d0c0b8443dcf..8ac72f7d638f 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -153,7 +153,14 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
 		patch_instruction(dest + 2, instrs[2]);
 	}
 
-	printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+	printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
+		(types == L1D_FLUSH_NONE)       ? "no" :
+		(types == L1D_FLUSH_FALLBACK)   ? "fallback displacement" :
+		(types &  L1D_FLUSH_ORI)        ? (types & L1D_FLUSH_MTTRIG)
+							? "ori+mttrig type"
+							: "ori type" :
+		(types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
+						: "unknown");
 }
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
-- 
2.14.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox