* [PATCH v2 09/13] powerpc/eeh: Introduce eeh_edev_actionable()
From: Sam Bobroff @ 2018-05-25 3:11 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>
The same test is done in every EEH report function, so factor it out.
Since eeh_dev_removed() needs to be moved higher up in the file,
simplify it a little while we're at it.
Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
arch/powerpc/kernel/eeh_driver.c | 30 ++++++++++++++++--------------
1 file changed, 16 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 2d3cac584899..30898866418b 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -62,6 +62,17 @@ static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
return old;
}
+static bool eeh_dev_removed(struct eeh_dev *edev)
+{
+ return !edev || (edev->mode & EEH_DEV_REMOVED);
+}
+
+static bool eeh_edev_actionable(struct eeh_dev *edev)
+{
+ return (edev->pdev && !eeh_dev_removed(edev) &&
+ !eeh_pe_passed(edev->pe));
+}
+
/**
* eeh_pcid_get - Get the PCI device driver
* @pdev: PCI device
@@ -163,15 +174,6 @@ static void eeh_enable_irq(struct pci_dev *dev)
}
}
-static bool eeh_dev_removed(struct eeh_dev *edev)
-{
- /* EEH device removed ? */
- if (!edev || (edev->mode & EEH_DEV_REMOVED))
- return true;
-
- return false;
-}
-
static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
@@ -212,7 +214,7 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+ if (!eeh_edev_actionable(edev))
return NULL;
device_lock(&dev->dev);
@@ -256,7 +258,7 @@ static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+ if (!eeh_edev_actionable(edev))
return NULL;
device_lock(&dev->dev);
@@ -295,7 +297,7 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+ if (!eeh_edev_actionable(edev))
return NULL;
device_lock(&dev->dev);
@@ -365,7 +367,7 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
bool was_in_error;
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+ if (!eeh_edev_actionable(edev))
return NULL;
device_lock(&dev->dev);
@@ -412,7 +414,7 @@ static void *eeh_report_failure(struct eeh_dev *edev, void *userdata)
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+ if (!eeh_edev_actionable(edev))
return NULL;
device_lock(&dev->dev);
--
2.16.1.74.g9b0b1f47b
^ permalink raw reply related
* [PATCH v2 10/13] powerpc/eeh: Introduce eeh_set_channel_state()
From: Sam Bobroff @ 2018-05-25 3:11 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>
To ease future refactoring, extract setting of the channel state
from the report functions out into their own functions. This increases
the amount of code that is identical across all of the report
functions.
Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
arch/powerpc/kernel/eeh_driver.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 30898866418b..76951ca701b2 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -199,6 +199,17 @@ static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
return NULL;
}
+static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
+{
+ struct eeh_pe *pe;
+ struct eeh_dev *edev, *tmp;
+
+ eeh_for_each_pe(root, pe)
+ eeh_pe_for_each_dev(pe, edev, tmp)
+ if (eeh_edev_actionable(edev))
+ edev->pdev->error_state = s;
+}
+
/**
* eeh_report_error - Report pci error to each device driver
* @data: eeh device
@@ -218,7 +229,6 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
return NULL;
device_lock(&dev->dev);
- dev->error_state = pci_channel_io_frozen;
driver = eeh_pcid_get(dev);
if (!driver) goto out_no_dev;
@@ -301,7 +311,6 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
return NULL;
device_lock(&dev->dev);
- dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
if (!driver) goto out_no_dev;
@@ -371,7 +380,6 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
return NULL;
device_lock(&dev->dev);
- dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
if (!driver) goto out_no_dev;
@@ -795,6 +803,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
* hotplug for this case.
*/
pr_info("EEH: Notify device drivers to shutdown\n");
+ eeh_set_channel_state(pe, pci_channel_io_frozen);
eeh_pe_dev_traverse(pe, eeh_report_error, &result);
if ((pe->type & EEH_PE_PHB) &&
result != PCI_ERS_RESULT_NONE &&
@@ -885,6 +894,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Notify device drivers "
"the completion of reset\n");
result = PCI_ERS_RESULT_NONE;
+ eeh_set_channel_state(pe, pci_channel_io_normal);
eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
}
@@ -906,6 +916,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
/* Tell all device drivers that they can resume operations */
pr_info("EEH: Notify device driver to resume\n");
+ eeh_set_channel_state(pe, pci_channel_io_normal);
eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
pr_info("EEH: Recovery successful.\n");
@@ -924,6 +935,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_slot_error_detail(pe, EEH_LOG_PERM);
/* Notify all devices that they're about to go down. */
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
/* Mark the PE to be removed permanently */
@@ -1033,6 +1045,7 @@ void eeh_handle_special_event(void)
/* Notify all devices to be down */
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
eeh_pe_dev_traverse(pe,
eeh_report_failure, NULL);
bus = eeh_pe_bus_get(phb_pe);
--
2.16.1.74.g9b0b1f47b
^ permalink raw reply related
* [PATCH v2 06/13] powerpc/eeh: Add message when PE processing at parent
From: Sam Bobroff @ 2018-05-25 3:11 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>
To aid debugging, add a message to show when EEH processing for a PE
will be done at the device's parent, rather than directly at the
device.
Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
arch/powerpc/kernel/eeh.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f82dade4fb9a..1139821a9aec 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -541,8 +541,12 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
/* Frozen parent PE ? */
ret = eeh_ops->get_state(parent_pe, NULL);
- if (ret > 0 && !eeh_state_active(ret))
+ if (ret > 0 && !eeh_state_active(ret)) {
pe = parent_pe;
+ pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n",
+ pe->phb->global_number, pe->addr,
+ pe->phb->global_number, parent_pe->addr);
+ }
/* Next parent level */
parent_pe = parent_pe->parent;
--
2.16.1.74.g9b0b1f47b
^ permalink raw reply related
* [PATCH v2 12/13] powerpc/eeh: Cleaner handling of EEH_DEV_NO_HANDLER
From: Sam Bobroff @ 2018-05-25 3:11 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>
If a device without a driver is recovered via EEH, the flag
EEH_DEV_NO_HANDLER is incorrectly left set on the device after
recovery, because the test in eeh_report_resume() for the existence of
a bound driver is done before the flag is cleared. If a driver is
later bound, and EEH experienced again, some of the drivers EEH
handers are not called.
To correct this, clear the flag unconditionally after EEH processing
is complete.
Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
arch/powerpc/kernel/eeh_driver.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 8dd2a33424a1..42fe80ed6f59 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -405,7 +405,6 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
if (!driver->err_handler ||
!driver->err_handler->resume ||
(edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
- edev->mode &= ~EEH_DEV_NO_HANDLER;
goto out;
}
@@ -780,6 +779,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
{
struct pci_bus *bus;
struct eeh_dev *edev, *tmp;
+ struct eeh_pe *tmp_pe;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
@@ -934,6 +934,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_set_irq_state(pe, true);
eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
pr_info("EEH: Recovery successful.\n");
goto final;
--
2.16.1.74.g9b0b1f47b
^ permalink raw reply related
* [PATCH v2 07/13] powerpc/eeh: Clean up pci_ers_result handling
From: Sam Bobroff @ 2018-05-25 3:11 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>
As EEH event handling progresses, a cumulative result of type
pci_ers_result is built up by (some of) the eeh_report_*() functions
using either:
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
or:
if ((*res == PCI_ERS_RESULT_NONE) ||
(*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
if (*res == PCI_ERS_RESULT_DISCONNECT &&
rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
(Where *res is the accumulator.)
However, the intent is not immediately clear and the result in some
situations is order dependent.
Address this by assigning a priority to each result value, and always
merging to the highest priority. This renders the intent clear, and
provides a stable value for all orderings.
Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
====== v1 -> v2: ======
* Added the value, and missing newline, to some WARN()s.
* Improved name of merge_result() to pci_ers_merge_result().
* Adjusted the result priorities so that unknown doesn't overlap with _NONE.
arch/powerpc/kernel/eeh_driver.c | 36 ++++++++++++++++++++++++++----------
1 file changed, 26 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 188d15c4fe3a..2d3cac584899 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -39,6 +39,29 @@ struct eeh_rmv_data {
int removed;
};
+static int eeh_result_priority(enum pci_ers_result result)
+{
+ switch (result) {
+ case PCI_ERS_RESULT_NONE: return 1;
+ case PCI_ERS_RESULT_NO_AER_DRIVER: return 2;
+ case PCI_ERS_RESULT_RECOVERED: return 3;
+ case PCI_ERS_RESULT_CAN_RECOVER: return 4;
+ case PCI_ERS_RESULT_DISCONNECT: return 5;
+ case PCI_ERS_RESULT_NEED_RESET: return 6;
+ default:
+ WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result);
+ return 0;
+ }
+};
+
+static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
+ enum pci_ers_result new)
+{
+ if (eeh_result_priority(new) > eeh_result_priority(old))
+ return new;
+ return old;
+}
+
/**
* eeh_pcid_get - Get the PCI device driver
* @pdev: PCI device
@@ -206,9 +229,7 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
- /* A driver that needs a reset trumps all others */
- if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
- if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+ *res = pci_ers_merge_result(*res, rc);
edev->in_error = true;
pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
@@ -249,9 +270,7 @@ static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata)
rc = driver->err_handler->mmio_enabled(dev);
- /* A driver that needs a reset trumps all others */
- if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
- if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+ *res = pci_ers_merge_result(*res, rc);
out:
eeh_pcid_put(dev);
@@ -294,10 +313,7 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
goto out;
rc = driver->err_handler->slot_reset(dev);
- if ((*res == PCI_ERS_RESULT_NONE) ||
- (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
- if (*res == PCI_ERS_RESULT_DISCONNECT &&
- rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+ *res = pci_ers_merge_result(*res, rc);
out:
eeh_pcid_put(dev);
--
2.16.1.74.g9b0b1f47b
^ permalink raw reply related
* [PATCH v2 00/13] EEH refactoring 2
From: Sam Bobroff @ 2018-05-25 3:11 UTC (permalink / raw)
To: linuxppc-dev
Hello everyone,
Here is a second, somewhat deeper, set of cleanups for the EEH code
(mostly eeh_drver.c).
These changes are not intended to significantly alter the actual processing,
but rather to improve the readability and maintainability of the code. They are
subjective by nature so I would appreciate comments and suggestions.
The earlier changes are mostly to support the last patch, where we're finally
able to use a common infrastructure for the reporting functions (basically
wrappers around the driver's handlers). This allows removal of a fair bit of
code, and the easy addition of some useful messaging which should make future
maintenance easier (as an example, a recent fix in this area "powerpc/eeh: Fix
race with driver un/bind" would have required adding two lines rather than
42+/26-).
Cheers,
Sam.
Patch set changelog follows:
====== v1 -> v2: ======
Patch 1/13: powerpc/eeh: Add eeh_max_freezes to initial EEH log line
Patch 2/13: powerpc/eeh: Add final message for successful recovery
Patch 3/13: powerpc/eeh: Fix use-after-release of EEH driver
Patch 4/13: powerpc/eeh: Remove unused eeh_pcid_name()
Patch 5/13: powerpc/eeh: Strengthen types of eeh traversal functions
Patch 6/13: powerpc/eeh: Add message when PE processing at parent
Patch 7/13: powerpc/eeh: Clean up pci_ers_result handling
* Added the value, and missing newline, to some WARN()s.
* Improved name of merge_result() to pci_ers_merge_result().
* Adjusted the result priorities so that unknown doesn't overlap with _NONE.
Patch 8/13: powerpc/eeh: Introduce eeh_for_each_pe()
Patch 9/13: powerpc/eeh: Introduce eeh_edev_actionable()
Patch 10/13: powerpc/eeh: Introduce eeh_set_channel_state()
Patch 11/13: powerpc/eeh: Introduce eeh_set_irq_state()
* Reorganised eeh_set_irq_state() to reduce nesting depth.
Patch 12/13: powerpc/eeh: Cleaner handling of EEH_DEV_NO_HANDLER
Patch 13/13: powerpc/eeh: Refactor report functions
* Better name for eeh_infoline() and implement using a function.
* Move the core of eeh_pe_report() into a new function to improve readability.
* pci_ers_result_name(): match parameter name to eeh_result_priority(), correct and improve warning message.
====== v1: ======
Patch 1/13: powerpc/eeh: Add eeh_max_freezes to initial EEH log line
Patch 2/13: powerpc/eeh: Add final message for successful recovery
Patch 3/13: powerpc/eeh: Fix use-after-release of EEH driver
Patch 4/13: powerpc/eeh: Remove unused eeh_pcid_name()
Patch 5/13: powerpc/eeh: Strengthen types of eeh traversal functions
Patch 6/13: powerpc/eeh: Add message when PE processing at parent
Patch 7/13: powerpc/eeh: Clean up pci_ers_result handling
Patch 8/13: powerpc/eeh: Introduce eeh_for_each_pe()
Patch 9/13: powerpc/eeh: Introduce eeh_edev_actionable()
Patch 10/13: powerpc/eeh: Introduce eeh_set_channel_state()
Patch 11/13: powerpc/eeh: Introduce eeh_set_irq_state()
Patch 12/13: powerpc/eeh: Cleaner handling of EEH_DEV_NO_HANDLER
Patch 13/13: powerpc/eeh: Refactor report functions
Sam Bobroff (13):
powerpc/eeh: Add eeh_max_freezes to initial EEH log line
powerpc/eeh: Add final message for successful recovery
powerpc/eeh: Fix use-after-release of EEH driver
powerpc/eeh: Remove unused eeh_pcid_name()
powerpc/eeh: Strengthen types of eeh traversal functions
powerpc/eeh: Add message when PE processing at parent
powerpc/eeh: Clean up pci_ers_result handling
powerpc/eeh: Introduce eeh_for_each_pe()
powerpc/eeh: Introduce eeh_edev_actionable()
powerpc/eeh: Introduce eeh_set_channel_state()
powerpc/eeh: Introduce eeh_set_irq_state()
powerpc/eeh: Cleaner handling of EEH_DEV_NO_HANDLER
powerpc/eeh: Refactor report functions
arch/powerpc/include/asm/eeh.h | 11 +-
arch/powerpc/kernel/eeh.c | 19 +-
arch/powerpc/kernel/eeh_driver.c | 493 +++++++++++++++++++++------------------
arch/powerpc/kernel/eeh_pe.c | 26 +--
4 files changed, 294 insertions(+), 255 deletions(-)
--
2.16.1.74.g9b0b1f47b
^ permalink raw reply
* [PATCH v2 04/13] powerpc/eeh: Remove unused eeh_pcid_name()
From: Sam Bobroff @ 2018-05-25 3:11 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>
Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
arch/powerpc/kernel/eeh_driver.c | 14 --------------
1 file changed, 14 deletions(-)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 54333f6c9d67..ca9a73fe9cc5 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -39,20 +39,6 @@ struct eeh_rmv_data {
int removed;
};
-/**
- * eeh_pcid_name - Retrieve name of PCI device driver
- * @pdev: PCI device
- *
- * This routine is used to retrieve the name of PCI device driver
- * if that's valid.
- */
-static inline const char *eeh_pcid_name(struct pci_dev *pdev)
-{
- if (pdev && pdev->dev.driver)
- return pdev->dev.driver->name;
- return "";
-}
-
/**
* eeh_pcid_get - Get the PCI device driver
* @pdev: PCI device
--
2.16.1.74.g9b0b1f47b
^ permalink raw reply related
* [PATCH v2 08/13] powerpc/eeh: Introduce eeh_for_each_pe()
From: Sam Bobroff @ 2018-05-25 3:11 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>
Add a for_each-style macro for iterating through PEs without the
boilerplate required by a traversal function. eeh_pe_next() is now
exported, as it is now used directly in place.
Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
arch/powerpc/include/asm/eeh.h | 4 ++++
arch/powerpc/kernel/eeh_pe.c | 7 +++----
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index f02e0400e6f2..677102baf3cd 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -106,6 +106,9 @@ struct eeh_pe {
#define eeh_pe_for_each_dev(pe, edev, tmp) \
list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
+#define eeh_for_each_pe(root, pe) \
+ for (pe = root; pe; pe = eeh_pe_next(pe, root))
+
static inline bool eeh_pe_passed(struct eeh_pe *pe)
{
return pe ? !!atomic_read(&pe->pass_dev_cnt) : false;
@@ -267,6 +270,7 @@ typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
void eeh_set_pe_aux_size(int size);
int eeh_phb_pe_create(struct pci_controller *phb);
struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
+struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
int pe_no, int config_addr);
int eeh_add_to_parent_pe(struct eeh_dev *edev);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 38a4bcd8ed13..1b238ecc553e 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -142,8 +142,7 @@ struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
* The function is used to retrieve the next PE in the
* hierarchy PE tree.
*/
-static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
- struct eeh_pe *root)
+struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root)
{
struct list_head *next = pe->child_list.next;
@@ -178,7 +177,7 @@ void *eeh_pe_traverse(struct eeh_pe *root,
struct eeh_pe *pe;
void *ret;
- for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
+ eeh_for_each_pe(root, pe) {
ret = fn(pe, flag);
if (ret) return ret;
}
@@ -209,7 +208,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
}
/* Traverse root PE */
- for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
+ eeh_for_each_pe(root, pe) {
eeh_pe_for_each_dev(pe, edev, tmp) {
ret = fn(edev, flag);
if (ret)
--
2.16.1.74.g9b0b1f47b
^ permalink raw reply related
* [PATCH v2 03/13] powerpc/eeh: Fix use-after-release of EEH driver
From: Sam Bobroff @ 2018-05-25 3:11 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>
Correct two cases where eeh_pcid_get() is used to reference the driver's
module but the reference is dropped before the driver pointer is used.
In eeh_rmv_device() also refactor a little so that only two calls to
eeh_pcid_put() are needed, rather than three and the reference isn't
taken at all if it wasn't needed.
Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
arch/powerpc/kernel/eeh_driver.c | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 07e0a42035ce..54333f6c9d67 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -458,9 +458,11 @@ static void *eeh_add_virt_device(void *data, void *userdata)
driver = eeh_pcid_get(dev);
if (driver) {
- eeh_pcid_put(dev);
- if (driver->err_handler)
+ if (driver->err_handler) {
+ eeh_pcid_put(dev);
return NULL;
+ }
+ eeh_pcid_put(dev);
}
#ifdef CONFIG_PCI_IOV
@@ -497,17 +499,19 @@ static void *eeh_rmv_device(void *data, void *userdata)
if (eeh_dev_removed(edev))
return NULL;
- driver = eeh_pcid_get(dev);
- if (driver) {
- eeh_pcid_put(dev);
- if (removed &&
- eeh_pe_passed(edev->pe))
- return NULL;
- if (removed &&
- driver->err_handler &&
- driver->err_handler->error_detected &&
- driver->err_handler->slot_reset)
+ if (removed) {
+ if (eeh_pe_passed(edev->pe))
return NULL;
+ driver = eeh_pcid_get(dev);
+ if (driver) {
+ if (driver->err_handler &&
+ driver->err_handler->error_detected &&
+ driver->err_handler->slot_reset) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
+ eeh_pcid_put(dev);
+ }
}
/* Remove it from PCI subsystem */
--
2.16.1.74.g9b0b1f47b
^ permalink raw reply related
* [PATCH v2 02/13] powerpc/eeh: Add final message for successful recovery
From: Sam Bobroff @ 2018-05-25 3:11 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>
Add a single log line at the end of successful EEH recovery, so that
it's clear that event processing has finished.
Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
arch/powerpc/kernel/eeh_driver.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 56a60b9eb397..07e0a42035ce 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -910,6 +910,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Notify device driver to resume\n");
eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
+ pr_info("EEH: Recovery successful.\n");
goto final;
hard_fail:
--
2.16.1.74.g9b0b1f47b
^ permalink raw reply related
* [PATCH v2 01/13] powerpc/eeh: Add eeh_max_freezes to initial EEH log line
From: Sam Bobroff @ 2018-05-25 3:11 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <cover.1527217866.git.sbobroff@linux.ibm.com>
The current failure message includes the number of failures that have
occurred in the last hour (for a device) but it does not indicate
how many failures will be tolerated before the device is permanently
disabled.
Include the limit (eeh_max_freezes) to make this less surprising when
it happens.
Also remove the embedded newline from the existing message to make it
easier to grep for.
Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
arch/powerpc/kernel/eeh_driver.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index b8a329f04814..56a60b9eb397 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -778,14 +778,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
if (pe->freeze_count > eeh_max_freezes) {
- pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n"
- "last hour and has been permanently disabled.\n",
+ pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
pe->phb->global_number, pe->addr,
pe->freeze_count);
goto hard_fail;
}
- pr_warn("EEH: This PCI device has failed %d times in the last hour\n",
- pe->freeze_count);
+ pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+ pe->freeze_count, eeh_max_freezes);
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
--
2.16.1.74.g9b0b1f47b
^ permalink raw reply related
* Re: [PATCH net-next 0/8] ibmvnic: Failover hardening
From: David Miller @ 2018-05-25 2:19 UTC (permalink / raw)
To: tlfalcon; +Cc: netdev, nfont, jallen, linuxppc-dev
In-Reply-To: <1527100682-23099-1-git-send-email-tlfalcon@linux.vnet.ibm.com>
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Wed, 23 May 2018 13:37:54 -0500
> Introduce additional transport event hardening to handle
> events during device reset. In the driver's current state,
> if a transport event is received during device reset, it can
> cause the device to become unresponsive as invalid operations
> are processed as the backing device context changes. After
> a transport event, the device expects a request to begin the
> initialization process. If the driver is still processing
> a previously queued device reset in this state, it is likely
> to fail as firmware will reject any commands other than the
> one to initialize the client driver's Command-Response Queue.
>
> Instead of failing and becoming dormant, the driver will make
> one more attempt to recover and continue operation. This is
> achieved by setting a state flag, which if true will direct
> the driver to clean up all allocated resources and perform
> a hard reset in an attempt to bring the driver back to an
> operational state.
Series applied.
^ permalink raw reply
* Re: [PATCH 1/2] selftests/powerpc: Add ptrace tests for Protection Key registers
From: Thiago Jung Bauermann @ 2018-05-25 2:14 UTC (permalink / raw)
To: Michael Ellerman
Cc: linuxppc-dev, linux-kselftest, linux-kernel, Ram Pai,
Thiago Jung Bauermann
In-Reply-To: <87tvr6mlbe.fsf@concordia.ellerman.id.au>
Michael Ellerman <mpe@ellerman.id.au> writes:
> Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> writes:
>
>> This test exercises read and write access to the AMR, IAMR and UAMOR.
>>
>> Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
>> ---
>> tools/testing/selftests/powerpc/include/reg.h | 1 +
>> tools/testing/selftests/powerpc/ptrace/Makefile | 5 +-
>> tools/testing/selftests/powerpc/ptrace/child.h | 130 ++++++++
>> .../testing/selftests/powerpc/ptrace/ptrace-pkey.c | 326 +++++++++++++++++++++
>
> This is failing on machines without pkeys:
>
> test: ptrace_pkey
> tags: git_version:52e7d87
> [FAIL] Test FAILED on line 117
> [FAIL] Test FAILED on line 191
> failure: ptrace_pkey
>
>
> I think the first fail is in the child here:
>
> int ptrace_read_regs(pid_t child, unsigned long type, unsigned long regs[],
> int n)
> {
> struct iovec iov;
> long ret;
>
> FAIL_IF(start_trace(child));
>
> iov.iov_base = regs;
> iov.iov_len = n * sizeof(unsigned long);
>
> ret = ptrace(PTRACE_GETREGSET, child, type, &iov);
> FAIL_IF(ret != 0);
>
>
> Which makes sense.
Yes, that is indeed what is going on.
> The test needs to skip if pkeys are not available/enabled. Using the
> availability of the REGSET might actually be a nice way to detect that,
> because it's read-only.
I forgot to consider the case of pkeys not available or not enabled,
sorry about that.
I just sent a v2 which implements your suggestion above.
--
Thiago Jung Bauermann
IBM Linux Technology Center
^ permalink raw reply
* [PATCH v2 2/2] selftests/powerpc: Add core file test for Protection Key registers
From: Thiago Jung Bauermann @ 2018-05-25 2:11 UTC (permalink / raw)
To: linuxppc-dev
Cc: linux-kselftest, linux-kernel, Michael Ellerman, Ram Pai,
Thiago Jung Bauermann
In-Reply-To: <20180525021145.4520-1-bauerman@linux.ibm.com>
This test verifies that the AMR, IAMR and UAMOR are being written to a
process' core file.
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/powerpc/ptrace/Makefile | 5 +-
tools/testing/selftests/powerpc/ptrace/core-pkey.c | 461 +++++++++++++++++++++
2 files changed, 465 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/powerpc/ptrace/core-pkey.c
Changes in v2:
- Use PARENT_SKIP_IF_UNSUPPORTED macro in first call to
ptrace_read_regs(NT_PPC_PKEY). (Suggested by Michael Ellerman)
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 707ba734faf2..a10916c3f3e1 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
- ptrace-tm-spd-vsx ptrace-tm-spr ptrace-pkey
+ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-pkey core-pkey
include ../../lib.mk
@@ -12,6 +12,9 @@ CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
ptrace-pkey: ../harness.c ../utils.c ../lib/reg.S ptrace.h child.h ptrace-pkey.c
$(LINK.c) $^ $(LDLIBS) -pthread -o $@
+core-pkey: ../harness.c ../utils.c ../lib/reg.S ptrace.h child.h core-pkey.c
+ $(LINK.c) $^ $(LDLIBS) -pthread -o $@
+
$(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
clean:
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
new file mode 100644
index 000000000000..36bc312b1f5c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ptrace test for Memory Protection Key registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ * Copyright (C) 2018 IBM Corporation.
+ */
+#include <limits.h>
+#include <linux/kernel.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "ptrace.h"
+#include "child.h"
+
+#ifndef __NR_pkey_alloc
+#define __NR_pkey_alloc 384
+#endif
+
+#ifndef __NR_pkey_free
+#define __NR_pkey_free 385
+#endif
+
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY 0x110
+#endif
+
+#ifndef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE 0x4
+#endif
+
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
+#define CORE_FILE_LIMIT (5 * 1024 * 1024) /* 5 MB should be enough */
+
+static const char core_pattern_file[] = "/proc/sys/kernel/core_pattern";
+
+static const char user_write[] = "[User Write (Running)]";
+static const char core_read_running[] = "[Core Read (Running)]";
+
+/* Information shared between the parent and the child. */
+struct shared_info {
+ struct child_sync child_sync;
+
+ /* AMR value the parent expects to read in the core file. */
+ unsigned long amr;
+
+ /* IAMR value the parent expects to read in the core file. */
+ unsigned long iamr;
+
+ /* UAMOR value the parent expects to read in the core file. */
+ unsigned long uamor;
+
+ /* When the child crashed. */
+ time_t core_time;
+};
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+{
+ return syscall(__NR_pkey_alloc, flags, init_access_rights);
+}
+
+static int sys_pkey_free(int pkey)
+{
+ return syscall(__NR_pkey_free, pkey);
+}
+
+static int increase_core_file_limit(void)
+{
+ struct rlimit rlim;
+ int ret;
+
+ ret = getrlimit(RLIMIT_CORE, &rlim);
+ FAIL_IF(ret);
+
+ if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
+ rlim.rlim_cur = CORE_FILE_LIMIT;
+
+ if (rlim.rlim_max != RLIM_INFINITY &&
+ rlim.rlim_max < CORE_FILE_LIMIT)
+ rlim.rlim_max = CORE_FILE_LIMIT;
+
+ ret = setrlimit(RLIMIT_CORE, &rlim);
+ FAIL_IF(ret);
+ }
+
+ ret = getrlimit(RLIMIT_FSIZE, &rlim);
+ FAIL_IF(ret);
+
+ if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
+ rlim.rlim_cur = CORE_FILE_LIMIT;
+
+ if (rlim.rlim_max != RLIM_INFINITY &&
+ rlim.rlim_max < CORE_FILE_LIMIT)
+ rlim.rlim_max = CORE_FILE_LIMIT;
+
+ ret = setrlimit(RLIMIT_FSIZE, &rlim);
+ FAIL_IF(ret);
+ }
+
+ return TEST_PASS;
+}
+
+static int child(struct shared_info *info)
+{
+ bool disable_execute = true;
+ int pkey1, pkey2, pkey3;
+ int *ptr, ret;
+
+ /* Wait until parent fills out the initial register values. */
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ ret = increase_core_file_limit();
+ FAIL_IF(ret);
+
+ /* Get some pkeys so that we can change their bits in the AMR. */
+ pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
+ if (pkey1 < 0) {
+ pkey1 = sys_pkey_alloc(0, 0);
+ FAIL_IF(pkey1 < 0);
+
+ disable_execute = false;
+ }
+
+ pkey2 = sys_pkey_alloc(0, 0);
+ FAIL_IF(pkey2 < 0);
+
+ pkey3 = sys_pkey_alloc(0, 0);
+ FAIL_IF(pkey3 < 0);
+
+ info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2);
+
+ if (disable_execute)
+ info->iamr |= 1ul << pkeyshift(pkey1);
+
+ info->uamor |= 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2);
+
+ printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
+ user_write, info->amr, pkey1, pkey2, pkey3);
+
+ mtspr(SPRN_AMR, info->amr);
+
+ /*
+ * We won't use pkey3. This tests whether the kernel restores the UAMOR
+ * permissions after a key is freed.
+ */
+ sys_pkey_free(pkey3);
+
+ info->core_time = time(NULL);
+
+ /* Crash. */
+ ptr = 0;
+ *ptr = 1;
+
+ /* Shouldn't get here. */
+ FAIL_IF(true);
+
+ return TEST_FAIL;
+}
+
+/* Return file size if filename exists and pass sanity check, or zero if not. */
+static off_t try_core_file(const char *filename, struct shared_info *info,
+ pid_t pid)
+{
+ struct stat buf;
+ int ret;
+
+ ret = stat(filename, &buf);
+ if (ret == -1)
+ return TEST_FAIL;
+
+ /* Make sure we're not using a stale core file. */
+ return buf.st_mtime >= info->core_time ? buf.st_size : TEST_FAIL;
+}
+
+static Elf64_Nhdr *next_note(Elf64_Nhdr *nhdr)
+{
+ return (void *) nhdr + sizeof(*nhdr) +
+ __ALIGN_KERNEL(nhdr->n_namesz, 4) +
+ __ALIGN_KERNEL(nhdr->n_descsz, 4);
+}
+
+static int check_core_file(struct shared_info *info, Elf64_Ehdr *ehdr,
+ off_t core_size)
+{
+ unsigned long *regs;
+ Elf64_Phdr *phdr;
+ Elf64_Nhdr *nhdr;
+ size_t phdr_size;
+ void *p = ehdr, *note;
+ int ret;
+
+ ret = memcmp(ehdr->e_ident, ELFMAG, SELFMAG);
+ FAIL_IF(ret);
+
+ FAIL_IF(ehdr->e_type != ET_CORE);
+ FAIL_IF(ehdr->e_machine != EM_PPC64);
+ FAIL_IF(ehdr->e_phoff == 0 || ehdr->e_phnum == 0);
+
+ /*
+ * e_phnum is at most 65535 so calculating the size of the
+ * program header cannot overflow.
+ */
+ phdr_size = sizeof(*phdr) * ehdr->e_phnum;
+
+ /* Sanity check the program header table location. */
+ FAIL_IF(ehdr->e_phoff + phdr_size < ehdr->e_phoff);
+ FAIL_IF(ehdr->e_phoff + phdr_size > core_size);
+
+ /* Find the PT_NOTE segment. */
+ for (phdr = p + ehdr->e_phoff;
+ (void *) phdr < p + ehdr->e_phoff + phdr_size;
+ phdr += ehdr->e_phentsize)
+ if (phdr->p_type == PT_NOTE)
+ break;
+
+ FAIL_IF((void *) phdr >= p + ehdr->e_phoff + phdr_size);
+
+ /* Find the NT_PPC_PKEY note. */
+ for (nhdr = p + phdr->p_offset;
+ (void *) nhdr < p + phdr->p_offset + phdr->p_filesz;
+ nhdr = next_note(nhdr))
+ if (nhdr->n_type == NT_PPC_PKEY)
+ break;
+
+ FAIL_IF((void *) nhdr >= p + phdr->p_offset + phdr->p_filesz);
+ FAIL_IF(nhdr->n_descsz == 0);
+
+ p = nhdr;
+ note = p + sizeof(*nhdr) + __ALIGN_KERNEL(nhdr->n_namesz, 4);
+
+ regs = (unsigned long *) note;
+
+ printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+ core_read_running, regs[0], regs[1], regs[2]);
+
+ FAIL_IF(regs[0] != info->amr);
+ FAIL_IF(regs[1] != info->iamr);
+ FAIL_IF(regs[2] != info->uamor);
+
+ return TEST_PASS;
+}
+
+static int parent(struct shared_info *info, pid_t pid)
+{
+ char *filenames, *filename[3];
+ int fd, i, ret, status;
+ unsigned long regs[3];
+ off_t core_size;
+ void *core;
+
+ /*
+ * Get the initial values for AMR, IAMR and UAMOR and communicate them
+ * to the child.
+ */
+ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ info->amr = regs[0];
+ info->iamr = regs[1];
+ info->uamor = regs[2];
+
+ /* Wake up child so that it can set itself up. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait(&status);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ } else if (!WIFSIGNALED(status) || !WCOREDUMP(status)) {
+ printf("Child didn't dump core\n");
+ return TEST_FAIL;
+ }
+
+ /* Construct array of core file names to try. */
+
+ filename[0] = filenames = malloc(PATH_MAX);
+ if (!filenames) {
+ perror("Error allocating memory");
+ return TEST_FAIL;
+ }
+
+ ret = snprintf(filename[0], PATH_MAX, "core-pkey.%d", pid);
+ if (ret < 0 || ret >= PATH_MAX) {
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ filename[1] = filename[0] + ret + 1;
+ ret = snprintf(filename[1], PATH_MAX - ret - 1, "core.%d", pid);
+ if (ret < 0 || ret >= PATH_MAX - ret - 1) {
+ ret = TEST_FAIL;
+ goto out;
+ }
+ filename[2] = "core";
+
+ for (i = 0; i < 3; i++) {
+ core_size = try_core_file(filename[i], info, pid);
+ if (core_size != TEST_FAIL)
+ break;
+ }
+
+ if (i == 3) {
+ printf("Couldn't find core file\n");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ fd = open(filename[i], O_RDONLY);
+ if (fd == -1) {
+ perror("Error opening core file");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (core == (void *) -1) {
+ perror("Error mmaping core file");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ ret = check_core_file(info, core, core_size);
+
+ munmap(core, core_size);
+ close(fd);
+ unlink(filename[i]);
+
+ out:
+ free(filenames);
+
+ return ret;
+}
+
+static int write_core_pattern(const char *core_pattern)
+{
+ size_t len = strlen(core_pattern), ret;
+ FILE *f;
+
+ f = fopen(core_pattern_file, "w");
+ if (!f) {
+ perror("Error writing to core_pattern file");
+ return TEST_FAIL;
+ }
+
+ ret = fwrite(core_pattern, 1, len, f);
+ fclose(f);
+ if (ret != len) {
+ perror("Error writing to core_pattern file");
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
+static int setup_core_pattern(char **core_pattern_, bool *changed_)
+{
+ FILE *f;
+ char *core_pattern;
+ int ret;
+
+ core_pattern = malloc(PATH_MAX);
+ if (!core_pattern) {
+ perror("Error allocating memory");
+ return TEST_FAIL;
+ }
+
+ f = fopen(core_pattern_file, "r");
+ if (!f) {
+ perror("Error opening core_pattern file");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ ret = fread(core_pattern, 1, PATH_MAX, f);
+ fclose(f);
+ if (!ret) {
+ perror("Error reading core_pattern file");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ /* Check whether we can predict the name of the core file. */
+ if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p"))
+ *changed_ = false;
+ else {
+ ret = write_core_pattern("core-pkey.%p");
+ if (ret)
+ goto out;
+
+ *changed_ = true;
+ }
+
+ *core_pattern_ = core_pattern;
+ ret = TEST_PASS;
+
+ out:
+ if (ret)
+ free(core_pattern);
+
+ return ret;
+}
+
+static int core_pkey(void)
+{
+ char *core_pattern;
+ bool changed_core_pattern;
+ struct shared_info *info;
+ int shm_id;
+ int ret;
+ pid_t pid;
+
+ ret = setup_core_pattern(&core_pattern, &changed_core_pattern);
+ if (ret)
+ return ret;
+
+ shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
+ info = shmat(shm_id, NULL, 0);
+
+ ret = init_child_sync(&info->child_sync);
+ if (ret)
+ return ret;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ ret = TEST_FAIL;
+ } else if (pid == 0)
+ ret = child(info);
+ else
+ ret = parent(info, pid);
+
+ shmdt(info);
+
+ if (pid) {
+ destroy_child_sync(&info->child_sync);
+ shmctl(shm_id, IPC_RMID, NULL);
+
+ if (changed_core_pattern)
+ write_core_pattern(core_pattern);
+ }
+
+ free(core_pattern);
+
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(core_pkey, "core_pkey");
+}
^ permalink raw reply related
* [PATCH v2 1/2] selftests/powerpc: Add ptrace tests for Protection Key registers
From: Thiago Jung Bauermann @ 2018-05-25 2:11 UTC (permalink / raw)
To: linuxppc-dev
Cc: linux-kselftest, linux-kernel, Michael Ellerman, Ram Pai,
Thiago Jung Bauermann
This test exercises read and write access to the AMR, IAMR and UAMOR.
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/powerpc/include/reg.h | 1 +
tools/testing/selftests/powerpc/ptrace/Makefile | 5 +-
tools/testing/selftests/powerpc/ptrace/child.h | 139 +++++++++
.../testing/selftests/powerpc/ptrace/ptrace-pkey.c | 327 +++++++++++++++++++++
tools/testing/selftests/powerpc/ptrace/ptrace.h | 38 +++
5 files changed, 509 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/powerpc/ptrace/child.h
create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
Changes in v2:
- Added PARENT_SKIP_IF_UNSUPPORTED macro in child.h and use it in first
call to ptrace_read_regs(NT_PPC_PKEY). (Suggested by Michael Ellerman)
- Changed ptrace_read_regs() to simply return the ptrace error value in
case of error and avoid calling fprintf() so that errno is preserved.
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 4afdebcce4cd..7f348c059bc2 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -54,6 +54,7 @@
#define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */
#define SPRN_DSCR 0x03 /* Data Stream Control Register */
#define SPRN_PPR 896 /* Program Priority Register */
+#define SPRN_AMR 13 /* Authority Mask Register - problem state */
/* TEXASR register bits */
#define TEXASR_FC 0xFE00000000000000
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 480305266504..707ba734faf2 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
- ptrace-tm-spd-vsx ptrace-tm-spr
+ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-pkey
include ../../lib.mk
@@ -9,6 +9,9 @@ all: $(TEST_PROGS)
CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
+ptrace-pkey: ../harness.c ../utils.c ../lib/reg.S ptrace.h child.h ptrace-pkey.c
+ $(LINK.c) $^ $(LDLIBS) -pthread -o $@
+
$(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
clean:
diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h
new file mode 100644
index 000000000000..d7275b7b33dc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/child.h
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Helper functions to sync execution between parent and child processes.
+ *
+ * Copyright 2018, Thiago Jung Bauermann, IBM Corporation.
+ */
+#include <stdio.h>
+#include <stdbool.h>
+#include <semaphore.h>
+
+/*
+ * Information in a shared memory location for synchronization between child and
+ * parent.
+ */
+struct child_sync {
+ /* The parent waits on this semaphore. */
+ sem_t sem_parent;
+
+ /* If true, the child should give up as well. */
+ bool parent_gave_up;
+
+ /* The child waits on this semaphore. */
+ sem_t sem_child;
+
+ /* If true, the parent should give up as well. */
+ bool child_gave_up;
+};
+
+#define CHILD_FAIL_IF(x, sync) \
+ do { \
+ if (x) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d\n", __LINE__); \
+ (sync)->child_gave_up = true; \
+ prod_parent(sync); \
+ return 1; \
+ } \
+ } while (0)
+
+#define PARENT_FAIL_IF(x, sync) \
+ do { \
+ if (x) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d\n", __LINE__); \
+ (sync)->parent_gave_up = true; \
+ prod_child(sync); \
+ return 1; \
+ } \
+ } while (0)
+
+#define PARENT_SKIP_IF_UNSUPPORTED(x, sync) \
+ do { \
+ if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \
+ (sync)->parent_gave_up = true; \
+ prod_child(sync); \
+ SKIP_IF(1); \
+ } \
+ } while (0)
+
+int init_child_sync(struct child_sync *sync)
+{
+ int ret;
+
+ ret = sem_init(&sync->sem_parent, 1, 0);
+ if (ret) {
+ perror("Semaphore initialization failed");
+ return 1;
+ }
+
+ ret = sem_init(&sync->sem_child, 1, 0);
+ if (ret) {
+ perror("Semaphore initialization failed");
+ return 1;
+ }
+
+ return 0;
+}
+
+void destroy_child_sync(struct child_sync *sync)
+{
+ sem_destroy(&sync->sem_parent);
+ sem_destroy(&sync->sem_child);
+}
+
+int wait_child(struct child_sync *sync)
+{
+ int ret;
+
+ /* Wait until the child prods us. */
+ ret = sem_wait(&sync->sem_parent);
+ if (ret) {
+ perror("Error waiting for child");
+ return 1;
+ }
+
+ return sync->child_gave_up;
+}
+
+int prod_child(struct child_sync *sync)
+{
+ int ret;
+
+ /* Unblock the child now. */
+ ret = sem_post(&sync->sem_child);
+ if (ret) {
+ perror("Error prodding child");
+ return 1;
+ }
+
+ return 0;
+}
+
+int wait_parent(struct child_sync *sync)
+{
+ int ret;
+
+ /* Wait until the parent prods us. */
+ ret = sem_wait(&sync->sem_child);
+ if (ret) {
+ perror("Error waiting for parent");
+ return 1;
+ }
+
+ return sync->parent_gave_up;
+}
+
+int prod_parent(struct child_sync *sync)
+{
+ int ret;
+
+ /* Unblock the parent now. */
+ ret = sem_post(&sync->sem_parent);
+ if (ret) {
+ perror("Error prodding parent");
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
new file mode 100644
index 000000000000..5cf631f792cc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ptrace test for Memory Protection Key registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ * Copyright (C) 2018 IBM Corporation.
+ */
+#include "ptrace.h"
+#include "child.h"
+
+#ifndef __NR_pkey_alloc
+#define __NR_pkey_alloc 384
+#endif
+
+#ifndef __NR_pkey_free
+#define __NR_pkey_free 385
+#endif
+
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY 0x110
+#endif
+
+#ifndef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE 0x4
+#endif
+
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
+static const char user_read[] = "[User Read (Running)]";
+static const char user_write[] = "[User Write (Running)]";
+static const char ptrace_read_running[] = "[Ptrace Read (Running)]";
+static const char ptrace_write_running[] = "[Ptrace Write (Running)]";
+
+/* Information shared between the parent and the child. */
+struct shared_info {
+ struct child_sync child_sync;
+
+ /* AMR value the parent expects to read from the child. */
+ unsigned long amr1;
+
+ /* AMR value the parent is expected to write to the child. */
+ unsigned long amr2;
+
+ /* AMR value that ptrace should refuse to write to the child. */
+ unsigned long amr3;
+
+ /* IAMR value the parent expects to read from the child. */
+ unsigned long expected_iamr;
+
+ /* UAMOR value the parent expects to read from the child. */
+ unsigned long expected_uamor;
+
+ /*
+ * IAMR and UAMOR values that ptrace should refuse to write to the child
+ * (even though they're valid ones) because userspace doesn't have
+ * access to those registers.
+ */
+ unsigned long new_iamr;
+ unsigned long new_uamor;
+};
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+{
+ return syscall(__NR_pkey_alloc, flags, init_access_rights);
+}
+
+static int sys_pkey_free(int pkey)
+{
+ return syscall(__NR_pkey_free, pkey);
+}
+
+static int child(struct shared_info *info)
+{
+ unsigned long reg;
+ bool disable_execute = true;
+ int pkey1, pkey2, pkey3;
+ int ret;
+
+ /* Wait until parent fills out the initial register values. */
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ /* Get some pkeys so that we can change their bits in the AMR. */
+ pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
+ if (pkey1 < 0) {
+ pkey1 = sys_pkey_alloc(0, 0);
+ CHILD_FAIL_IF(pkey1 < 0, &info->child_sync);
+
+ disable_execute = false;
+ }
+
+ pkey2 = sys_pkey_alloc(0, 0);
+ CHILD_FAIL_IF(pkey2 < 0, &info->child_sync);
+
+ pkey3 = sys_pkey_alloc(0, 0);
+ CHILD_FAIL_IF(pkey3 < 0, &info->child_sync);
+
+ info->amr1 |= 3ul << pkeyshift(pkey1);
+ info->amr2 |= 3ul << pkeyshift(pkey2);
+ info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3);
+
+ if (disable_execute)
+ info->expected_iamr |= 1ul << pkeyshift(pkey1);
+
+ info->expected_uamor |= 3ul << pkeyshift(pkey1) |
+ 3ul << pkeyshift(pkey2);
+ info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2);
+ info->new_uamor |= 3ul << pkeyshift(pkey1);
+
+ /*
+ * We won't use pkey3. We just want a plausible but invalid key to test
+ * whether ptrace will let us write to AMR bits we are not supposed to.
+ *
+ * This also tests whether the kernel restores the UAMOR permissions
+ * after a key is freed.
+ */
+ sys_pkey_free(pkey3);
+
+ printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
+ user_write, info->amr1, pkey1, pkey2, pkey3);
+
+ mtspr(SPRN_AMR, info->amr1);
+
+ /* Wait for parent to read our AMR value and write a new one. */
+ ret = prod_parent(&info->child_sync);
+ CHILD_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ reg = mfspr(SPRN_AMR);
+
+ printf("%-30s AMR: %016lx\n", user_read, reg);
+
+ CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+ /*
+ * Wait for parent to try to write an invalid AMR value.
+ */
+ ret = prod_parent(&info->child_sync);
+ CHILD_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ reg = mfspr(SPRN_AMR);
+
+ printf("%-30s AMR: %016lx\n", user_read, reg);
+
+ CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+ /*
+ * Wait for parent to try to write an IAMR and a UAMOR value. We can't
+ * verify them, but we can verify that the AMR didn't change.
+ */
+ ret = prod_parent(&info->child_sync);
+ CHILD_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ reg = mfspr(SPRN_AMR);
+
+ printf("%-30s AMR: %016lx\n", user_read, reg);
+
+ CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+ /* Now let parent now that we are finished. */
+
+ ret = prod_parent(&info->child_sync);
+ CHILD_FAIL_IF(ret, &info->child_sync);
+
+ return TEST_PASS;
+}
+
+static int parent(struct shared_info *info, pid_t pid)
+{
+ unsigned long regs[3];
+ int ret, status;
+
+ /*
+ * Get the initial values for AMR, IAMR and UAMOR and communicate them
+ * to the child.
+ */
+ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ info->amr1 = info->amr2 = info->amr3 = regs[0];
+ info->expected_iamr = info->new_iamr = regs[1];
+ info->expected_uamor = info->new_uamor = regs[2];
+
+ /* Wake up child so that it can set itself up. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_child(&info->child_sync);
+ if (ret)
+ return ret;
+
+ /* Verify that we can read the pkey registers from the child. */
+ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+ ptrace_read_running, regs[0], regs[1], regs[2]);
+
+ PARENT_FAIL_IF(regs[0] != info->amr1, &info->child_sync);
+ PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
+ PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);
+
+ /* Write valid AMR value in child. */
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr2, 1);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr2);
+
+ /* Wake up child so that it can verify it changed. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_child(&info->child_sync);
+ if (ret)
+ return ret;
+
+ /* Write invalid AMR value in child. */
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3);
+
+ /* Wake up child so that it can verify it didn't change. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_child(&info->child_sync);
+ if (ret)
+ return ret;
+
+ /* Try to write to IAMR. */
+ regs[0] = info->amr1;
+ regs[1] = info->new_iamr;
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
+ PARENT_FAIL_IF(!ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx IAMR: %016lx\n",
+ ptrace_write_running, regs[0], regs[1]);
+
+ /* Try to write to IAMR and UAMOR. */
+ regs[2] = info->new_uamor;
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_FAIL_IF(!ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+ ptrace_write_running, regs[0], regs[1], regs[2]);
+
+ /* Verify that all registers still have their expected values. */
+ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+ ptrace_read_running, regs[0], regs[1], regs[2]);
+
+ PARENT_FAIL_IF(regs[0] != info->amr2, &info->child_sync);
+ PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
+ PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);
+
+ /* Wake up child so that it can verify AMR didn't change and wrap up. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait(&status);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ ret = TEST_PASS;
+ } else if (!WIFEXITED(status)) {
+ printf("Child exited abnormally\n");
+ ret = TEST_FAIL;
+ } else
+ ret = WEXITSTATUS(status) ? TEST_FAIL : TEST_PASS;
+
+ return ret;
+}
+
+static int ptrace_pkey(void)
+{
+ struct shared_info *info;
+ int shm_id;
+ int ret;
+ pid_t pid;
+
+ shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
+ info = shmat(shm_id, NULL, 0);
+
+ ret = init_child_sync(&info->child_sync);
+ if (ret)
+ return ret;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ ret = TEST_FAIL;
+ } else if (pid == 0)
+ ret = child(info);
+ else
+ ret = parent(info, pid);
+
+ shmdt(info);
+
+ if (pid) {
+ destroy_child_sync(&info->child_sync);
+ shmctl(shm_id, IPC_RMID, NULL);
+ }
+
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_pkey, "ptrace_pkey");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h
index 19fb825270a1..34201cfa8335 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace.h
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h
@@ -102,6 +102,44 @@ int cont_trace(pid_t child)
return TEST_PASS;
}
+int ptrace_read_regs(pid_t child, unsigned long type, unsigned long regs[],
+ int n)
+{
+ struct iovec iov;
+ long ret;
+
+ FAIL_IF(start_trace(child));
+
+ iov.iov_base = regs;
+ iov.iov_len = n * sizeof(unsigned long);
+
+ ret = ptrace(PTRACE_GETREGSET, child, type, &iov);
+ if (ret)
+ return ret;
+
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+long ptrace_write_regs(pid_t child, unsigned long type, unsigned long regs[],
+ int n)
+{
+ struct iovec iov;
+ long ret;
+
+ FAIL_IF(start_trace(child));
+
+ iov.iov_base = regs;
+ iov.iov_len = n * sizeof(unsigned long);
+
+ ret = ptrace(PTRACE_SETREGSET, child, type, &iov);
+
+ FAIL_IF(stop_trace(child));
+
+ return ret;
+}
+
/* TAR, PPR, DSCR */
int show_tar_registers(pid_t child, unsigned long *out)
{
^ permalink raw reply related
* Re: [PATCH] KVM: PPC: remove mmio_vsx_tx_sx_enabled in PR KVM MMIO emulation
From: Simon Guo @ 2018-05-24 9:21 UTC (permalink / raw)
To: kvm-ppc; +Cc: Paul Mackerras, kvm, linuxppc-dev
In-Reply-To: <1527152486-13843-1-git-send-email-wei.guo.simon@gmail.com>
On Thu, May 24, 2018 at 05:01:26PM +0800, wei.guo.simon@gmail.com wrote:
> From: Simon Guo <wei.guo.simon@gmail.com>
>
> Originally PR KVM MMIO emulation uses only 0~31#(5 bits) for VSR
> reg number, and use mmio_vsx_tx_sx_enabled field together for
> 0~63# VSR regs.
>
> Currently PR KVM MMIO emulation is reimplemented with analyse_instr()
> assistence. analyse_instr() returns 0~63 for VSR register number, so
> it is not necessary to use additional mmio_vsx_tx_sx_enabled field
> any more.
>
> This patch extends related reg bits(expand io_gpr to u16 from u8
> and use 6 bits for VSR reg#), so that mmio_vsx_tx_sx_enabled can
> be removed.
>
> Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
It applies to HV KVM too. I will correct the commit message in next
version.
Thanks,
- Simon
^ permalink raw reply
* Re: [v4, 03/29] powerpc: export tm_enable()/tm_disable/tm_abort() APIs
From: Michael Ellerman @ 2018-05-25 1:59 UTC (permalink / raw)
To: wei.guo.simon, linuxppc-dev; +Cc: Simon Guo, kvm-ppc, kvm
In-Reply-To: <1527058932-7434-4-git-send-email-wei.guo.simon@gmail.com>
On Wed, 2018-05-23 at 07:01:46 UTC, wei.guo.simon@gmail.com wrote:
> From: Simon Guo <wei.guo.simon@gmail.com>
>
> This patch exports tm_enable()/tm_disable/tm_abort() APIs, which
> will be used for PR KVM transaction memory logic.
>
> Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
> Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Applied to powerpc topic/ppc-kvm, thanks.
https://git.kernel.org/powerpc/c/eacbb218fbbab5923775059f7232a9
cheers
^ permalink raw reply
* Re: [v4,02/29] powerpc: add TEXASR related macros
From: Michael Ellerman @ 2018-05-25 1:59 UTC (permalink / raw)
To: wei.guo.simon, linuxppc-dev; +Cc: Simon Guo, kvm-ppc, kvm
In-Reply-To: <1527058932-7434-3-git-send-email-wei.guo.simon@gmail.com>
On Wed, 2018-05-23 at 07:01:45 UTC, wei.guo.simon@gmail.com wrote:
> From: Simon Guo <wei.guo.simon@gmail.com>
>
> This patches add some macros for CR0/TEXASR bits so that PR KVM TM
> logic(tbegin./treclaim./tabort.) can make use of them later.
>
> Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
> Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Applied to powerpc topic/ppc-kvm, thanks.
https://git.kernel.org/powerpc/c/ab3759b5734544dd1430527c3d8973
cheers
^ permalink raw reply
* Re: [v4,01/29] powerpc: export symbol msr_check_and_set().
From: Michael Ellerman @ 2018-05-25 1:59 UTC (permalink / raw)
To: wei.guo.simon, linuxppc-dev; +Cc: Simon Guo, kvm-ppc, kvm
In-Reply-To: <1527058932-7434-2-git-send-email-wei.guo.simon@gmail.com>
On Wed, 2018-05-23 at 07:01:44 UTC, wei.guo.simon@gmail.com wrote:
> From: Simon Guo <wei.guo.simon@gmail.com>
>
> PR KVM will need to reuse msr_check_and_set().
> This patch exports this API for reuse.
>
> Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
> Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Applied to powerpc topic/ppc-kvm, thanks.
https://git.kernel.org/powerpc/c/d1c7211281c5e1799f00b222815753
cheers
^ permalink raw reply
* Re: [v2, 5/9] powerpc/mm/radix: implement LPID based TLB flushes to be used by KVM
From: Michael Ellerman @ 2018-05-25 1:59 UTC (permalink / raw)
To: Nicholas Piggin, kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20180509022022.21226-6-npiggin@gmail.com>
On Wed, 2018-05-09 at 02:20:18 UTC, Nicholas Piggin wrote:
> Implement a local TLB flush for invalidating an LPID with variants for
> process or partition scope. And a global TLB flush for invalidating
> a partition scoped page of an LPID.
>
> These will be used by KVM in subsequent patches.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Applied to powerpc topic/ppc-kvm, thanks.
https://git.kernel.org/powerpc/c/0078778a86b14f85bf50e96d9ddeb3
cheers
^ permalink raw reply
* Re: [v2,04/19] powerpc/kvm: Prefer fault_in_pages_readable function
From: Michael Ellerman @ 2018-05-25 1:59 UTC (permalink / raw)
To: Mathieu Malaterre; +Cc: Mathieu Malaterre, linux-kernel, kvm-ppc, linuxppc-dev
In-Reply-To: <20180328195811.27758-1-malat@debian.org>
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 583 bytes --]
On Wed, 2018-03-28 at 19:58:11 UTC, Mathieu Malaterre wrote:
> Directly use fault_in_pages_readable instead of manual __get_user code. Fix
> warning treated as error with W=1:
>
> arch/powerpc/kernel/kvm.c:675:6: error: variable ‘tmp’ set but not used [-Werror=unused-but-set-variable]
>
> Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
> Signed-off-by: Mathieu Malaterre <malat@debian.org>
> Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
Applied to powerpc topic/ppc-kvm, thanks.
https://git.kernel.org/powerpc/c/9f9eae5ce717f497812dfc1bda5219
cheers
^ permalink raw reply
* [PATCH v2 1/1] powerpc/pseries: fix EEH recovery of some IOV devices
From: Sam Bobroff @ 2018-05-25 0:31 UTC (permalink / raw)
To: linuxppc-dev, linux-pci; +Cc: mpe, bhelgaas, bryantly
EEH recovery currently fails on pSeries for some IOV capable PCI
devices, if CONFIG_PCI_IOV is on and the hypervisor doesn't provide
certain device tree properties for the device. (Found on an IOV
capable device using the ipr driver.)
Recovery fails in pci_enable_resources() at the check on r->parent,
because r->flags is set and r->parent is not. This state is due to
sriov_init() setting the start, end and flags members of the IOV BARs
but the parent not being set later in
pseries_pci_fixup_iov_resources(), because the
"ibm,open-sriov-vf-bar-info" property is missing.
Correct this by zeroing the resource flags for IOV BARs when they
can't be configured.
Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
Hi,
This is a fix to allow EEH recovery to succeed in a specific situation,
which I've tried to explain in the commit message.
As with the RFC version, the IOV BARs are disabled by setting the resource
flags to 0 but the other fields are now left as-is because that is what is done
elsewhere (see sriov_init() and __pci_read_base()).
I've also examined the concern raised by Bjorn Helgaas, that VFs could be
enabled later after the BARs are disabled, and it already seems safe: enabling
VFs (on pseries) depends on another device tree property,
"ibm,number-of-configurable-vfs" as well as support for the RTAS function
"ibm_map_pes". Since these are all part of the hypervisor's support for IOV it
seems unlikely that we would ever see some of them but not all. (None are
currently provided by QEMU/KVM.) (Additionally, the ipr driver on which the EEH
recovery failure was discovered doesn't even seem to have SR-IOV support so it
certainly can't enable VFs.)
Cheers,
Sam.
====== v1 -> v2: ======
Patch 1/1: powerpc/pseries: fix EEH recovery of some IOV devices
* Moved the BAR disabling code to a function.
* Also check in pseries_pci_fixup_resources().
====== v1: ======
Patch 1/1: powerpc/pseries: fix EEH recovery of IOV devices
arch/powerpc/platforms/pseries/setup.c | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index b55ad4286dc7..0a9e4243ae1d 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -645,6 +645,15 @@ void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
}
}
+static void pseries_disable_sriov_resources(struct pci_dev *pdev)
+{
+ int i;
+
+ pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n");
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+ pdev->resource[i + PCI_IOV_RESOURCES].flags = 0;
+}
+
static void pseries_pci_fixup_resources(struct pci_dev *pdev)
{
const int *indexes;
@@ -652,10 +661,10 @@ static void pseries_pci_fixup_resources(struct pci_dev *pdev)
/*Firmware must support open sriov otherwise dont configure*/
indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
- if (!indexes)
- return;
- /* Assign the addresses from device tree*/
- of_pci_set_vf_bar_size(pdev, indexes);
+ if (indexes)
+ of_pci_set_vf_bar_size(pdev, indexes);
+ else
+ pseries_disable_sriov_resources(pdev);
}
static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
@@ -667,10 +676,10 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
return;
/*Firmware must support open sriov otherwise dont configure*/
indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
- if (!indexes)
- return;
- /* Assign the addresses from device tree*/
- of_pci_parse_iov_addrs(pdev, indexes);
+ if (indexes)
+ of_pci_parse_iov_addrs(pdev, indexes);
+ else
+ pseries_disable_sriov_resources(pdev);
}
static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
--
2.16.1.74.g9b0b1f47b
^ permalink raw reply related
* Re: [PATCH 2/2] i2c: opal: don't check number of messages in the driver
From: Wolfram Sang @ 2018-05-24 20:07 UTC (permalink / raw)
To: linux-i2c
Cc: Peter Rosin, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman, linuxppc-dev, linux-kernel
In-Reply-To: <20180520065035.7920-3-wsa@the-dreams.de>
[-- Attachment #1: Type: text/plain, Size: 346 bytes --]
On Sun, May 20, 2018 at 08:50:34AM +0200, Wolfram Sang wrote:
> Since commit 1eace8344c02 ("i2c: add param sanity check to
> i2c_transfer()") and b7f625840267 ("i2c: add quirk checks to core"), the
> I2C core does this check now. We can remove it here.
>
> Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Applied to for-next, thanks!
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* Re: [PATCH v4] powerpc: Implement csum_ipv6_magic in assembly
From: Segher Boessenkool @ 2018-05-24 20:03 UTC (permalink / raw)
To: Christophe Leroy
Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
linux-kernel, linuxppc-dev
In-Reply-To: <7a756b816161007903ca8b28aec662de27135c55.1527161282.git.christophe.leroy@c-s.fr>
Looks fine to me (one comment below):
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
On Thu, May 24, 2018 at 11:33:18AM +0000, Christophe Leroy wrote:
> +_GLOBAL(csum_ipv6_magic)
> + lwz r8, 0(r3)
> + lwz r9, 4(r3)
> + addc r0, r7, r8
> + lwz r10, 8(r3)
> + adde r0, r0, r9
> + lwz r11, 12(r3)
> + adde r0, r0, r10
> + lwz r8, 0(r4)
> + adde r0, r0, r11
> + lwz r9, 4(r4)
> + adde r0, r0, r8
> + lwz r10, 8(r4)
> + adde r0, r0, r9
> + lwz r11, 12(r4)
> + adde r0, r0, r10
> + add r5, r5, r6 /* assumption: len + proto doesn't carry */
> + adde r0, r0, r11
> + adde r0, r0, r5
> + addze r0, r0
> + rotlwi r3, r0, 16
> + add r3, r0, r3
> + not r3, r3
> + rlwinm r3, r3, 16, 16, 31
> + blr
> +EXPORT_SYMBOL(csum_ipv6_magic)
> diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
> index d7f1a966136e..bf0546e546fc 100644
> --- a/arch/powerpc/lib/checksum_64.S
> +++ b/arch/powerpc/lib/checksum_64.S
> @@ -429,3 +429,31 @@ dstnr; stb r6,0(r4)
> stw r6,0(r8)
> blr
> EXPORT_SYMBOL(csum_partial_copy_generic)
> +
> +/*
> + * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
> + * const struct in6_addr *daddr,
> + * __u32 len, __u8 proto, __wsum sum)
> + */
> +
> +_GLOBAL(csum_ipv6_magic)
> + ld r8, 0(r3)
> + ld r9, 8(r3)
> + add r5, r5, r6
> + addc r0, r8, r9
> + ld r10, 0(r4)
> + ld r11, 8(r4)
> + adde r0, r0, r10
> + add r5, r5, r7
> + adde r0, r0, r11
> + adde r0, r0, r5
> + addze r0, r0
> + rotldi r3 ,r0, 32 /* fold two 32 bit halves together */
Typo (s/ ,/, /).
> + add r3, r0, r3
> + srdi r0, r3, 32
> + rotlwi r3, r0, 16 /* fold two 16 bit halves together */
> + add r3, r0, r3
> + not r3, r3
> + rlwinm r3, r3, 16, 16, 31
> + blr
> +EXPORT_SYMBOL(csum_ipv6_magic)
> --
> 2.13.3
^ permalink raw reply
* Re: [PATCH] powerpc/32: Optimise __csum_partial()
From: Segher Boessenkool @ 2018-05-24 19:58 UTC (permalink / raw)
To: Christophe Leroy
Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
linux-kernel, linuxppc-dev
In-Reply-To: <484bcfaccc1ec3d91b74aeaaa26a0ae66fe0955a.1527160868.git.christophe.leroy@c-s.fr>
On Thu, May 24, 2018 at 11:22:27AM +0000, Christophe Leroy wrote:
> Improve __csum_partial by interleaving loads and adds.
>
> On a 8xx, it brings neither improvement nor degradation.
> On a 83xx, it brings a 25% improvement.
Thanks! Looks fine to me.
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
> ---
> arch/powerpc/lib/checksum_32.S | 13 +++++++++++--
> 1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
> index d2238ea82209..aa224069f93a 100644
> --- a/arch/powerpc/lib/checksum_32.S
> +++ b/arch/powerpc/lib/checksum_32.S
> @@ -47,16 +47,25 @@ _GLOBAL(__csum_partial)
> bdnz 2b
> 21: srwi. r6,r4,4 /* # blocks of 4 words to do */
> beq 3f
> + lwz r0,4(r3)
> mtctr r6
> -22: lwz r0,4(r3)
> lwz r6,8(r3)
> + adde r5,r5,r0
> lwz r7,12(r3)
> + adde r5,r5,r6
> lwzu r8,16(r3)
> + adde r5,r5,r7
> + bdz 23f
> +22: lwz r0,4(r3)
> + adde r5,r5,r8
> + lwz r6,8(r3)
> adde r5,r5,r0
> + lwz r7,12(r3)
> adde r5,r5,r6
> + lwzu r8,16(r3)
> adde r5,r5,r7
> - adde r5,r5,r8
> bdnz 22b
> +23: adde r5,r5,r8
> 3: andi. r0,r4,2
> beq+ 4f
> lhz r0,4(r3)
> --
> 2.13.3
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox