LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v2 2/3] powerpc/eeh: Hotplug improvement
From: Gavin Shan @ 2014-01-15  5:16 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1389762974-14176-1-git-send-email-shangw@linux.vnet.ibm.com>

When an EEH error hits one specific PCI device before its driver
is loaded, we apply hotplug to recover from the error. During the
plug time, the PCI device will be probed and its driver is loaded.
Then we wrongly call the error handlers if the driver supports
EEH explicitly.

The patch intends to fix that by introducing flag EEH_DEV_NO_HANDLER and
setting it before we remove the PCI device. In turn, we can avoid wrongly
calling the error handlers of the PCI device after its driver is loaded.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h   |    3 ++-
 arch/powerpc/kernel/eeh.c        |   15 +++++++++++++++
 arch/powerpc/kernel/eeh_driver.c |   10 +++++++---
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index e37db7f..8e31dad 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -90,7 +90,8 @@ struct eeh_pe {
 #define EEH_DEV_IRQ_DISABLED	(1 << 3)	/* Interrupt disabled	*/
 #define EEH_DEV_DISCONNECTED	(1 << 4)	/* Removing from PE	*/
 
-#define EEH_DEV_SYSFS		(1 << 8)	/* Sysfs created        */
+#define EEH_DEV_NO_HANDLER	(1 << 8)	/* No error handler	*/
+#define EEH_DEV_SYSFS		(1 << 9)	/* Sysfs created	*/
 
 struct eeh_dev {
 	int mode;			/* EEH mode			*/
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 4bd687d..6a118db 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -921,6 +921,13 @@ void eeh_add_device_late(struct pci_dev *dev)
 		eeh_sysfs_remove_device(edev->pdev);
 		edev->mode &= ~EEH_DEV_SYSFS;
 
+		/*
+		 * We definitely should have the PCI device removed
+		 * though it wasn't correctly. So we needn't call
+		 * into error handler afterwards.
+		 */
+		edev->mode |= EEH_DEV_NO_HANDLER;
+
 		edev->pdev = NULL;
 		dev->dev.archdata.edev = NULL;
 	}
@@ -1023,6 +1030,14 @@ void eeh_remove_device(struct pci_dev *dev)
 	else
 		edev->mode |= EEH_DEV_DISCONNECTED;
 
+	/*
+	 * We're removing from the PCI subsystem, that means
+	 * the PCI device driver can't support EEH or not
+	 * well. So we rely on hotplug completely to do recovery
+	 * for the specific PCI device.
+	 */
+	edev->mode |= EEH_DEV_NO_HANDLER;
+
 	eeh_addr_cache_rmv_dev(dev);
 	eeh_sysfs_remove_device(dev);
 	edev->mode &= ~EEH_DEV_SYSFS;
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index d3a132c..ce3a698 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -217,7 +217,8 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
 	if (!driver) return NULL;
 
 	if (!driver->err_handler ||
-	    !driver->err_handler->mmio_enabled) {
+	    !driver->err_handler->mmio_enabled ||
+	    (edev->mode & EEH_DEV_NO_HANDLER)) {
 		eeh_pcid_put(dev);
 		return NULL;
 	}
@@ -258,7 +259,8 @@ static void *eeh_report_reset(void *data, void *userdata)
 	eeh_enable_irq(dev);
 
 	if (!driver->err_handler ||
-	    !driver->err_handler->slot_reset) {
+	    !driver->err_handler->slot_reset ||
+	    (edev->mode & EEH_DEV_NO_HANDLER)) {
 		eeh_pcid_put(dev);
 		return NULL;
 	}
@@ -297,7 +299,9 @@ static void *eeh_report_resume(void *data, void *userdata)
 	eeh_enable_irq(dev);
 
 	if (!driver->err_handler ||
-	    !driver->err_handler->resume) {
+	    !driver->err_handler->resume ||
+	    (edev->mode & EEH_DEV_NO_HANDLER)) {
+		edev->mode &= ~EEH_DEV_NO_HANDLER;
 		eeh_pcid_put(dev);
 		return NULL;
 	}
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH v2 1/3] powerpc/eeh: Handle multiple EEH errors
From: Gavin Shan @ 2014-01-15  5:16 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

For one PCI error relevant OPAL event, we possibly have multiple
EEH errors for that. For example, multiple frozen PEs detected on
different PHBs. Unfortunately, we didn't cover the case. The patch
enumerates the return values from eeh_ops::next_error() and changes
eeh_handle_special_event() and eeh_ops::next_error() to handle all
existing EEH errors.

As Ben pointed out, we needn't list_for_each_entry_safe() since we
are not deleting any PHB from the hose_list and the EEH serialized
lock should be held while purging EEH events. The patch covers those
suggestions as well.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h            |   10 ++
 arch/powerpc/kernel/eeh_driver.c          |  150 +++++++++++++++--------------
 arch/powerpc/platforms/powernv/eeh-ioda.c |   39 +++++---
 3 files changed, 112 insertions(+), 87 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d3e5e9b..e37db7f 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -117,6 +117,16 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
 	return edev ? edev->pdev : NULL;
 }
 
+/* Return values from eeh_ops::next_error */
+enum {
+	EEH_NEXT_ERR_NONE = 0,
+	EEH_NEXT_ERR_INF,
+	EEH_NEXT_ERR_FROZEN_PE,
+	EEH_NEXT_ERR_FENCED_PHB,
+	EEH_NEXT_ERR_DEAD_PHB,
+	EEH_NEXT_ERR_DEAD_IOC
+};
+
 /*
  * The struct is used to trace the registered EEH operation
  * callback functions. Actually, those operation callback
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 36bed5a..d3a132c 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -626,84 +626,90 @@ static void eeh_handle_special_event(void)
 {
 	struct eeh_pe *pe, *phb_pe;
 	struct pci_bus *bus;
-	struct pci_controller *hose, *tmp;
+	struct pci_controller *hose;
 	unsigned long flags;
-	int rc = 0;
+	int rc;
 
-	/*
-	 * The return value from next_error() has been classified as follows.
-	 * It might be good to enumerate them. However, next_error() is only
-	 * supported by PowerNV platform for now. So it would be fine to use
-	 * integer directly:
-	 *
-	 * 4 - Dead IOC           3 - Dead PHB
-	 * 2 - Fenced PHB         1 - Frozen PE
-	 * 0 - No error found
-	 *
-	 */
-	rc = eeh_ops->next_error(&pe);
-	if (rc <= 0)
-		return;
 
-	switch (rc) {
-	case 4:
-		/* Mark all PHBs in dead state */
-		eeh_serialize_lock(&flags);
-		list_for_each_entry_safe(hose, tmp,
-				&hose_list, list_node) {
-			phb_pe = eeh_phb_pe_get(hose);
-			if (!phb_pe) continue;
-
-			eeh_pe_state_mark(phb_pe,
-				EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+	do {
+		rc = eeh_ops->next_error(&pe);
+
+		switch (rc) {
+		case EEH_NEXT_ERR_DEAD_IOC:
+			/* Mark all PHBs in dead state */
+			eeh_serialize_lock(&flags);
+
+			/* Purge all events */
+			eeh_remove_event(NULL);
+
+			list_for_each_entry(hose, &hose_list, list_node) {
+				phb_pe = eeh_phb_pe_get(hose);
+				if (!phb_pe) continue;
+
+				eeh_pe_state_mark(phb_pe,
+					EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+			}
+
+			eeh_serialize_unlock(flags);
+
+			break;
+		case EEH_NEXT_ERR_FROZEN_PE:
+		case EEH_NEXT_ERR_FENCED_PHB:
+		case EEH_NEXT_ERR_DEAD_PHB:
+			/* Mark the PE in fenced state */
+			eeh_serialize_lock(&flags);
+
+			/* Purge all events of the PHB */
+			eeh_remove_event(pe);
+
+			if (rc == EEH_NEXT_ERR_DEAD_PHB)
+				eeh_pe_state_mark(pe,
+					EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+			else
+				eeh_pe_state_mark(pe,
+					EEH_PE_ISOLATED | EEH_PE_RECOVERING);
+
+			eeh_serialize_unlock(flags);
+
+			break;
+		case EEH_NEXT_ERR_NONE:
+			return;
+		default:
+			pr_warn("%s: Invalid value %d from next_error()\n",
+				__func__, rc);
+			return;
 		}
-		eeh_serialize_unlock(flags);
-
-		/* Purge all events */
-		eeh_remove_event(NULL);
-		break;
-	case 3:
-	case 2:
-	case 1:
-		/* Mark the PE in fenced state */
-		eeh_serialize_lock(&flags);
-		if (rc == 3)
-			eeh_pe_state_mark(pe,
-				EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
-		else
-			eeh_pe_state_mark(pe,
-				EEH_PE_ISOLATED | EEH_PE_RECOVERING);
-		eeh_serialize_unlock(flags);
-
-		/* Purge all events of the PHB */
-		eeh_remove_event(pe);
-		break;
-	default:
-		pr_err("%s: Invalid value %d from next_error()\n",
-		       __func__, rc);
-		return;
-	}
 
-	/*
-	 * For fenced PHB and frozen PE, it's handled as normal
-	 * event. We have to remove the affected PHBs for dead
-	 * PHB and IOC
-	 */
-	if (rc == 2 || rc == 1)
-		eeh_handle_normal_event(pe);
-	else {
-		list_for_each_entry_safe(hose, tmp,
-			&hose_list, list_node) {
-			phb_pe = eeh_phb_pe_get(hose);
-			if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD))
-				continue;
-
-			bus = eeh_pe_bus_get(phb_pe);
-			/* Notify all devices that they're about to go down. */
-			eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
-			pcibios_remove_pci_devices(bus);
+		/*
+		 * For fenced PHB and frozen PE, it's handled as normal
+		 * event. We have to remove the affected PHBs for dead
+		 * PHB and IOC
+		 */
+		if (rc == EEH_NEXT_ERR_FROZEN_PE ||
+		    rc == EEH_NEXT_ERR_FENCED_PHB) {
+			eeh_handle_normal_event(pe);
+		} else {
+			list_for_each_entry(hose, &hose_list, list_node) {
+				phb_pe = eeh_phb_pe_get(hose);
+				if (!phb_pe ||
+				    !(phb_pe->state & EEH_PE_PHB_DEAD))
+					continue;
+
+				/* Notify all devices to be down */
+				bus = eeh_pe_bus_get(phb_pe);
+				eeh_pe_dev_traverse(pe,
+					eeh_report_failure, NULL);
+				pcibios_remove_pci_devices(bus);
+			}
 		}
-	}
+
+		/*
+		 * If we have detected dead IOC, we needn't proceed
+		 * any more since all PHBs would have been removed
+		 */
+		if (rc == EEH_NEXT_ERR_DEAD_IOC)
+			break;
+	} while (rc != EEH_NEXT_ERR_NONE);
 }
 
 /**
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index d7ddcee..e0b12d0 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -884,12 +884,12 @@ static int ioda_eeh_get_pe(struct pci_controller *hose,
  */
 static int ioda_eeh_next_error(struct eeh_pe **pe)
 {
-	struct pci_controller *hose, *tmp;
+	struct pci_controller *hose;
 	struct pnv_phb *phb;
 	u64 frozen_pe_no;
 	u16 err_type, severity;
 	long rc;
-	int ret = 1;
+	int ret = EEH_NEXT_ERR_NONE;
 
 	/*
 	 * While running here, it's safe to purge the event queue.
@@ -899,7 +899,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 	eeh_remove_event(NULL);
 	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
 
-	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+	list_for_each_entry(hose, &hose_list, list_node) {
 		/*
 		 * If the subordinate PCI buses of the PHB has been
 		 * removed, we needn't take care of it any more.
@@ -938,19 +938,19 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 		switch (err_type) {
 		case OPAL_EEH_IOC_ERROR:
 			if (severity == OPAL_EEH_SEV_IOC_DEAD) {
-				list_for_each_entry_safe(hose, tmp,
-						&hose_list, list_node) {
+				list_for_each_entry(hose, &hose_list,
+						    list_node) {
 					phb = hose->private_data;
 					phb->eeh_state |= PNV_EEH_STATE_REMOVED;
 				}
 
 				pr_err("EEH: dead IOC detected\n");
-				ret = 4;
-				goto out;
+				ret = EEH_NEXT_ERR_DEAD_IOC;
 			} else if (severity == OPAL_EEH_SEV_INF) {
 				pr_info("EEH: IOC informative error "
 					"detected\n");
 				ioda_eeh_hub_diag(hose);
+				ret = EEH_NEXT_ERR_NONE;
 			}
 
 			break;
@@ -962,21 +962,20 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 				pr_err("EEH: dead PHB#%x detected\n",
 					hose->global_number);
 				phb->eeh_state |= PNV_EEH_STATE_REMOVED;
-				ret = 3;
-				goto out;
+				ret = EEH_NEXT_ERR_DEAD_PHB;
 			} else if (severity == OPAL_EEH_SEV_PHB_FENCED) {
 				if (ioda_eeh_get_phb_pe(hose, pe))
 					break;
 
 				pr_err("EEH: fenced PHB#%x detected\n",
 					hose->global_number);
-				ret = 2;
-				goto out;
+				ret = EEH_NEXT_ERR_FENCED_PHB;
 			} else if (severity == OPAL_EEH_SEV_INF) {
 				pr_info("EEH: PHB#%x informative error "
 					"detected\n",
 					hose->global_number);
 				ioda_eeh_phb_diag(hose);
+				ret = EEH_NEXT_ERR_NONE;
 			}
 
 			break;
@@ -986,13 +985,23 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 
 			pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
 				(*pe)->addr, (*pe)->phb->global_number);
-			ret = 1;
-			goto out;
+			ret = EEH_NEXT_ERR_FROZEN_PE;
+			break;
+		default:
+			pr_warn("%s: Unexpected error type %d\n",
+				__func__, err_type);
 		}
+
+		/*
+		 * If we have no errors on the specific PHB or only
+		 * informative error there, we continue poking it.
+		 * Otherwise, we need actions to be taken by upper
+		 * layer.
+		 */
+		if (ret > EEH_NEXT_ERR_INF)
+			break;
 	}
 
-	ret = 0;
-out:
 	return ret;
 }
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH v2 3/3] powerpc/eeh: Escalate error on non-existing PE
From: Gavin Shan @ 2014-01-15  5:16 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1389762974-14176-1-git-send-email-shangw@linux.vnet.ibm.com>

Sometimes, especially in the scenario of loading another kernel with kdump,
we get an EEH error on a non-existing PE. That means the PEEV / PEST in
the corresponding PHB would be messy and we can't handle that case.
The patch escalates the error to fenced PHB so that the PHB can be
reset in order to recover from the errors on non-existing PEs.

Reported-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Tested-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c |   31 +++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index e0b12d0..92aa1f9 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -862,11 +862,7 @@ static int ioda_eeh_get_pe(struct pci_controller *hose,
 	dev.phb = hose;
 	dev.pe_config_addr = pe_no;
 	dev_pe = eeh_pe_get(&dev);
-	if (!dev_pe) {
-		pr_warning("%s: Can't find PE for PHB#%x - PE#%x\n",
-			   __func__, hose->global_number, pe_no);
-		return -EEXIST;
-	}
+	if (!dev_pe) return -EEXIST;
 
 	*pe = dev_pe;
 	return 0;
@@ -980,12 +976,27 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 
 			break;
 		case OPAL_EEH_PE_ERROR:
-			if (ioda_eeh_get_pe(hose, frozen_pe_no, pe))
-				break;
+			/*
+			 * If we can't find the corresponding PE, the
+			 * PEEV / PEST would be messy. So we force an
+			 * fenced PHB so that it can be recovered.
+			 */
+			if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) {
+				if (!ioda_eeh_get_phb_pe(hose, pe)) {
+					pr_err("EEH: Escalated fenced PHB#%x "
+					       "detected for PE#%llx\n",
+						hose->global_number,
+						frozen_pe_no);
+					ret = EEH_NEXT_ERR_FENCED_PHB;
+				} else {
+					ret = EEH_NEXT_ERR_NONE;
+				}
+			} else {
+				pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
+					(*pe)->addr, (*pe)->phb->global_number);
+				ret = EEH_NEXT_ERR_FROZEN_PE;
+			}
 
-			pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
-				(*pe)->addr, (*pe)->phb->global_number);
-			ret = EEH_NEXT_ERR_FROZEN_PE;
 			break;
 		default:
 			pr_warn("%s: Unexpected error type %d\n",
-- 
1.7.10.4

^ permalink raw reply related

* [git pull] Please pull powerpc.git merge branch
From: Benjamin Herrenschmidt @ 2014-01-15  5:01 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linuxppc-dev list, Andrew Morton, Linux Kernel list

Hi Linus !

So you make the call on whether to take that one now or wait for the
merge window. It's a bug fix for a crash in mremap that occurs on
powerpc with THP enabled.

The fix however requires a small change in the generic code. It moves a
condition into a helper we can override from the arch which is harmless,
but it *also* slightly changes the order of the set_pmd and the withdraw
& deposit, which should be fine according to Kirill (who wrote that
code) but I agree -rc8 is a bit late...

It was acked by Kirill and Andrew told me to just merge it via powerpc.

My original intent was to put it in powerpc-next and then shoot it to
stable, but it got a tad annoying (due to churn it needs to be applied
at least on rc4 or later while my next is at rc1 and clean that way), so
I put it in the merge branch.

From there, you tell me if you want to take it now, if not, I'll send
you that branch along with my normal next one after you open the merge
window.

Cheers,
Ben.

The following changes since commit a6da83f98267bc8ee4e34aa899169991eb0ceb93:

  Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc (2014-01-13 10:59:05 +0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git merge

for you to fetch changes up to b3084f4db3aeb991c507ca774337c7e7893ed04f:

  powerpc/thp: Fix crash on mremap (2014-01-15 15:46:38 +1100)

----------------------------------------------------------------
Aneesh Kumar K.V (1):
      powerpc/thp: Fix crash on mremap

 arch/powerpc/include/asm/pgtable-ppc64.h | 14 ++++++++++++++
 include/asm-generic/pgtable.h            | 12 ++++++++++++
 mm/huge_memory.c                         | 14 +++++---------
 3 files changed, 31 insertions(+), 9 deletions(-)

^ permalink raw reply

* Re: Pull request: scottwood/linux.git
From: Benjamin Herrenschmidt @ 2014-01-15  3:54 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc-dev
In-Reply-To: <20140111004451.GA7353@home.buserror.net>

On Fri, 2014-01-10 at 18:44 -0600, Scott Wood wrote:
> Highlights include 32-bit booke relocatable support, e6500 hardware
> tablewalk support, various e500 SPE fixes, some new/revived boards, and
> e6500 deeper idle and altivec powerdown modes.

This breaks WSP (A2) build with 64K pages:

/home/benh/linux-powerpc-test/arch/powerpc/mm/tlb_low_64e.S: Assembler messages:
/home/benh/linux-powerpc-test/arch/powerpc/mm/tlb_low_64e.S:334: Error: can't resolve `L0^A' {*ABS* section} - `PUD_SHIFT' {*UND* section}
/home/benh/linux-powerpc-test/arch/powerpc/mm/tlb_low_64e.S:334: Error: expression too complex
/home/benh/linux-powerpc-test/arch/powerpc/mm/tlb_low_64e.S:334: Error: operand out of range (67 is not between 0 and 63)
make[2]: *** [arch/powerpc/mm/tlb_low_64e.o] Error 1

I'm merging anyway because nobody uses WSP anymore (I'm keen to remove it by 3.15 or so)
but in the meantime you may want to fix it (probably just ifdef the PUD level walk on
64k pages, look at what I do elsewhere).

Cheers,
Ben.

> The following changes since commit dece8ada993e1764a115bdff0f1effffaa5fc8dc:
> 
>   Merge branch 'merge' into next (2013-12-30 15:19:31 +1100)
> 
> are available in the git repository at:
> 
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git next
> 
> for you to fetch changes up to d064f30e5063ec54ab50af08c64fb5055e759bfd:
> 
>   powerpc/fsl_pci: add versionless pci compatible (2014-01-10 17:38:56 -0600)
> 
> ----------------------------------------------------------------
> Anton Blanchard (1):
>       drivers/tty: ehv_bytechan fails to build as a module
> 
> Christian Engelmayer (1):
>       powerpc/sysdev: Fix a pci section mismatch for Book E
> 
> Diana Craciun (1):
>       powerpc: Replaced tlbilx with tlbwe in the initialization code
> 
> Joseph Myers (6):
>       powerpc: fix exception clearing in e500 SPE float emulation
>       powerpc: fix e500 SPE float rounding inexactness detection
>       math-emu: fix floating-point to integer unsigned saturation
>       math-emu: fix floating-point to integer overflow detection
>       powerpc: fix e500 SPE float to integer and fixed-point conversions
>       powerpc: fix e500 SPE float SIGFPE generation
> 
> Kevin Hao (11):
>       powerpc/85xx: don't init the mpic ipi for the SoC which has doorbell support
>       powerpc/fsl_booke: protect the access to MAS7
>       powerpc/fsl_booke: introduce get_phys_addr function
>       powerpc: introduce macro LOAD_REG_ADDR_PIC
>       powerpc: enable the relocatable support for the fsl booke 32bit kernel
>       powerpc/fsl_booke: set the tlb entry for the kernel address in AS1
>       powerpc: introduce early_get_first_memblock_info
>       powerpc/fsl_booke: introduce map_mem_in_cams_addr
>       powerpc/fsl_booke: make sure PAGE_OFFSET map to memstart_addr for relocatable kernel
>       powerpc/fsl_booke: smp support for booting a relocatable kernel above 64M
>       powerpc/fsl_booke: enable the relocatable for the kdump kernel
> 
> LEROY Christophe (1):
>       powerpc 8xx: defconfig: slice by 4 is more efficient than the default slice by 8 on Powerpc 8xx.
> 
> Lijun Pan (1):
>       powerpc/85xx: Merge 85xx/p1023_defconfig into mpc85xx_smp and mpc85xx
> 
> Mihai Caraman (1):
>       powerpc/booke64: Add LRAT error exception handler
> 
> Paul Gortmaker (1):
>       powerpc: fix 8xx and 6xx final link failures
> 
> Scott Wood (5):
>       powerpc/fsl-booke: Use SPRN_SPRGn rather than mfsprg/mtsprg
>       powerpc: add barrier after writing kernel PTE
>       powerpc/e6500: TLB miss handler with hardware tablewalk support
>       powerpc/fsl-book3e-64: Use paca for hugetlb TLB1 entry selection
>       powerpc/booke-64: fix tlbsrx. path in bolted tlb handler
> 
> Shaohui Xie (1):
>       powerpc/85xx: handle the eLBC error interrupt if it exists in dts
> 
> Shengzhou Liu (2):
>       powerpc/85xx/dts: add third elo3 dma component
>       powerpc/fsl_pci: add versionless pci compatible
> 
> Stephen Chivers (1):
>       powerpc/embedded6xx: Add support for Motorola/Emerson MVME5100
> 
> Wang Dongsheng (9):
>       powerpc/fsl: add E6500 PVR and SPRN_PWRMGTCR0 define
>       powerpc/85xx: add hardware automatically enter altivec idle state
>       powerpc/85xx: add hardware automatically enter pw20 state
>       powerpc/85xx: add sysfs for pw20 state and altivec idle
>       powerpc/p1022ds: fix rtc compatible string
>       powerpc/p1022ds: add a interrupt for rtc node
>       powerpc/mpic_timer: fix the time is not accurate caused by GTCRR toggle bit
>       powerpc/mpic_timer: fix convert ticks to time subtraction overflow
>       powerpc/dts: fix lbc lack of error interrupt
> 
> Xie Xiaobo (2):
>       powerpc/85xx: Add QE common init function
>       powerpc/85xx: Add TWR-P1025 board support
> 
> Zhao Qiang (3):
>       powerpc/p1010rdb:update dts to adapt to both old and new p1010rdb
>       powerpc/p1010rdb:update mtd of nand to adapt to both old and new p1010rdb
>       powerpc/p1010rdb-pa: modify phy interrupt.
> 
>  .../devicetree/bindings/video/ssd1289fb.txt        |  13 +
>  arch/powerpc/Kconfig                               |   5 +-
>  arch/powerpc/boot/Makefile                         |   7 +-
>  arch/powerpc/boot/dts/fsl/elo3-dma-2.dtsi          |  82 ++++++
>  arch/powerpc/boot/dts/fsl/p1020si-post.dtsi        |   3 +-
>  arch/powerpc/boot/dts/fsl/p1021si-post.dtsi        |   3 +-
>  arch/powerpc/boot/dts/fsl/p1022si-post.dtsi        |   3 +-
>  arch/powerpc/boot/dts/fsl/p1023si-post.dtsi        |   3 +-
>  arch/powerpc/boot/dts/mvme5100.dts                 | 185 ++++++++++++
>  arch/powerpc/boot/dts/p1010rdb-pa.dts              |  23 ++
>  arch/powerpc/boot/dts/p1010rdb-pa.dtsi             |  85 ++++++
>  .../dts/{p1010rdb_36b.dts => p1010rdb-pa_36b.dts}  |  47 +--
>  arch/powerpc/boot/dts/p1010rdb-pb.dts              |  35 +++
>  arch/powerpc/boot/dts/p1010rdb-pb_36b.dts          |  58 ++++
>  arch/powerpc/boot/dts/p1010rdb.dts                 |  66 -----
>  arch/powerpc/boot/dts/p1010rdb.dtsi                |  43 +--
>  arch/powerpc/boot/dts/p1010rdb_32b.dtsi            |  79 ++++++
>  arch/powerpc/boot/dts/p1010rdb_36b.dtsi            |  79 ++++++
>  arch/powerpc/boot/dts/p1022ds.dtsi                 |   3 +-
>  arch/powerpc/boot/dts/p1025twr.dts                 |  95 +++++++
>  arch/powerpc/boot/dts/p1025twr.dtsi                | 280 ++++++++++++++++++
>  arch/powerpc/boot/mvme5100.c                       |  27 ++
>  arch/powerpc/boot/wrapper                          |   4 +
>  arch/powerpc/configs/85xx/p1023_defconfig          | 188 ------------
>  arch/powerpc/configs/adder875_defconfig            |   1 +
>  arch/powerpc/configs/ep88xc_defconfig              |   1 +
>  arch/powerpc/configs/mpc85xx_defconfig             |   3 +
>  arch/powerpc/configs/mpc85xx_smp_defconfig         |   3 +
>  arch/powerpc/configs/mpc866_ads_defconfig          |   1 +
>  arch/powerpc/configs/mpc885_ads_defconfig          |   1 +
>  arch/powerpc/configs/mvme5100_defconfig            | 144 ++++++++++
>  arch/powerpc/configs/tqm8xx_defconfig              |   1 +
>  arch/powerpc/include/asm/fsl_lbc.h                 |   2 +-
>  arch/powerpc/include/asm/kvm_asm.h                 |   1 +
>  arch/powerpc/include/asm/mmu-book3e.h              |  13 +
>  arch/powerpc/include/asm/mmu.h                     |  21 +-
>  arch/powerpc/include/asm/paca.h                    |   6 +
>  arch/powerpc/include/asm/ppc_asm.h                 |  13 +
>  arch/powerpc/include/asm/processor.h               |   6 +-
>  arch/powerpc/include/asm/reg.h                     |   2 +
>  arch/powerpc/include/asm/reg_booke.h               |  10 +
>  arch/powerpc/kernel/asm-offsets.c                  |   9 +
>  arch/powerpc/kernel/cpu_setup_fsl_booke.S          |  54 ++++
>  arch/powerpc/kernel/exceptions-64e.S               |  27 +-
>  arch/powerpc/kernel/fsl_booke_entry_mapping.S      |   2 +
>  arch/powerpc/kernel/head_fsl_booke.S               | 266 +++++++++++++++--
>  arch/powerpc/kernel/paca.c                         |   5 +
>  arch/powerpc/kernel/process.c                      |  30 +-
>  arch/powerpc/kernel/prom.c                         |  41 ++-
>  arch/powerpc/kernel/setup_64.c                     |  31 ++
>  arch/powerpc/kernel/swsusp_booke.S                 |  32 +--
>  arch/powerpc/kernel/sysfs.c                        | 316 +++++++++++++++++++++
>  arch/powerpc/kvm/bookehv_interrupts.S              |   2 +
>  arch/powerpc/math-emu/math_efp.c                   | 316 ++++++++++++++++-----
>  arch/powerpc/mm/fsl_booke_mmu.c                    |  80 +++++-
>  arch/powerpc/mm/hugetlbpage-book3e.c               |  54 +++-
>  arch/powerpc/mm/mem.c                              |   6 +
>  arch/powerpc/mm/mmu_decl.h                         |   2 +
>  arch/powerpc/mm/pgtable_32.c                       |   1 +
>  arch/powerpc/mm/pgtable_64.c                       |  12 +
>  arch/powerpc/mm/tlb_low_64e.S                      | 174 +++++++++++-
>  arch/powerpc/mm/tlb_nohash.c                       |  93 ++++--
>  arch/powerpc/mm/tlb_nohash_low.S                   |   4 +-
>  arch/powerpc/platforms/85xx/Kconfig                |   6 +
>  arch/powerpc/platforms/85xx/Makefile               |   1 +
>  arch/powerpc/platforms/85xx/common.c               |  38 +++
>  arch/powerpc/platforms/85xx/mpc85xx.h              |   6 +
>  arch/powerpc/platforms/85xx/mpc85xx_mds.c          |  29 +-
>  arch/powerpc/platforms/85xx/mpc85xx_rdb.c          |  25 +-
>  arch/powerpc/platforms/85xx/smp.c                  |  17 +-
>  arch/powerpc/platforms/85xx/twr_p102x.c            | 147 ++++++++++
>  arch/powerpc/platforms/embedded6xx/Kconfig         |  13 +-
>  arch/powerpc/platforms/embedded6xx/Makefile        |   1 +
>  arch/powerpc/platforms/embedded6xx/mvme5100.c      | 221 ++++++++++++++
>  arch/powerpc/sysdev/fsl_lbc.c                      |  31 +-
>  arch/powerpc/sysdev/fsl_pci.c                      |   3 +-
>  arch/powerpc/sysdev/indirect_pci.c                 |   6 +-
>  arch/powerpc/sysdev/mpic_timer.c                   |  10 +-
>  drivers/tty/Kconfig                                |   2 +-
>  include/linux/of_fdt.h                             |   1 +
>  include/math-emu/op-common.h                       |   9 +-
>  81 files changed, 3154 insertions(+), 614 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/video/ssd1289fb.txt
>  create mode 100644 arch/powerpc/boot/dts/fsl/elo3-dma-2.dtsi
>  create mode 100644 arch/powerpc/boot/dts/mvme5100.dts
>  create mode 100644 arch/powerpc/boot/dts/p1010rdb-pa.dts
>  create mode 100644 arch/powerpc/boot/dts/p1010rdb-pa.dtsi
>  rename arch/powerpc/boot/dts/{p1010rdb_36b.dts => p1010rdb-pa_36b.dts} (64%)
>  create mode 100644 arch/powerpc/boot/dts/p1010rdb-pb.dts
>  create mode 100644 arch/powerpc/boot/dts/p1010rdb-pb_36b.dts
>  delete mode 100644 arch/powerpc/boot/dts/p1010rdb.dts
>  create mode 100644 arch/powerpc/boot/dts/p1010rdb_32b.dtsi
>  create mode 100644 arch/powerpc/boot/dts/p1010rdb_36b.dtsi
>  create mode 100644 arch/powerpc/boot/dts/p1025twr.dts
>  create mode 100644 arch/powerpc/boot/dts/p1025twr.dtsi
>  create mode 100644 arch/powerpc/boot/mvme5100.c
>  delete mode 100644 arch/powerpc/configs/85xx/p1023_defconfig
>  create mode 100644 arch/powerpc/configs/mvme5100_defconfig
>  create mode 100644 arch/powerpc/platforms/85xx/twr_p102x.c
>  create mode 100644 arch/powerpc/platforms/embedded6xx/mvme5100.c

^ permalink raw reply

* Re: [PATCH] powerpc: dma-mapping: Return dma_direct_ops variable when dev == NULL
From: Benjamin Herrenschmidt @ 2014-01-15  3:47 UTC (permalink / raw)
  To: Chunhe Lan; +Cc: linux-pci, linuxppc-dev, Chunhe Lan
In-Reply-To: <52D60238.2040204@freescale.com>

On Wed, 2014-01-15 at 11:36 +0800, Chunhe Lan wrote:

> >
> >> Signed-off-by: Chunhe Lan <Chunhe.Lan@freescale.com>
> >> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> >> Tested-by: Chunhe Lan <Chunhe.Lan@freescale.com>
> >> ---
> >>   arch/powerpc/include/asm/dma-mapping.h |   13 +++++++++----
> >>   1 files changed, 9 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
> >> index e27e9ad..b8c10de 100644
> >> --- a/arch/powerpc/include/asm/dma-mapping.h
> >> +++ b/arch/powerpc/include/asm/dma-mapping.h
> >> @@ -84,10 +84,15 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
>          I see the get_dma_ops function in 
> arch/*x86*/include/asm/dma-mapping.h as the following:
> 
>   32 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
>   33 {
>   34 #ifndef CONFIG_X86_DEV_DMA_OPS
>   35         return dma_ops;
>   36 #else
>   37         if (unlikely(!dev) || !dev->archdata.dma_ops)
>   38                 return dma_ops;
>   39         else
>   40                 return dev->archdata.dma_ops;
>   41 #endif
>   42 }
> 
>          And also  see the get_dma_ops function in  
> arch/*arm*/include/asm/dma-mapping.h as the following:
> 
>   18 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
>   19 {
>   20         if (dev && dev->archdata.dma_ops)
>   21                 return dev->archdata.dma_ops;
>   22         return &arm_dma_ops;
>   23 }
> 
>        Why not powerpc use this method to process dev == NULL ?

Because we don't :-) We used to and removed this. Due to how our HW
works it might not be correct. When an iommu is enabled for example
you simply cannot use the direct ops.

So the right fix is to properly establish the iommu for the VFs like
we do for the PFs.

> Thanks,
> -Chunhe
> 
> >>   	 * only ISA DMA device we support is the floppy and we have a hack
> >>   	 * in the floppy driver directly to get a device for us.
> >>   	 */
> >> -	if (unlikely(dev == NULL))
> >> -		return NULL;
> >> -
> >> -	return dev->archdata.dma_ops;
> >> +	if (dev && dev->archdata.dma_ops)
> >> +		return dev->archdata.dma_ops;
> >> +	/*
> >> +	 * In some cases (for example, use the Intel(R) 10 Gigabit PCI
> >> +	 * expression Virtual Function Network Driver -- ixgbevf.ko),
> >> +	 * their value of dev is the NULL. If return NULL, the driver is
> >> +	 * aborting. So return dma_direct_ops variable when dev == NULL.
> >> +	 */
> >> +	return &dma_direct_ops;
> >>   }
> >>   
> >>   static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
> >
> >
> >
> 
> 

^ permalink raw reply

* Re: [PATCH] powerpc: dma-mapping: Return dma_direct_ops variable when dev == NULL
From: Chunhe Lan @ 2014-01-15  3:36 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linux-pci, linuxppc-dev, Chunhe Lan
In-Reply-To: <1389694446.6933.14.camel@pasglop>

On 01/14/2014 06:14 PM, Benjamin Herrenschmidt wrote:
> On Tue, 2014-01-14 at 17:44 +0800, Chunhe Lan wrote:
>> Without this patch, kind of below error will be dumped if
>> 'insmod ixgbevf.ko' is executed:
>>
>>      ixgbevf: Intel(R) 10 Gigabit PCI Express Virtual Function
>>               Network Driver - version 2.7.12-k
>>      ixgbevf: Copyright (c) 2009 - 2012 Intel Corporation.
>>      ixgbevf 0000:01:10.0: enabling device (0000 -> 0002)
>>      ixgbevf 0000:01:10.0: No usable DMA configuration, aborting
>>      ixgbevf: probe of 0000:01:10.0 failed with error -5
>>      ......
>>      ......
> That's not right. The DMA ops must be set properly for the VF somewhere
> in the arch code instead. When creating VFs, is there a hook allowing
> the arch to fix things up ?
>
> (Also adding linux-pci on CC)
>
> Ben.
>
>> Signed-off-by: Chunhe Lan <Chunhe.Lan@freescale.com>
>> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> Tested-by: Chunhe Lan <Chunhe.Lan@freescale.com>
>> ---
>>   arch/powerpc/include/asm/dma-mapping.h |   13 +++++++++----
>>   1 files changed, 9 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
>> index e27e9ad..b8c10de 100644
>> --- a/arch/powerpc/include/asm/dma-mapping.h
>> +++ b/arch/powerpc/include/asm/dma-mapping.h
>> @@ -84,10 +84,15 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
         I see the get_dma_ops function in 
arch/*x86*/include/asm/dma-mapping.h as the following:

  32 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
  33 {
  34 #ifndef CONFIG_X86_DEV_DMA_OPS
  35         return dma_ops;
  36 #else
  37         if (unlikely(!dev) || !dev->archdata.dma_ops)
  38                 return dma_ops;
  39         else
  40                 return dev->archdata.dma_ops;
  41 #endif
  42 }

         And also  see the get_dma_ops function in 
arch/*arm*/include/asm/dma-mapping.h as the following:

  18 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
  19 {
  20         if (dev && dev->archdata.dma_ops)
  21                 return dev->archdata.dma_ops;
  22         return &arm_dma_ops;
  23 }

       Why doesn't powerpc use this method to handle dev == NULL?

Thanks,
-Chunhe

>>   	 * only ISA DMA device we support is the floppy and we have a hack
>>   	 * in the floppy driver directly to get a device for us.
>>   	 */
>> -	if (unlikely(dev == NULL))
>> -		return NULL;
>> -
>> -	return dev->archdata.dma_ops;
>> +	if (dev && dev->archdata.dma_ops)
>> +		return dev->archdata.dma_ops;
>> +	/*
>> +	 * In some cases (for example, use the Intel(R) 10 Gigabit PCI
>> +	 * expression Virtual Function Network Driver -- ixgbevf.ko),
>> +	 * their value of dev is the NULL. If return NULL, the driver is
>> +	 * aborting. So return dma_direct_ops variable when dev == NULL.
>> +	 */
>> +	return &dma_direct_ops;
>>   }
>>   
>>   static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
>
>
>

^ permalink raw reply

* RE: [PATCH 2/3] powerpc/85xx: Provide two functions to save/restore the core registers
From: Dongsheng.Wang @ 2014-01-15  3:30 UTC (permalink / raw)
  To: Scott Wood
  Cc: anton@enomsg.org, linuxppc-dev@lists.ozlabs.org,
	chenhui.zhao@freescale.com
In-Reply-To: <1389743456.24905.143.camel@snotra.buserror.net>

DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogV29vZCBTY290dC1CMDc0
MjENCj4gU2VudDogV2VkbmVzZGF5LCBKYW51YXJ5IDE1LCAyMDE0IDc6NTEgQU0NCj4gVG86IFdh
bmcgRG9uZ3NoZW5nLUI0MDUzNA0KPiBDYzogYmVuaEBrZXJuZWwuY3Jhc2hpbmcub3JnOyBaaGFv
IENoZW5odWktQjM1MzM2OyBhbnRvbkBlbm9tc2cub3JnOyBsaW51eHBwYy0NCj4gZGV2QGxpc3Rz
Lm96bGFicy5vcmcNCj4gU3ViamVjdDogUmU6IFtQQVRDSCAyLzNdIHBvd2VycGMvODV4eDogUHJv
dmlkZSB0d28gZnVuY3Rpb25zIHRvIHNhdmUvcmVzdG9yZSB0aGUNCj4gY29yZSByZWdpc3RlcnMN
Cj4gDQo+IE9uIFR1ZSwgMjAxNC0wMS0xNCBhdCAxNTo1OSArMDgwMCwgRG9uZ3NoZW5nIFdhbmcg
d3JvdGU6DQo+ID4gRnJvbTogV2FuZyBEb25nc2hlbmcgPGRvbmdzaGVuZy53YW5nQGZyZWVzY2Fs
ZS5jb20+DQo+ID4NCj4gPiBBZGQgZnNsX2NwdV9zdGF0ZV9zYXZlL2ZzbF9jcHVfc3RhdGVfcmVz
dG9yZSBmdW5jdGlvbnMsIHVzZWQgZm9yIGRlZXANCj4gPiBzbGVlcCBhbmQgaGliZXJuYXRpb24g
dG8gc2F2ZS9yZXN0b3JlIGNvcmUgcmVnaXN0ZXJzLiBXZSBhYnN0cmFjdCBvdXQNCj4gPiBzYXZl
L3Jlc3RvcmUgY29kZSBmb3IgdXNlIGluIHZhcmlvdXMgbW9kdWxlcywgdG8gbWFrZSB0aGVtIGRv
bid0IG5lZWQNCj4gPiB0byBtYWludGFpbi4NCj4gPg0KPiA+IEN1cnJlbnRseSBzdXBwb3J0ZWQg
cHJvY2Vzc29ycyB0eXBlIGFyZSBFNjUwMCwgRTU1MDAsIEU1MDBNQywgRTUwMHYyDQo+ID4gYW5k
IEU1MDB2MS4NCj4gPg0KPiA+IFNpZ25lZC1vZmYtYnk6IFdhbmcgRG9uZ3NoZW5nIDxkb25nc2hl
bmcud2FuZ0BmcmVlc2NhbGUuY29tPg0KPiANCj4gV2hhdCBpcyB0aGVyZSB0aGF0IGlzIHNwZWNm
aWMgdG8gYSBwYXJ0aWN1bGFyIGNvcmUgdHlwZSB0aGF0IGNhbid0IGJlIGhhbmRsZWQNCj4gZnJv
bSBDIGNvZGU/DQo+IA0KDQpJbiB0aGUgY29udGV4dCBvZiB0aGUgY2FsbGluZywgbWF5YmUgbm90
IGluIEMgZW52aXJvbm1lbnQuKERlZXAgc2xlZXAgd2l0aG91dA0KQyBlbnZpcm9ubWVudCB3aGVu
IGNhbGxpbmcgdGhvc2UgaW50ZXJmYWNlcykNCg0KPiA+ICsJLyoNCj4gPiArCSAqIE5lZWQgdG8g
c2F2ZSBmbG9hdC1wb2ludCByZWdpc3RlcnMgaWYgTVNSW0ZQXSA9IDEuDQo+ID4gKwkgKi8NCj4g
PiArCW1mbXNyCXIxMg0KPiA+ICsJYW5kaS4JcjEyLCByMTIsIE1TUl9GUA0KPiA+ICsJYmVxCTFm
DQo+ID4gKwlkb19zcl9mcHJfcmVncyhzYXZlKQ0KPiANCj4gQyBjb2RlIHNob3VsZCBoYXZlIGFs
cmVhZHkgZW5zdXJlZCB0aGF0IE1TUltGUF0gaXMgbm90IDEgKGFuZCB0aHVzIHRoZSBGUA0KPiBj
b250ZXh0IGhhcyBiZWVuIHNhdmVkKS4NCj4gDQoNClllcywgcmlnaHQuIEJ1dCBJIG1lYW4gaWYg
dGhlIEZQIHN0aWxsIHVzZSBpbiBjb3JlIHNhdmUgZmxvdywgd2UgbmVlZCB0byBzYXZlIGl0Lg0K
SW4gdGhpcyBwcm9jZXNzLCBpIGRvbid0IGNhcmUgd2hhdCBvdGhlciBjb2RlIGRvLCB3ZSBuZWVk
IHRvIGZvY3VzIG9uIG5vdCBsb3NpbmcNCnZhbHVhYmxlIGRhdGEuDQoNCj4gPiArLyoNCj4gPiAr
ICogcjMgPSB0aGUgdmlydHVhbCBhZGRyZXNzIG9mIGJ1ZmZlcg0KPiA+ICsgKiByNCA9IHN1c3Bl
bmQgdHlwZSwgMC1CQVNFX1NBVkUsIDEtQUxMX1NBVkUNCj4gDQo+ICNkZWZpbmUgdGhlc2UgbWFn
aWMgbnVtYmVycywgYW5kIGRlZmluZSB3aGF0IGlzIG1lYW50IGJ5ICJiYXNlIHNhdmUiDQo+IHZl
cnN1cyAiYWxsIHNhdmUiLg0KDQpPaywgdGhhbmtzLg0KDQotRG9uZ3NoZW5nDQoNCg==

^ permalink raw reply

* RE: [PATCH 3/3] powerpc/fsl: Use the new interface to save or restore registers
From: Dongsheng.Wang @ 2014-01-15  2:57 UTC (permalink / raw)
  To: Scott Wood
  Cc: anton@enomsg.org, linuxppc-dev@lists.ozlabs.org,
	chenhui.zhao@freescale.com
In-Reply-To: <1389742224.24905.140.camel@snotra.buserror.net>

DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogV29vZCBTY290dC1CMDc0
MjENCj4gU2VudDogV2VkbmVzZGF5LCBKYW51YXJ5IDE1LCAyMDE0IDc6MzAgQU0NCj4gVG86IFdh
bmcgRG9uZ3NoZW5nLUI0MDUzNA0KPiBDYzogYmVuaEBrZXJuZWwuY3Jhc2hpbmcub3JnOyBaaGFv
IENoZW5odWktQjM1MzM2OyBhbnRvbkBlbm9tc2cub3JnOyBsaW51eHBwYy0NCj4gZGV2QGxpc3Rz
Lm96bGFicy5vcmcNCj4gU3ViamVjdDogUmU6IFtQQVRDSCAzLzNdIHBvd2VycGMvZnNsOiBVc2Ug
dGhlIG5ldyBpbnRlcmZhY2UgdG8gc2F2ZSBvciByZXN0b3JlDQo+IHJlZ2lzdGVycw0KPiANCj4g
T24gVHVlLCAyMDE0LTAxLTE0IGF0IDE1OjU5ICswODAwLCBEb25nc2hlbmcgV2FuZyB3cm90ZToN
Cj4gPiBGcm9tOiBXYW5nIERvbmdzaGVuZyA8ZG9uZ3NoZW5nLndhbmdAZnJlZXNjYWxlLmNvbT4N
Cj4gPg0KPiA+IFVzZSBmc2xfY3B1X3N0YXRlX3NhdmUvZnNsX2NwdV9zdGF0ZV9yZXN0b3JlIHRv
IHNhdmUvcmVzdG9yZSByZWdpc3RlcnMuDQo+ID4gVXNlIHRoZSBmdW5jdGlvbnMgdG8gc2F2ZS9y
ZXN0b3JlIHJlZ2lzdGVycywgc28gd2UgZG9uJ3QgbmVlZCB0bw0KPiA+IG1haW50YWluIHRoZSBj
b2RlLg0KPiA+DQo+ID4gU2lnbmVkLW9mZi1ieTogV2FuZyBEb25nc2hlbmcgPGRvbmdzaGVuZy53
YW5nQGZyZWVzY2FsZS5jb20+DQo+IA0KPiBJcyB0aGVyZSBhbnkgZnVuY3Rpb25hbCBjaGFuZ2Ug
d2l0aCB0aGlzIHBhdGNoc2V0IChlLmcuIHN1c3BlbmQNCj4gc3VwcG9ydGVkIG9uIGNoaXBzIHdo
ZXJlIGl0IHdhc24ndCBiZWZvcmUpLCBvciBpcyBpdCBqdXN0IGNsZWFudXA/ICBBDQo+IGNvdmVy
IGxldHRlciB3b3VsZCBiZSB1c2VmdWwgdG8gZGVzY3JpYmUgdGhlIHB1cnBvc2Ugb2YgdGhlIG92
ZXJhbGwNCj4gcGF0Y2hzZXQgd2hlbiBpdCBpc24ndCBvYnZpb3VzLg0KPiANCg0KWWVzLCBqdXN0
IGNsZWFudXAuLg0KDQo+ID4gKw0KPiA+ICsJLyogUmVzdG9yZSBiYXNlIHJlZ2lzdGVyICovDQo+
ID4gKwlsaQlyNCwgMA0KPiA+ICsJYmwJZnNsX2NwdV9zdGF0ZV9yZXN0b3JlDQo+IA0KPiBXaHkg
YXJlIHlvdSBjYWxsaW5nIGFueXRoaW5nIHdpdGggImZzbCIgaW4gdGhlIG5hbWUgZnJvbSBjb2Rl
IHRoYXQgaXMNCj4gc3VwcG9zZWQgdG8gYmUgZm9yIGFsbCBib29rZT8NCj4gDQpFMjAwLCBFMzAw
IG5vdCBzdXBwb3J0Lg0KU3VwcG9ydCBFNTAwLCBFNTAwdjIsIEU1MDBNQywgRTU1MDAsIEU2NTAw
Lg0KDQpEbyB5b3UgaGF2ZSBhbnkgc3VnZ2VzdGlvbnMgYWJvdXQgdGhpcz8NCg0KVGhhbmtzLA0K
LURvbmdzaGVuZw0KDQo=

^ permalink raw reply

* Re: [v6,2/5] powerpc/book3e: store crit/mc/dbg exception thread info
From: Scott Wood @ 2014-01-15  1:27 UTC (permalink / raw)
  To: Tiejun Chen; +Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1382520685-11609-3-git-send-email-tiejun.chen@windriver.com>

On Wed, Oct 23, 2013 at 05:31:22PM +0800, Tiejun Chen wrote:
> We need to store thread info to these exception thread info like something
> we already did for PPC32.
> 
> Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
> 
> ---
> arch/powerpc/kernel/exceptions-64e.S |   22 +++++++++++++++++++---
>  1 file changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
> index 68d74b4..a55cf62 100644
> --- a/arch/powerpc/kernel/exceptions-64e.S
> +++ b/arch/powerpc/kernel/exceptions-64e.S
> @@ -36,6 +36,19 @@
>   */
>  #define	SPECIAL_EXC_FRAME_SIZE	INT_FRAME_SIZE
>  
> +/* Now we only store something to exception thread info */

Now as opposed to when?  Only as opposed to what else?

> +#define	EXC_LEVEL_EXCEPTION_PROLOG(type)				\

I'd prefer .macro over #define.

> +	ld	r14,PACAKSAVE(r13);					\
> +	CURRENT_THREAD_INFO(r14, r14);					\
> +	CURRENT_THREAD_INFO(r15, r1);					\
> +	ld	r10,TI_FLAGS(r14);		     			\
> +	std	r10,TI_FLAGS(r15);			     		\
> +	ld	r10,TI_PREEMPT(r14);		     			\
> +	std	r10,TI_PREEMPT(r15);		     			\
> +	ld	r10,TI_TASK(r14);			     		\
> +	std	r10,TI_TASK(r15);

This is a start, but we'll also need to save some more context to allow
TLB misses from within the exception (e.g. if a machine check handler or
GDB stub writes to a serial port, and the I/O registers aren't in the
TLB).  At a minimum I think we need to save SRR0, SRR1,
SPRN_SPRG_GEN_SCRATCH, SPRN_SPRG_TLB_SCRATCH, and the MAS registers. 
We'll also need to make the bolted TLB miss handlers capable of pointing
to different extables (though they won't need to auto-advance as the
original TLB miss handlers do -- we would advance SPRN_SPRG_TLB_EXFRAME
from this code), and the original TLB miss handlers will now need to
support more than 3 levels of nesting.

For the e6500 tablewalk TLB miss handler, we'll need to do something
special if we interrupt it when the lock is held, to revoke the lock and
return to code that retries.

Is there anything else I'm missing?

-Scott

^ permalink raw reply

* Re: [PATCH 2/3] powerpc/85xx: Provide two functions to save/restore the core registers
From: Scott Wood @ 2014-01-14 23:50 UTC (permalink / raw)
  To: Dongsheng Wang; +Cc: anton, linuxppc-dev, chenhui.zhao
In-Reply-To: <1389686397-46555-2-git-send-email-dongsheng.wang@freescale.com>

On Tue, 2014-01-14 at 15:59 +0800, Dongsheng Wang wrote:
> From: Wang Dongsheng <dongsheng.wang@freescale.com>
> 
> Add fsl_cpu_state_save/fsl_cpu_state_restore functions, used for deep
> sleep and hibernation to save/restore core registers. We abstract out
> save/restore code for use in various modules, to make them don't need
> to maintain.
> 
> Currently supported processors type are E6500, E5500, E500MC, E500v2 and
> E500v1.
> 
> Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>

What is there that is specific to a particular core type that can't be
handled from C code?

> +	/*
> +	 * Need to save float-point registers if MSR[FP] = 1.
> +	 */
> +	mfmsr	r12
> +	andi.	r12, r12, MSR_FP
> +	beq	1f
> +	do_sr_fpr_regs(save)

C code should have already ensured that MSR[FP] is not 1 (and thus the
FP context has been saved).

> +/*
> + * r3 = the virtual address of buffer
> + * r4 = suspend type, 0-BASE_SAVE, 1-ALL_SAVE

#define these magic numbers, and define what is meant by "base save"
versus "all save".

-Scott

^ permalink raw reply

* Re: [PATCH 3/3] powerpc/fsl: Use the new interface to save or restore registers
From: Scott Wood @ 2014-01-14 23:30 UTC (permalink / raw)
  To: Dongsheng Wang; +Cc: anton, linuxppc-dev, chenhui.zhao
In-Reply-To: <1389686397-46555-3-git-send-email-dongsheng.wang@freescale.com>

On Tue, 2014-01-14 at 15:59 +0800, Dongsheng Wang wrote:
> From: Wang Dongsheng <dongsheng.wang@freescale.com>
> 
> Use fsl_cpu_state_save/fsl_cpu_state_restore to save/restore registers.
> Use the functions to save/restore registers, so we don't need to
> maintain the code.
> 
> Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>

Is there any functional change with this patchset (e.g. suspend
supported on chips where it wasn't before), or is it just cleanup?  A
cover letter would be useful to describe the purpose of the overall
patchset when it isn't obvious.

> 
> diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S
> index 553c140..b5992db 100644
> --- a/arch/powerpc/kernel/swsusp_booke.S
> +++ b/arch/powerpc/kernel/swsusp_booke.S
> @@ -4,92 +4,28 @@
>   * Copyright (c) 2009-2010 MontaVista Software, LLC.
>   */
>  
> -#include <linux/threads.h>
> -#include <asm/processor.h>
>  #include <asm/page.h>
> -#include <asm/cputable.h>
> -#include <asm/thread_info.h>
>  #include <asm/ppc_asm.h>
>  #include <asm/asm-offsets.h>
>  #include <asm/mmu.h>
> -
> -/*
> - * Structure for storing CPU registers on the save area.
> - */
> -#define SL_SP		0
> -#define SL_PC		4
> -#define SL_MSR		8
> -#define SL_TCR		0xc
> -#define SL_SPRG0	0x10
> -#define SL_SPRG1	0x14
> -#define SL_SPRG2	0x18
> -#define SL_SPRG3	0x1c
> -#define SL_SPRG4	0x20
> -#define SL_SPRG5	0x24
> -#define SL_SPRG6	0x28
> -#define SL_SPRG7	0x2c
> -#define SL_TBU		0x30
> -#define SL_TBL		0x34
> -#define SL_R2		0x38
> -#define SL_CR		0x3c
> -#define SL_LR		0x40
> -#define SL_R12		0x44	/* r12 to r31 */
> -#define SL_SIZE		(SL_R12 + 80)
> -
> -	.section .data
> -	.align	5
> -
> -_GLOBAL(swsusp_save_area)
> -	.space	SL_SIZE
> -
> +#include <asm/fsl_sleep.h>
>  
>  	.section .text
>  	.align	5
>  
>  _GLOBAL(swsusp_arch_suspend)
> -	lis	r11,swsusp_save_area@h
> -	ori	r11,r11,swsusp_save_area@l
> -
> -	mflr	r0
> -	stw	r0,SL_LR(r11)
> -	mfcr	r0
> -	stw	r0,SL_CR(r11)
> -	stw	r1,SL_SP(r11)
> -	stw	r2,SL_R2(r11)
> -	stmw	r12,SL_R12(r11)
> -
> -	/* Save MSR & TCR */
> -	mfmsr	r4
> -	stw	r4,SL_MSR(r11)
> -	mfspr	r4,SPRN_TCR
> -	stw	r4,SL_TCR(r11)
> -
> -	/* Get a stable timebase and save it */
> -1:	mfspr	r4,SPRN_TBRU
> -	stw	r4,SL_TBU(r11)
> -	mfspr	r5,SPRN_TBRL
> -	stw	r5,SL_TBL(r11)
> -	mfspr	r3,SPRN_TBRU
> -	cmpw	r3,r4
> -	bne	1b
> +	mflr	r15
> +	lis	r3, core_registers_save_area@h
> +	ori	r3, r3, core_registers_save_area@l
> +
> +	/* Save base register */
> +	li	r4, 0
> +	bl	fsl_cpu_state_save
>  
> -	/* Save SPRGs */
> -	mfspr	r4,SPRN_SPRG0
> -	stw	r4,SL_SPRG0(r11)
> -	mfspr	r4,SPRN_SPRG1
> -	stw	r4,SL_SPRG1(r11)
> -	mfspr	r4,SPRN_SPRG2
> -	stw	r4,SL_SPRG2(r11)
> -	mfspr	r4,SPRN_SPRG3
> -	stw	r4,SL_SPRG3(r11)
> -	mfspr	r4,SPRN_SPRG4
> -	stw	r4,SL_SPRG4(r11)
> -	mfspr	r4,SPRN_SPRG5
> -	stw	r4,SL_SPRG5(r11)
> -	mfspr	r4,SPRN_SPRG6
> -	stw	r4,SL_SPRG6(r11)
> -	mfspr	r4,SPRN_SPRG7
> -	stw	r4,SL_SPRG7(r11)
> +	/* Save LR */
> +	lis	r3, core_registers_save_area@h
> +	ori	r3, r3, core_registers_save_area@l
> +	stw	r15, SR_LR(r3)
>  
>  	/* Call the low level suspend stuff (we should probably have made
>  	 * a stackframe...
> @@ -97,11 +33,12 @@ _GLOBAL(swsusp_arch_suspend)
>  	bl	swsusp_save
>  
>  	/* Restore LR from the save area */
> -	lis	r11,swsusp_save_area@h
> -	ori	r11,r11,swsusp_save_area@l
> -	lwz	r0,SL_LR(r11)
> -	mtlr	r0
> +	lis	r3, core_registers_save_area@h
> +	ori	r3, r3, core_registers_save_area@l
> +	lwz	r15, SR_LR(r3)
> +	mtlr	r15
>  
> +	li	r3, 0
>  	blr
>  
>  _GLOBAL(swsusp_arch_resume)
> @@ -138,9 +75,6 @@ _GLOBAL(swsusp_arch_resume)
>  	bl flush_dcache_L1
>  	bl flush_instruction_cache
>  
> -	lis	r11,swsusp_save_area@h
> -	ori	r11,r11,swsusp_save_area@l
> -
>  	/*
>  	 * Mappings from virtual addresses to physical addresses may be
>  	 * different than they were prior to restoring hibernation state. 
> @@ -149,53 +83,12 @@ _GLOBAL(swsusp_arch_resume)
>  	 */
>  	bl	_tlbil_all
>  
> -	lwz	r4,SL_SPRG0(r11)
> -	mtspr	SPRN_SPRG0,r4
> -	lwz	r4,SL_SPRG1(r11)
> -	mtspr	SPRN_SPRG1,r4
> -	lwz	r4,SL_SPRG2(r11)
> -	mtspr	SPRN_SPRG2,r4
> -	lwz	r4,SL_SPRG3(r11)
> -	mtspr	SPRN_SPRG3,r4
> -	lwz	r4,SL_SPRG4(r11)
> -	mtspr	SPRN_SPRG4,r4
> -	lwz	r4,SL_SPRG5(r11)
> -	mtspr	SPRN_SPRG5,r4
> -	lwz	r4,SL_SPRG6(r11)
> -	mtspr	SPRN_SPRG6,r4
> -	lwz	r4,SL_SPRG7(r11)
> -	mtspr	SPRN_SPRG7,r4
> -
> -	/* restore the MSR */
> -	lwz	r3,SL_MSR(r11)
> -	mtmsr	r3
> -
> -	/* Restore TB */
> -	li	r3,0
> -	mtspr	SPRN_TBWL,r3
> -	lwz	r3,SL_TBU(r11)
> -	lwz	r4,SL_TBL(r11)
> -	mtspr	SPRN_TBWU,r3
> -	mtspr	SPRN_TBWL,r4
> -
> -	/* Restore TCR and clear any pending bits in TSR. */
> -	lwz	r4,SL_TCR(r11)
> -	mtspr	SPRN_TCR,r4
> -	lis	r4, (TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS)@h
> -	mtspr	SPRN_TSR,r4
> -
> -	/* Kick decrementer */
> -	li	r0,1
> -	mtdec	r0
> -
> -	/* Restore the callee-saved registers and return */
> -	lwz	r0,SL_CR(r11)
> -	mtcr	r0
> -	lwz	r2,SL_R2(r11)
> -	lmw	r12,SL_R12(r11)
> -	lwz	r1,SL_SP(r11)
> -	lwz	r0,SL_LR(r11)
> -	mtlr	r0
> +	lis	r3, core_registers_save_area@h
> +	ori	r3, r3, core_registers_save_area@l
> +
> +	/* Restore base register */
> +	li	r4, 0
> +	bl	fsl_cpu_state_restore

Why are you calling anything with "fsl" in the name from code that is
supposed to be for all booke?

-Scott

^ permalink raw reply

* Re: [PATCH 3/4] powerpc: use subsys_initcall for Freescale Local Bus
From: Scott Wood @ 2014-01-14 22:56 UTC (permalink / raw)
  To: Paul Gortmaker; +Cc: Paul Mackerras, linuxppc-dev
In-Reply-To: <1389630113-7919-4-git-send-email-paul.gortmaker@windriver.com>

On Mon, 2014-01-13 at 11:21 -0500, Paul Gortmaker wrote:
> The FSL_SOC option is bool, and hence this code is either
> present or absent.  It will never be modular, so using
> module_init as an alias for __initcall is rather misleading.
> 
> Fix this up now, so that we can relocate module_init from
> init.h into module.h in the future.  If we don't do this, we'd
> have to add module.h to obviously non-modular code, and that
> would be a worse thing.
> 
> Note that direct use of __initcall is discouraged, vs. one
> of the priority categorized subgroups.  As __initcall gets
> mapped onto device_initcall, our use of subsys_initcall (which
> makes sense for bus code) will thus change this registration
> from level 6-device to level 4-subsys (i.e. slightly earlier).
> However no observable impact of that small difference has
> been observed during testing, or is expected.
> 
> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
> ---
>  arch/powerpc/sysdev/fsl_lbc.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c
> index 6bc5a546d49f..9f00e5f84abe 100644
> --- a/arch/powerpc/sysdev/fsl_lbc.c
> +++ b/arch/powerpc/sysdev/fsl_lbc.c
> @@ -388,4 +388,4 @@ static int __init fsl_lbc_init(void)
>  {
>  	return platform_driver_register(&fsl_lbc_ctrl_driver);
>  }
> -module_init(fsl_lbc_init);
> +subsys_initcall(fsl_lbc_init);

Acked-by: Scott Wood <scottwood@freescale.com>

-Scott

^ permalink raw reply

* Re: [PATCH v2] Move precessing of MCE queued event out from syscall exit path.
From: Benjamin Herrenschmidt @ 2014-01-14 20:17 UTC (permalink / raw)
  To: Hugh Dickins; +Cc: Mahesh J Salgaonkar, linuxppc-dev
In-Reply-To: <alpine.LSU.2.11.1401141135240.3762@eggly.anvils>

On Tue, 2014-01-14 at 11:48 -0800, Hugh Dickins wrote:
> On Tue, 14 Jan 2014, Mahesh J Salgaonkar wrote:
> > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> > 
> > Hugh Dickins reported an issue that b5ff4211a829
> > "powerpc/book3s: Queue up and process delayed MCE events" breaks the
> > PowerMac G5 boot. This patch fixes it by moving the mce event processing
> > away from syscall exit, which was wrong to do in the first place, and
> > using the irq work framework to delay processing of the mce event.
> > 
> > Reported-by: Hugh Dickins <hughd@google.com>
> > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> 
> This version also boots and runs fine for me on the G5
> (but of course, I'm probably not testing delayed MCE events at all).

Thanks Hugh !

Cheers,
Ben.

> Hugh
> 
> > ---
> >  arch/powerpc/include/asm/mce.h |    1 -
> >  arch/powerpc/kernel/entry_64.S |    5 -----
> >  arch/powerpc/kernel/mce.c      |   13 ++++++++++---
> >  3 files changed, 10 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> > index 2257d1e..f97d8cb 100644
> > --- a/arch/powerpc/include/asm/mce.h
> > +++ b/arch/powerpc/include/asm/mce.h
> > @@ -192,7 +192,6 @@ extern void save_mce_event(struct pt_regs *regs, long handled,
> >  extern int get_mce_event(struct machine_check_event *mce, bool release);
> >  extern void release_mce_event(void);
> >  extern void machine_check_queue_event(void);
> > -extern void machine_check_process_queued_event(void);
> >  extern void machine_check_print_event_info(struct machine_check_event *evt);
> >  extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
> >  
> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> > index 770d6d6..bbfb029 100644
> > --- a/arch/powerpc/kernel/entry_64.S
> > +++ b/arch/powerpc/kernel/entry_64.S
> > @@ -184,11 +184,6 @@ syscall_exit:
> >  	bl	.do_show_syscall_exit
> >  	ld	r3,RESULT(r1)
> >  #endif
> > -#ifdef CONFIG_PPC_BOOK3S_64
> > -BEGIN_FTR_SECTION
> > -	bl	.machine_check_process_queued_event
> > -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> > -#endif
> >  	CURRENT_THREAD_INFO(r12, r1)
> >  
> >  	ld	r8,_MSR(r1)
> > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> > index d6edf2b..a7fd4cb 100644
> > --- a/arch/powerpc/kernel/mce.c
> > +++ b/arch/powerpc/kernel/mce.c
> > @@ -26,6 +26,7 @@
> >  #include <linux/ptrace.h>
> >  #include <linux/percpu.h>
> >  #include <linux/export.h>
> > +#include <linux/irq_work.h>
> >  #include <asm/mce.h>
> >  
> >  static DEFINE_PER_CPU(int, mce_nest_count);
> > @@ -35,6 +36,11 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
> >  static DEFINE_PER_CPU(int, mce_queue_count);
> >  static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
> >  
> > +static void machine_check_process_queued_event(struct irq_work *work);
> > +struct irq_work mce_event_process_work = {
> > +        .func = machine_check_process_queued_event,
> > +};
> > +
> >  static void mce_set_error_info(struct machine_check_event *mce,
> >  			       struct mce_error_info *mce_err)
> >  {
> > @@ -185,17 +191,19 @@ void machine_check_queue_event(void)
> >  		return;
> >  	}
> >  	__get_cpu_var(mce_event_queue[index]) = evt;
> > +
> > +	/* Queue irq work to process this event later. */
> > +	irq_work_queue(&mce_event_process_work);
> >  }
> >  
> >  /*
> >   * process pending MCE event from the mce event queue. This function will be
> >   * called during syscall exit.
> >   */
> > -void machine_check_process_queued_event(void)
> > +static void machine_check_process_queued_event(struct irq_work *work)
> >  {
> >  	int index;
> >  
> > -	preempt_disable();
> >  	/*
> >  	 * For now just print it to console.
> >  	 * TODO: log this error event to FSP or nvram.
> > @@ -206,7 +214,6 @@ void machine_check_process_queued_event(void)
> >  				&__get_cpu_var(mce_event_queue[index]));
> >  		__get_cpu_var(mce_queue_count)--;
> >  	}
> > -	preempt_enable();
> >  }
> >  
> >  void machine_check_print_event_info(struct machine_check_event *evt)
> > 
> > 

^ permalink raw reply

* Re: [PATCH v2] Move precessing of MCE queued event out from syscall exit path.
From: Hugh Dickins @ 2014-01-14 19:48 UTC (permalink / raw)
  To: Mahesh J Salgaonkar; +Cc: linuxppc-dev
In-Reply-To: <20140114101450.32385.65506.stgit@mars.in.ibm.com>

On Tue, 14 Jan 2014, Mahesh J Salgaonkar wrote:
> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> 
> Hugh Dickins reported an issue that b5ff4211a829
> "powerpc/book3s: Queue up and process delayed MCE events" breaks the
> PowerMac G5 boot. This patch fixes it by moving the mce event processing
> away from syscall exit, which was wrong to do in the first place, and
> using the irq work framework to delay processing of the mce event.
> 
> Reported-by: Hugh Dickins <hughd@google.com>
> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

This version also boots and runs fine for me on the G5
(but of course, I'm probably not testing delayed MCE events at all).

Hugh

> ---
>  arch/powerpc/include/asm/mce.h |    1 -
>  arch/powerpc/kernel/entry_64.S |    5 -----
>  arch/powerpc/kernel/mce.c      |   13 ++++++++++---
>  3 files changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index 2257d1e..f97d8cb 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -192,7 +192,6 @@ extern void save_mce_event(struct pt_regs *regs, long handled,
>  extern int get_mce_event(struct machine_check_event *mce, bool release);
>  extern void release_mce_event(void);
>  extern void machine_check_queue_event(void);
> -extern void machine_check_process_queued_event(void);
>  extern void machine_check_print_event_info(struct machine_check_event *evt);
>  extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
>  
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 770d6d6..bbfb029 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -184,11 +184,6 @@ syscall_exit:
>  	bl	.do_show_syscall_exit
>  	ld	r3,RESULT(r1)
>  #endif
> -#ifdef CONFIG_PPC_BOOK3S_64
> -BEGIN_FTR_SECTION
> -	bl	.machine_check_process_queued_event
> -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> -#endif
>  	CURRENT_THREAD_INFO(r12, r1)
>  
>  	ld	r8,_MSR(r1)
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index d6edf2b..a7fd4cb 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -26,6 +26,7 @@
>  #include <linux/ptrace.h>
>  #include <linux/percpu.h>
>  #include <linux/export.h>
> +#include <linux/irq_work.h>
>  #include <asm/mce.h>
>  
>  static DEFINE_PER_CPU(int, mce_nest_count);
> @@ -35,6 +36,11 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
>  static DEFINE_PER_CPU(int, mce_queue_count);
>  static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
>  
> +static void machine_check_process_queued_event(struct irq_work *work);
> +struct irq_work mce_event_process_work = {
> +        .func = machine_check_process_queued_event,
> +};
> +
>  static void mce_set_error_info(struct machine_check_event *mce,
>  			       struct mce_error_info *mce_err)
>  {
> @@ -185,17 +191,19 @@ void machine_check_queue_event(void)
>  		return;
>  	}
>  	__get_cpu_var(mce_event_queue[index]) = evt;
> +
> +	/* Queue irq work to process this event later. */
> +	irq_work_queue(&mce_event_process_work);
>  }
>  
>  /*
>   * process pending MCE event from the mce event queue. This function will be
>   * called during syscall exit.
>   */
> -void machine_check_process_queued_event(void)
> +static void machine_check_process_queued_event(struct irq_work *work)
>  {
>  	int index;
>  
> -	preempt_disable();
>  	/*
>  	 * For now just print it to console.
>  	 * TODO: log this error event to FSP or nvram.
> @@ -206,7 +214,6 @@ void machine_check_process_queued_event(void)
>  				&__get_cpu_var(mce_event_queue[index]));
>  		__get_cpu_var(mce_queue_count)--;
>  	}
> -	preempt_enable();
>  }
>  
>  void machine_check_print_event_info(struct machine_check_event *evt)
> 
> 

^ permalink raw reply

* Re: Disable sleep states on P7+
From: Steven Pratt @ 2014-01-14 17:04 UTC (permalink / raw)
  To: Preeti U Murthy; +Cc: linuxppc-dev
In-Reply-To: <52D56186.70807@linux.vnet.ibm.com>

On 01/14/2014 10:10 AM, Preeti U Murthy wrote:
> Hi Steven,
>
> On 01/14/2014 08:06 PM, Steven Pratt wrote:
>> I am looking for info on when and how we are able to disable power saving features of current (P7, P7+) chips in order to reduce latency. This is often done in latency sensitive applications when power consumption is not an issue. On Intel boxes we can disable P-state frequency changes as well as disabling C-State or sleep state changes. In fact we can control how deep a sleep the processor can go into.  I know we have control Dynamic Processor Scaling and Idle Power Savings, but what states do these really affect?  Can I really disable Nap mode of a processor? If so how?  Can I disable even the lightest winkle mode?  Looking for current information (read RHEL 6 and SLES11), future changes are interesting.
>>
>> Steve
> I can answer this question with respect to cpuidle on PowerNV platforms.
>
> 1. In order to disable cpuidle states management altogether, one can
> pass the powersave=off kernel cmd line parameter during boot up of the
> kernel. This will ensure that each time a CPU has nothing to do, it can
> enter low thread priority which could lower power consumption to some
> extent but is not expected to hit latency of applications noticeably.
>
> 2. In order to exactly control the cpuidle states which idle CPUs
> can enter into during runtime, one can make use of the sysfs files under:
> /sys/devices/system/cpu/cpux/cpuidle/statex/disable option to
> selectively disable any state.
>
> However if one is using the menu cpuidle governor, disabling an idle
> state does not disable the idle states which are deeper than it. They
> continue to remain active unless they are specifically disabled. What
> this means is that one cannot control the depth of the idle states
> available for a CPU, although we can control the exact idle states
> available for a processor.
>
> But if the ladder governor is used, one can control the depth of the
> idle states that a CPU can enter into. The governor can be chosen by
> echoing either menu/ladder to
> /sys/devices/system/cpu/cpuidle/current_governor_ro. The cpuidle
> governor takes decisions about the idle state for a cpu to enter into
> depending on its idle history. The popular governor used by most archs
> is the menu governor.
>
> Hence nap/sleep/winkle any of these states can be disabled. The code
> which enables the above mentioned functionalities on powernv is yet to
> go upstream although the same is already upstream and can be used for
> the pseries platform to disable/enable the idle states on it.
>
> Today on powernv the default idle state nap is entered into all the
> time. One can disable it by echoing 0 to powersave_nap under
> /proc/sys/kernel/powersave_nap, in which case the cpu enters low thread

Thanks, that is great information going forward, now I just need info on what works today in PowerVM.

Steve

> priority.
>
> Thanks
>
> Regards
> Preeti U Murthy
>
>> _______________________________________________
>> Linuxppc-dev mailing list
>> Linuxppc-dev@lists.ozlabs.org
>> https://lists.ozlabs.org/listinfo/linuxppc-dev
>>

^ permalink raw reply

* Re: Disable sleep states on P7+
From: Preeti U Murthy @ 2014-01-14 16:10 UTC (permalink / raw)
  To: Steven Pratt; +Cc: linuxppc-dev
In-Reply-To: <52D54B60.4090807@austin.ibm.com>

Hi Steven,

On 01/14/2014 08:06 PM, Steven Pratt wrote:
> I am looking for info on when and how we are able to disable power saving features of current (P7, P7+) chips in order to reduce latency. This is often done in latency sensitive applications when power consumption is not an issue. On Intel boxes we can disable P-state frequency changes as well as disabling C-State or sleep state changes. In fact we can control how deep a sleep the processor can go into.  I know we have control Dynamic Processor Scaling and Idle Power Savings, but what states do these really affect?  Can I really disable Nap mode of a processor? If so how?  Can I disable even the lightest winkle mode?  Looking for current information (read RHEL 6 and SLES11), future changes are interesting.
> 
> Steve

I can answer this question with respect to cpuidle on PowerNV platforms.

1. In order to disable cpuidle states management altogether, one can
pass the powersave=off kernel cmd line parameter during boot up of the
kernel. This will ensure that each time a CPU has nothing to do, it can
enter low thread priority which could lower power consumption to some
extent but is not expected to hit latency of applications noticeably.

2. In order to control exactly which cpuidle states idle CPUs can enter
at runtime, one can use the per-state sysfs files
/sys/devices/system/cpu/cpux/cpuidle/statex/disable to selectively
disable any state.

However if one is using the menu cpuidle governor, disabling an idle
state does not disable the idle states which are deeper than it. They
continue to remain active unless they are specifically disabled. What
this means is that one cannot control the depth of the idle states
available for a CPU, although we can control the exact idle states
available for a processor.

But if the ladder governor is used, one can control the depth of the
idle states that a CPU can enter into. The governor can be chosen by
echoing either menu/ladder to
/sys/devices/system/cpu/cpuidle/current_governor_ro. The cpuidle
governor takes decisions about the idle state for a cpu to enter into
depending on its idle history. The popular governor used by most archs
is the menu governor.

Hence any of the nap/sleep/winkle states can be disabled. The code
which enables the above mentioned functionality on powernv is yet to
go upstream, although the same code is already upstream for the pseries
platform and can be used there to disable/enable the idle states.

Today on powernv the default idle state nap is entered into all the
time. One can disable it by echoing 0 to powersave_nap under
/proc/sys/kernel/powersave_nap, in which case the cpu enters low thread
priority.

Thanks

Regards
Preeti U Murthy

> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
> 

^ permalink raw reply

* Disable sleep states on P7+
From: Steven Pratt @ 2014-01-14 14:36 UTC (permalink / raw)
  To: linuxppc-dev

I am looking for info on when and how we are able to disable power saving features of current (P7, P7+) chips in order to reduce latency. This is often done in latency sensitive applications when power consumption is not an issue. On Intel boxes we can disable P-state frequency changes as well as disabling C-State or sleep state changes. In fact we can control how deep a sleep the processor can go into.  I know we have control over Dynamic Processor Scaling and Idle Power Savings, but what states do these really affect?  Can I really disable Nap mode of a processor? If so how?  Can I disable even the lightest winkle mode?  Looking for current information (read RHEL 6 and SLES11), future changes are interesting.

Steve

^ permalink raw reply

* Re: [PATCH] cpuidle/menu: Fail cpuidle_idle_call() if no idle state is acceptable
From: Preeti U Murthy @ 2014-01-14 11:02 UTC (permalink / raw)
  To: Srivatsa S. Bhat, svaidy
  Cc: deepthi, linux-pm, daniel.lezcano, rjw, linux-kernel, paulmck,
	linuxppc-dev, tuukka.tikkanen
In-Reply-To: <52D4E93C.3050503@linux.vnet.ibm.com>

On 01/14/2014 01:07 PM, Srivatsa S. Bhat wrote:
> On 01/14/2014 12:30 PM, Srivatsa S. Bhat wrote:
>> On 01/14/2014 11:35 AM, Preeti U Murthy wrote:
>>> On PowerPC, in a particular test scenario, all the cpu idle states were disabled.
>>> In spite of this it was observed that the idle state count of the shallowest
>>> idle state, snooze, was increasing.
>>>
>>> This is because the governor returns the idle state index as 0 even in
>>> scenarios when no idle state can be chosen. These scenarios could be when the
>>> latency requirement is 0 or as mentioned above when the user wants to disable
>>> certain cpu idle states at runtime. In the latter case, it's possible that no
>>> cpu idle state is valid because the suitable states were disabled
>>> and the rest did not match the menu governor criteria to be chosen as the
>>> next idle state.
>>>
>>> This patch adds the code to indicate that a valid cpu idle state could not be
>>> chosen by the menu governor and reports back to arch so that it can take some
>>> default action.
>>>
>>
>> That sounds fair enough. However, the "default" action of pseries idle loop
>> (pseries_lpar_idle()) surprises me. It enters Cede, which is _deeper_ than doing
>> a snooze! IOW, a user might "disable" cpuidle or set the PM_QOS_CPU_DMA_LATENCY
>> to 0 hoping to prevent the CPUs from going to deep idle states, but then the
>> machine would still end up going to Cede, even though that won't get reflected
>> in the idle state counts. IMHO that scenario needs some thought as well...
>>
> 
> I checked the git history and found that the default idle was changed (on purpose)
> to cede the processor, in order to speed up booting.. Hmm..
> 
> commit 363edbe2614aa90df706c0f19ccfa2a6c06af0be
> Author: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
> Date:   Fri Sep 6 00:25:06 2013 +0530
> 
>     powerpc: Default arch idle could cede processor on pseries

This issue is not powerpc specific as I observed on digging a bit into
the default idle routines of the common archs. The way that archs
perceive the call to cpuidle framework today is that if it fails, it
means that cpuidle backend driver fails to *function* due to some reason
(as is mentioned in the above commit: either since cpuidle driver is not
registered or it does not work on some specific platforms) and that
therefore the archs should decide on an idle state themselves. They
therefore end up choosing a convenient idle state which could very well
be one of the idle states in the cpuidle state table.

The archs do not see a failed call to cpuidle driver as "cpuidle driver
says no idle state can be entered now because there are strict latency
requirements or the idle states are disabled". IOW, the call to cpuidle
driver is currently based on if cpuidle driver exists rather than if it
agrees on entry into any of the idle states.

This patch brings in the need for the archs to incorporate this
additional check of "did cpuidle_idle_call() fail because it did not
find it wise to enter any of the idle states". In which case they should
simply exit without taking any *default action*.

Need to give this some thought and reconsider the patch.

Regards
Preeti U Murthy
> 
> 
> Regards,
> Srivatsa S. Bhat
> 

^ permalink raw reply

* [PATCH v1] powernv/cpuidle: Back-end cpuidle driver for powernv platform.
From: Deepthi Dharwar @ 2014-01-14 11:02 UTC (permalink / raw)
  To: linux-pm, benh, daniel.lezcano, linux-kernel, srivatsa.bhat,
	preeti, svaidy, linuxppc-dev
In-Reply-To: <20140114110157.4091.80684.stgit@deepthi.in.ibm.com>

Following patch ports the cpuidle framework for powernv
platform and also implements a cpuidle back-end powernv
idle driver calling on to power7_nap and snooze idle states.

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/setup.c |   13 ++
 drivers/cpuidle/Kconfig.powerpc        |    9 ++
 drivers/cpuidle/Makefile               |    1 
 drivers/cpuidle/cpuidle-powernv.c      |  169 ++++++++++++++++++++++++++++++++
 4 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 drivers/cpuidle/cpuidle-powernv.c

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 19884b2..764a14e 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -26,6 +26,7 @@
 #include <linux/of_fdt.h>
 #include <linux/interrupt.h>
 #include <linux/bug.h>
+#include <linux/cpuidle.h>
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
@@ -214,6 +215,16 @@ static int __init pnv_probe(void)
 	return 1;
 }
 
+void powernv_idle(void)
+{
+	/* Hook to cpuidle framework if available, else
+	 * call on default platform idle code
+	 */
+	if (cpuidle_idle_call()) {
+		power7_idle();
+	}
+}
+
 define_machine(powernv) {
 	.name			= "PowerNV",
 	.probe			= pnv_probe,
@@ -223,7 +234,7 @@ define_machine(powernv) {
 	.show_cpuinfo		= pnv_show_cpuinfo,
 	.progress		= pnv_progress,
 	.machine_shutdown	= pnv_shutdown,
-	.power_save             = power7_idle,
+	.power_save             = powernv_idle,
 	.calibrate_decr		= generic_calibrate_decr,
 #ifdef CONFIG_KEXEC
 	.kexec_cpu_down		= pnv_kexec_cpu_down,
diff --git a/drivers/cpuidle/Kconfig.powerpc b/drivers/cpuidle/Kconfig.powerpc
index 8147de5..66c3a09 100644
--- a/drivers/cpuidle/Kconfig.powerpc
+++ b/drivers/cpuidle/Kconfig.powerpc
@@ -9,3 +9,12 @@ config PSERIES_CPUIDLE
 	help
 	  Select this option to enable processor idle state management
 	  through cpuidle subsystem.
+
+config POWERNV_CPUIDLE
+	bool "Cpuidle driver for powernv platforms"
+	depends on CPU_IDLE
+	depends on PPC_POWERNV
+	default y
+	help
+	  Select this option to enable processor idle state management
+	  through cpuidle subsystem.
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index a6331ad..f71ae1b 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_ARM_AT91_CPUIDLE)          += cpuidle-at91.o
 ###############################################################################
 # POWERPC drivers
 obj-$(CONFIG_PSERIES_CPUIDLE)		+= cpuidle-pseries.o
+obj-$(CONFIG_POWERNV_CPUIDLE)		+= cpuidle-powernv.o
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
new file mode 100644
index 0000000..78fd174
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -0,0 +1,169 @@
+/*
+ *  cpuidle-powernv - idle state cpuidle driver.
+ *  Adapted from drivers/cpuidle/cpuidle-pseries
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+struct cpuidle_driver powernv_idle_driver = {
+	.name             = "powernv_idle",
+	.owner            = THIS_MODULE,
+};
+
+static int max_idle_state;
+static struct cpuidle_state *cpuidle_state_table;
+
+static int snooze_loop(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv,
+			int index)
+{
+	local_irq_enable();
+	set_thread_flag(TIF_POLLING_NRFLAG);
+
+	while (!need_resched()) {
+		HMT_low();
+		HMT_very_low();
+	}
+
+	HMT_medium();
+	clear_thread_flag(TIF_POLLING_NRFLAG);
+	smp_mb();
+	return index;
+}
+
+static int nap_loop(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv,
+			int index)
+{
+	power7_idle();
+	return index;
+}
+
+/*
+ * Idle states for the powernv platform.
+ */
+static struct cpuidle_state powernv_states[] = {
+	{ /* Snooze */
+		.name = "snooze",
+		.desc = "snooze",
+		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 0,
+		.target_residency = 0,
+		.enter = &snooze_loop },
+	{ /* NAP */
+		.name = "NAP",
+		.desc = "NAP",
+		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 10,
+		.target_residency = 100,
+		.enter = &nap_loop },
+};
+
+static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
+			unsigned long action, void *hcpu)
+{
+	int hotcpu = (unsigned long)hcpu;
+	struct cpuidle_device *dev =
+				per_cpu(cpuidle_devices, hotcpu);
+
+	if (dev && cpuidle_get_driver()) {
+		switch (action) {
+		case CPU_ONLINE:
+		case CPU_ONLINE_FROZEN:
+			cpuidle_pause_and_lock();
+			cpuidle_enable_device(dev);
+			cpuidle_resume_and_unlock();
+			break;
+
+		case CPU_DEAD:
+		case CPU_DEAD_FROZEN:
+			cpuidle_pause_and_lock();
+			cpuidle_disable_device(dev);
+			cpuidle_resume_and_unlock();
+			break;
+
+		default:
+			return NOTIFY_DONE;
+		}
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block setup_hotplug_notifier = {
+	.notifier_call = powernv_cpuidle_add_cpu_notifier,
+};
+
+/*
+ * powernv_cpuidle_driver_init()
+ */
+static int powernv_cpuidle_driver_init(void)
+{
+	int idle_state;
+	struct cpuidle_driver *drv = &powernv_idle_driver;
+
+	drv->state_count = 0;
+
+	for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
+		/* Is the state not enabled? */
+		if (cpuidle_state_table[idle_state].enter == NULL)
+			continue;
+
+		drv->states[drv->state_count] =	/* structure copy */
+			cpuidle_state_table[idle_state];
+
+		drv->state_count += 1;
+	}
+
+	return 0;
+}
+
+/*
+ * powernv_idle_probe()
+ * Select the powernv state table when OPALv3 firmware is present
+ */
+static int powernv_idle_probe(void)
+{
+
+	if (cpuidle_disable != IDLE_NO_OVERRIDE)
+		return -ENODEV;
+
+	if (firmware_has_feature(FW_FEATURE_OPALv3)) {
+		cpuidle_state_table = powernv_states;
+		max_idle_state = ARRAY_SIZE(powernv_states);
+ 	} else
+ 		return -ENODEV;
+
+	return 0;
+}
+
+static int __init powernv_processor_idle_init(void)
+{
+	int retval;
+
+	retval = powernv_idle_probe();
+	if (retval)
+		return retval;
+
+	powernv_cpuidle_driver_init();
+	retval = cpuidle_register(&powernv_idle_driver, NULL);
+	if (retval) {
+		printk(KERN_DEBUG "Registration of powernv driver failed.\n");
+		return retval;
+	}
+
+	register_cpu_notifier(&setup_hotplug_notifier);
+	printk(KERN_DEBUG "powernv_idle_driver registered\n");
+	return 0;
+}
+
+device_initcall(powernv_processor_idle_init);

^ permalink raw reply related

* [PATCH v1] powernv/cpuidle: Back-end cpuidle driver for powernv platform for idle state management.
From: Deepthi Dharwar @ 2014-01-14 11:02 UTC (permalink / raw)
  To: linux-pm, benh, daniel.lezcano, linux-kernel, srivatsa.bhat,
	preeti, svaidy, linuxppc-dev

Following patch ports the cpuidle framework for powernv
platform and also implements a cpuidle back-end powernv 
idle driver calling on to power7_nap and snooze idle states.

Moving the idle states over to cpuidle framework can take advantage 
of advanced heuristics, tunables and features provided by cpuidle 
framework. Additional idle states can be exploited using the cpuidle 
framework. The statistics and tracing infrastructure provided by 
the cpuidle framework also helps in enabling power management 
related tools and help tune the system and applications.

This series aims to maintain compatibility and functionality to
existing powernv idle cpu management code.  There are no new functions
or idle states added as part of this series. This can be extended by 
adding more states to this existing framework.

For the POWERNV platform to hook into the CPUIDLE framework, one
needs to enable CONFIG_POWERNV_CPUIDLE.

This patch series applies on pseries cpuidle backend driver
fixes patchset posted earlier.
pseries/cpuidle: pseries cpuidle backend driver clean-ups.

 Deepthi Dharwar (1):
      powernv/cpuidle: Back-end cpuidle driver for powernv platform.

 arch/powerpc/platforms/powernv/setup.c |   13 ++
 drivers/cpuidle/Kconfig.powerpc        |    9 ++
 drivers/cpuidle/Makefile               |    1 
 drivers/cpuidle/cpuidle-powernv.c      |  169 ++++++++++++++++++++++++++++++++
 4 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 drivers/cpuidle/cpuidle-powernv.c

-- Deepthi

^ permalink raw reply

* [PATCH v1 6/6] pseries/cpuidle: smt-snooze-delay cleanup.
From: Deepthi Dharwar @ 2014-01-14 10:56 UTC (permalink / raw)
  To: linux-pm, benh, daniel.lezcano, linux-kernel, srivatsa.bhat,
	preeti, svaidy, linuxppc-dev
In-Reply-To: <20140114105525.3064.52013.stgit@deepthi.in.ibm.com>

smt-snooze-delay was designed to disable NAP state or delay the entry
to the NAP state prior to adoption of cpuidle framework. This
is a per-cpu variable. With the coming of CPUIDLE framework,
states can be disabled on a per-cpu basis using the cpuidle/enable
sysfs entry.

Also, with the coming of cpuidle driver each state's target residency
is per-driver unlike earlier which was per-device. Therefore,
the per-cpu sysfs smt-snooze-delay which decides the target residency
of the idle state on a particular cpu causes more confusion to the user
as we cannot have different smt-snooze-delay (target residency)
values for each cpu.

In the current code, smt-snooze-delay functionality is completely broken.
It makes sense to remove smt-snooze-delay from idle driver with the
coming of cpuidle framework.
However, the sysfs files are retained as ppc64_util currently
utilises them. Once we fix ppc64_util, we propose to clean
up the kernel code.

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/processor.h |    7 -------
 arch/powerpc/kernel/sysfs.c          |    2 --
 drivers/cpuidle/cpuidle-pseries.c    |   17 -----------------
 3 files changed, 26 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index fa98fdf..027fefd 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -444,13 +444,6 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
 
 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
 extern void power7_nap(void);
-
-#ifdef CONFIG_PSERIES_CPUIDLE
-extern void update_smt_snooze_delay(int cpu, int residency);
-#else
-static inline void update_smt_snooze_delay(int cpu, int residency) {}
-#endif
-
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
 extern void poweroff_now(void);
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index b4e6676..7f9e130 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -51,8 +51,6 @@ static ssize_t store_smt_snooze_delay(struct device *dev,
 		return -EINVAL;
 
 	per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
-	update_smt_snooze_delay(cpu->dev.id, snooze);
-
 	return count;
 }
 
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index bb56091..7ab564a 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -162,23 +162,6 @@ static struct cpuidle_state shared_states[] = {
 		.enter = &shared_cede_loop },
 };
 
-void update_smt_snooze_delay(int cpu, int residency)
-{
-	struct cpuidle_driver *drv = cpuidle_get_driver();
-	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
-
-	if (cpuidle_state_table != dedicated_states)
-		return;
-
-	if (residency < 0) {
-		/* Disable the Nap state on that cpu */
-		if (dev)
-			dev->states_usage[1].disable = 1;
-	} else
-		if (drv)
-			drv->states[1].target_residency = residency;
-}
-
 static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n,
 			unsigned long action, void *hcpu)
 {

^ permalink raw reply related

* [PATCH v1 5/6] pseries/cpuidle: Remove MAX_IDLE_STATE macro.
From: Deepthi Dharwar @ 2014-01-14 10:56 UTC (permalink / raw)
  To: linux-pm, benh, daniel.lezcano, linux-kernel, srivatsa.bhat,
	preeti, svaidy, linuxppc-dev
In-Reply-To: <20140114105525.3064.52013.stgit@deepthi.in.ibm.com>

This patch removes the usage of the MAX_IDLE_STATE_COUNT macro
and dead code around it. The number of states
is determined at run time based on the cpuidle
state table selected on a given platform.

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
---
 drivers/cpuidle/cpuidle-pseries.c |   28 ++++++++++------------------
 1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 5e13f6c..bb56091 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -24,9 +24,7 @@ struct cpuidle_driver pseries_idle_driver = {
 	.owner            = THIS_MODULE,
 };
 
-#define MAX_IDLE_STATE_COUNT	2
-
-static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
+static int max_idle_state;
 static struct cpuidle_state *cpuidle_state_table;
 
 static inline void idle_loop_prolog(unsigned long *in_purr)
@@ -134,7 +132,7 @@ static int shared_cede_loop(struct cpuidle_device *dev,
 /*
  * States for dedicated partition case.
  */
-static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
+static struct cpuidle_state dedicated_states[] = {
 	{ /* Snooze */
 		.name = "snooze",
 		.desc = "snooze",
@@ -154,7 +152,7 @@ static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
 /*
  * States for shared partition case.
  */
-static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
+static struct cpuidle_state shared_states[] = {
 	{ /* Shared Cede */
 		.name = "Shared Cede",
 		.desc = "Shared Cede",
@@ -225,12 +223,8 @@ static int pseries_cpuidle_driver_init(void)
 
 	drv->state_count = 0;
 
-	for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) {
-
-		if (idle_state > max_idle_state)
-			break;
-
-		/* is the state not enabled? */
+	for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
+		/* Is the state not enabled? */
 		if (cpuidle_state_table[idle_state].enter == NULL)
 			continue;
 
@@ -253,16 +247,14 @@ static int pseries_idle_probe(void)
 	if (cpuidle_disable != IDLE_NO_OVERRIDE)
 		return -ENODEV;
 
-	if (max_idle_state == 0) {
-		printk(KERN_DEBUG "pseries processor idle disabled.\n");
-		return -EPERM;
-	}
-
 	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
-		if (lppaca_shared_proc(get_lppaca()))
+		if (lppaca_shared_proc(get_lppaca())) {
 			cpuidle_state_table = shared_states;
-		else
+			max_idle_state = ARRAY_SIZE(shared_states);
+		} else {
 			cpuidle_state_table = dedicated_states;
+			max_idle_state = ARRAY_SIZE(dedicated_states);
+		}
 	} else
 		return -ENODEV;
 

^ permalink raw reply related

* [PATCH v1 4/6] pseries/cpuidle: Make cpuidle-pseries backend driver a non-module.
From: Deepthi Dharwar @ 2014-01-14 10:56 UTC (permalink / raw)
  To: linux-pm, benh, daniel.lezcano, linux-kernel, srivatsa.bhat,
	preeti, svaidy, linuxppc-dev
In-Reply-To: <20140114105525.3064.52013.stgit@deepthi.in.ibm.com>

Currently cpuidle-pseries backend driver cannot be
built as a module due to dependencies wrt cpuidle framework.
This patch removes all the module related code in the driver.

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
---
 drivers/cpuidle/cpuidle-pseries.c |   15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 32d86bc..5e13f6c 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -289,17 +289,4 @@ static int __init pseries_processor_idle_init(void)
 	return 0;
 }
 
-static void __exit pseries_processor_idle_exit(void)
-{
-
-	unregister_cpu_notifier(&setup_hotplug_notifier);
-	cpuidle_unregister(&pseries_idle_driver);
-	return;
-}
-
-module_init(pseries_processor_idle_init);
-module_exit(pseries_processor_idle_exit);
-
-MODULE_AUTHOR("Deepthi Dharwar <deepthi@linux.vnet.ibm.com>");
-MODULE_DESCRIPTION("Cpuidle driver for POWER");
-MODULE_LICENSE("GPL");
+device_initcall(pseries_processor_idle_init);

^ permalink raw reply related

* [PATCH v1 3/6] pseries/cpuidle: Use cpuidle_register() for initialisation.
From: Deepthi Dharwar @ 2014-01-14 10:56 UTC (permalink / raw)
  To: linux-pm, benh, daniel.lezcano, linux-kernel, srivatsa.bhat,
	preeti, svaidy, linuxppc-dev
In-Reply-To: <20140114105525.3064.52013.stgit@deepthi.in.ibm.com>

This patch replaces the cpuidle driver and devices initialisation
calls with a single generic cpuidle_register() call
and also includes minor refactoring of the code around it.

Remove the cpu online check in the snooze loop, as this code can
run locally on a cpu only if it is online. Therefore,
this check is not required.

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
---
 drivers/cpuidle/cpuidle-pseries.c |   78 +++++--------------------------------
 1 file changed, 11 insertions(+), 67 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 2115478..32d86bc 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -27,7 +27,6 @@ struct cpuidle_driver pseries_idle_driver = {
 #define MAX_IDLE_STATE_COUNT	2
 
 static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
-static struct cpuidle_device __percpu *pseries_cpuidle_devices;
 static struct cpuidle_state *cpuidle_state_table;
 
 static inline void idle_loop_prolog(unsigned long *in_purr)
@@ -55,13 +54,12 @@ static int snooze_loop(struct cpuidle_device *dev,
 			int index)
 {
 	unsigned long in_purr;
-	int cpu = dev->cpu;
 
 	idle_loop_prolog(&in_purr);
 	local_irq_enable();
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
-	while ((!need_resched()) && cpu_online(cpu)) {
+	while (!need_resched()) {
 		HMT_low();
 		HMT_very_low();
 	}
@@ -188,7 +186,7 @@ static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n,
 {
 	int hotcpu = (unsigned long)hcpu;
 	struct cpuidle_device *dev =
-			per_cpu_ptr(pseries_cpuidle_devices, hotcpu);
+				per_cpu(cpuidle_devices, hotcpu);
 
 	if (dev && cpuidle_get_driver()) {
 		switch (action) {
@@ -245,50 +243,6 @@ static int pseries_cpuidle_driver_init(void)
 	return 0;
 }
 
-/* pseries_idle_devices_uninit(void)
- * unregister cpuidle devices and de-allocate memory
- */
-static void pseries_idle_devices_uninit(void)
-{
-	int i;
-	struct cpuidle_device *dev;
-
-	for_each_possible_cpu(i) {
-		dev = per_cpu_ptr(pseries_cpuidle_devices, i);
-		cpuidle_unregister_device(dev);
-	}
-
-	free_percpu(pseries_cpuidle_devices);
-	return;
-}
-
-/* pseries_idle_devices_init()
- * allocate, initialize and register cpuidle device
- */
-static int pseries_idle_devices_init(void)
-{
-	int i;
-	struct cpuidle_driver *drv = &pseries_idle_driver;
-	struct cpuidle_device *dev;
-
-	pseries_cpuidle_devices = alloc_percpu(struct cpuidle_device);
-	if (pseries_cpuidle_devices == NULL)
-		return -ENOMEM;
-
-	for_each_possible_cpu(i) {
-		dev = per_cpu_ptr(pseries_cpuidle_devices, i);
-		dev->state_count = drv->state_count;
-		dev->cpu = i;
-		if (cpuidle_register_device(dev)) {
-			printk(KERN_DEBUG \
-				"cpuidle_register_device %d failed!\n", i);
-			return -EIO;
-		}
-	}
-
-	return 0;
-}
-
 /*
  * pseries_idle_probe()
  * Choose state table for shared versus dedicated partition
@@ -296,9 +250,6 @@ static int pseries_idle_devices_init(void)
 static int pseries_idle_probe(void)
 {
 
-	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
-		return -ENODEV;
-
 	if (cpuidle_disable != IDLE_NO_OVERRIDE)
 		return -ENODEV;
 
@@ -307,10 +258,13 @@ static int pseries_idle_probe(void)
 		return -EPERM;
 	}
 
-	if (lppaca_shared_proc(get_lppaca()))
-		cpuidle_state_table = shared_states;
-	else
-		cpuidle_state_table = dedicated_states;
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		if (lppaca_shared_proc(get_lppaca()))
+			cpuidle_state_table = shared_states;
+		else
+			cpuidle_state_table = dedicated_states;
+	} else
+		return -ENODEV;
 
 	return 0;
 }
@@ -324,22 +278,14 @@ static int __init pseries_processor_idle_init(void)
 		return retval;
 
 	pseries_cpuidle_driver_init();
-	retval = cpuidle_register_driver(&pseries_idle_driver);
+	retval = cpuidle_register(&pseries_idle_driver, NULL);
 	if (retval) {
 		printk(KERN_DEBUG "Registration of pseries driver failed.\n");
 		return retval;
 	}
 
-	retval = pseries_idle_devices_init();
-	if (retval) {
-		pseries_idle_devices_uninit();
-		cpuidle_unregister_driver(&pseries_idle_driver);
-		return retval;
-	}
-
 	register_cpu_notifier(&setup_hotplug_notifier);
 	printk(KERN_DEBUG "pseries_idle_driver registered\n");
-
 	return 0;
 }
 
@@ -347,9 +293,7 @@ static void __exit pseries_processor_idle_exit(void)
 {
 
 	unregister_cpu_notifier(&setup_hotplug_notifier);
-	pseries_idle_devices_uninit();
-	cpuidle_unregister_driver(&pseries_idle_driver);
-
+	cpuidle_unregister(&pseries_idle_driver);
 	return;
 }
 

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox