LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 8/9] powerpc/powernv: Add /proc/powerpc/eeh_inf_err
From: Gavin Shan @ 2014-02-25  5:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1393306670-17435-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch adds /proc/powerpc/eeh_inf_err to count the INF errors
that happened on PHBs, as Ben suggested.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c |   51 +++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/pci.h      |    1 +
 2 files changed, 52 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index cd06c52..3ddd706 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -20,6 +20,7 @@
 #include <linux/msi.h>
 #include <linux/notifier.h>
 #include <linux/pci.h>
+#include <linux/proc_fs.h>
 #include <linux/string.h>
 
 #include <asm/eeh.h>
@@ -35,6 +36,8 @@
 #include "powernv.h"
 #include "pci.h"
 
+static u64 ioda_eeh_ioc_inf_err = 0;
+static int ioda_eeh_proc_init = 0;
 static int ioda_eeh_nb_init = 0;
 
 static int ioda_eeh_event(struct notifier_block *nb,
@@ -114,6 +117,44 @@ DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
 			ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
 #endif /* CONFIG_DEBUG_FS */
 
+#ifdef CONFIG_PROC_FS
+static int ioda_eeh_proc_show(struct seq_file *m, void *v)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+
+	if (!eeh_enabled()) {
+                seq_printf(m, "EEH Subsystem disabled\n");
+		return 0;
+	}
+
+	seq_printf(m, "EEH Subsystem enabled\n");
+	if (ioda_eeh_ioc_inf_err > 0)
+		seq_printf(m, "\nIOC INF Errors: %llu\n\n",
+			   ioda_eeh_ioc_inf_err);
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		phb = hose->private_data;
+		seq_printf(m, "PHB#%d INF Errors: %llu\n",
+			   hose->global_number, phb->inf_err);
+	}
+
+	return 0;
+}
+
+static int ioda_eeh_proc_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, ioda_eeh_proc_show, NULL);
+}
+
+static const struct file_operations ioda_eeh_proc_ops = {
+	.open		= ioda_eeh_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif /* CONFIG_PROC_FS */
+
 static void ioda_eeh_phb_diag(struct pci_controller *hose, char *buf)
 {
 	struct pnv_phb *phb = hose->private_data;
@@ -170,6 +211,14 @@ static int ioda_eeh_post_init(struct pci_controller *hose)
 	}
 #endif
 
+#ifdef CONFIG_PROC_FS
+	if (!ioda_eeh_proc_init) {
+		ioda_eeh_proc_init = 1;
+		proc_create("powerpc/eeh_inf_err", 0,
+			    NULL, &ioda_eeh_proc_ops);
+	}
+#endif
+
 	phb->flags |= PNV_PHB_FLAG_EEH;
 
 	return 0;
@@ -755,6 +804,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 			} else if (severity == OPAL_EEH_SEV_INF) {
 				pr_info("EEH: IOC informative error "
 					"detected\n");
+				ioda_eeh_ioc_inf_err++;
 				ioda_eeh_hub_diag(hose);
 				ret = EEH_NEXT_ERR_NONE;
 			}
@@ -775,6 +825,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 				pr_info("EEH: PHB#%x informative error "
 					"detected\n",
 					hose->global_number);
+				phb->inf_err++;
 				ioda_eeh_phb_diag(hose, phb->diag.blob);
 				ret = EEH_NEXT_ERR_NONE;
 			}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 3645fc4..64ca719 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -97,6 +97,7 @@ struct pnv_phb {
 	spinlock_t		lock;
 
 #ifdef CONFIG_EEH
+	u64			inf_err;
 	struct pnv_eeh_ops	*eeh_ops;
 #endif
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 9/9] powerpc/powernv: Refactor PHB diag-data dump
From: Gavin Shan @ 2014-02-25  5:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1393306670-17435-1-git-send-email-shangw@linux.vnet.ibm.com>

As Ben suggested, the patch prints PHB diag-data with multiple
fields in one line and omits the line if the fields of that
line are all zero.

With the patch applied, the PHB3 diag-data dump looks like:

PHB3 PHB#3 Diag-data (Version: 1)

  brdgCtl:     00000002
  RootSts:     0000000f 00400000 b0830008 00100147 00002000
  nFir:        0000000000000000 0030006e00000000 0000000000000000
  PhbSts:      0000001c00000000 0000000000000000
  Lem:         0000000000100000 42498e327f502eae 0000000000000000
  InAErr:      8000000000000000 8000000000000000 0402030000000000 \
               0000000000000000
  PE[  8] A/B: 8480002b00000000 8000000000000000

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci.c |  220 +++++++++++++++++++---------------
 1 file changed, 125 insertions(+), 95 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 3955fc0..114e1a7 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -134,57 +134,72 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
 	pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n",
 		hose->global_number, common->version);
 
-	pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
-
-	pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
-	pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
-	pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
-
-	pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
-	pr_info("  slotStatus:           %08x\n", data->slotStatus);
-	pr_info("  linkStatus:           %08x\n", data->linkStatus);
-	pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
-	pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
-
-	pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
-	pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
-	pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
-	pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
-	pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
-	pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
-	pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
-	pr_info("  sourceId:             %08x\n", data->sourceId);
-	pr_info("  errorClass:           %016llx\n", data->errorClass);
-	pr_info("  correlator:           %016llx\n", data->correlator);
-	pr_info("  p7iocPlssr:           %016llx\n", data->p7iocPlssr);
-	pr_info("  p7iocCsr:             %016llx\n", data->p7iocCsr);
-	pr_info("  lemFir:               %016llx\n", data->lemFir);
-	pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
-	pr_info("  lemWOF:               %016llx\n", data->lemWOF);
-	pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
-	pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
-	pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
-	pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
-	pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
-	pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
-	pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
-	pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
-	pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
-	pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
-	pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
-	pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
-	pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
-	pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
-	pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
-	pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+	if (data->brdgCtl)
+		pr_info("  brdgCtl:     %08x\n",
+			data->brdgCtl);
+	if (data->portStatusReg || data->rootCmplxStatus ||
+	    data->busAgentStatus)
+		pr_info("  UtlSts:      %08x %08x %08x\n",
+			data->portStatusReg, data->rootCmplxStatus,
+			data->busAgentStatus);
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("  RootSts:     %08x %08x %08x %08x %08x\n",
+			data->deviceStatus, data->slotStatus,
+			data->linkStatus, data->devCmdStatus,
+			data->devSecStatus);
+	if (data->rootErrorStatus   || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("  RootErrSts:  %08x %08x %08x\n",
+			data->rootErrorStatus, data->uncorrErrorStatus,
+			data->corrErrorStatus);
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("  RootErrLog:  %08x %08x %08x %08x\n",
+			data->tlpHdr1, data->tlpHdr2,
+			data->tlpHdr3, data->tlpHdr4);
+	if (data->sourceId || data->errorClass ||
+	    data->correlator)
+		pr_info("  RootErrLog1: %08x %016llx %016llx\n",
+			data->sourceId, data->errorClass,
+			data->correlator);
+	if (data->p7iocPlssr || data->p7iocCsr)
+		pr_info("  PhbSts:      %016llx %016llx\n",
+			data->p7iocPlssr, data->p7iocCsr);
+	if (data->lemFir || data->lemErrorMask ||
+	    data->lemWOF)
+		pr_info("  Lem:         %016llx %016llx %016llx\n",
+			data->lemFir, data->lemErrorMask,
+			data->lemWOF);
+	if (data->phbErrorStatus || data->phbFirstErrorStatus ||
+	    data->phbErrorLog0   || data->phbErrorLog1)
+		pr_info("  PhbErr:      %016llx %016llx %016llx %016llx\n",
+			data->phbErrorStatus, data->phbFirstErrorStatus,
+			data->phbErrorLog0, data->phbErrorLog1);
+	if (data->mmioErrorStatus || data->mmioFirstErrorStatus ||
+	    data->mmioErrorLog0   || data->mmioErrorLog1)
+		pr_info("  OutErr:      %016llx %016llx %016llx %016llx\n",
+			data->mmioErrorStatus, data->mmioFirstErrorStatus,
+			data->mmioErrorLog0, data->mmioErrorLog1);
+	if (data->dma0ErrorStatus || data->dma0FirstErrorStatus ||
+	    data->dma0ErrorLog0   || data->dma0ErrorLog1)
+		pr_info("  InAErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
+			data->dma0ErrorLog0, data->dma0ErrorLog1);
+	if (data->dma1ErrorStatus || data->dma1FirstErrorStatus ||
+	    data->dma1ErrorLog0   || data->dma1ErrorLog1)
+		pr_info("  InBErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
+			data->dma1ErrorLog0, data->dma1ErrorLog1);
 
 	for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
 		if ((data->pestA[i] >> 63) == 0 &&
 		    (data->pestB[i] >> 63) == 0)
 			continue;
 
-		pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
-		pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+		pr_info("  PE[%3d] A/B: %016llx %016llx\n",
+			i, data->pestA[i], data->pestB[i]);
 	}
 }
 
@@ -197,62 +212,77 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
 	data = (struct OpalIoPhb3ErrorData*)common;
 	pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n",
 		hose->global_number, common->version);
-
-	pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
-
-	pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
-	pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
-	pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
-
-	pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
-	pr_info("  slotStatus:           %08x\n", data->slotStatus);
-	pr_info("  linkStatus:           %08x\n", data->linkStatus);
-	pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
-	pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
-
-	pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
-	pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
-	pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
-	pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
-	pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
-	pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
-	pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
-	pr_info("  sourceId:             %08x\n", data->sourceId);
-	pr_info("  errorClass:           %016llx\n", data->errorClass);
-	pr_info("  correlator:           %016llx\n", data->correlator);
-
-	pr_info("  nFir:                 %016llx\n", data->nFir);
-	pr_info("  nFirMask:             %016llx\n", data->nFirMask);
-	pr_info("  nFirWOF:              %016llx\n", data->nFirWOF);
-	pr_info("  PhbPlssr:             %016llx\n", data->phbPlssr);
-	pr_info("  PhbCsr:               %016llx\n", data->phbCsr);
-	pr_info("  lemFir:               %016llx\n", data->lemFir);
-	pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
-	pr_info("  lemWOF:               %016llx\n", data->lemWOF);
-	pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
-	pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
-	pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
-	pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
-	pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
-	pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
-	pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
-	pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
-	pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
-	pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
-	pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
-	pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
-	pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
-	pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
-	pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
-	pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+	if (data->brdgCtl)
+		pr_info("  brdgCtl:     %08x\n",
+			data->brdgCtl);
+	if (data->portStatusReg || data->rootCmplxStatus ||
+	    data->busAgentStatus)
+		pr_info("  UtlSts:      %08x %08x %08x\n",
+			data->portStatusReg, data->rootCmplxStatus,
+			data->busAgentStatus);
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("  RootSts:     %08x %08x %08x %08x %08x\n",
+			data->deviceStatus, data->slotStatus,
+			data->linkStatus, data->devCmdStatus,
+			data->devSecStatus);
+	if (data->rootErrorStatus || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("  RootErrSts:  %08x %08x %08x\n",
+			data->rootErrorStatus, data->uncorrErrorStatus,
+			data->corrErrorStatus);
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("  RootErrLog:  %08x %08x %08x %08x\n",
+			data->tlpHdr1, data->tlpHdr2,
+			data->tlpHdr3, data->tlpHdr4);
+	if (data->sourceId || data->errorClass ||
+	    data->correlator)
+		pr_info("  RootErrLog1: %08x %016llx %016llx\n",
+			data->sourceId, data->errorClass,
+			data->correlator);
+	if (data->nFir || data->nFirMask ||
+	    data->nFirWOF)
+		pr_info("  nFir:        %016llx %016llx %016llx\n",
+			data->nFir, data->nFirMask,
+			data->nFirWOF);
+	if (data->phbPlssr || data->phbCsr)
+		pr_info("  PhbSts:      %016llx %016llx\n",
+			data->phbPlssr, data->phbCsr);
+	if (data->lemFir || data->lemErrorMask ||
+	    data->lemWOF)
+		pr_info("  Lem:         %016llx %016llx %016llx\n",
+			data->lemFir, data->lemErrorMask,
+			data->lemWOF);
+	if (data->phbErrorStatus || data->phbFirstErrorStatus ||
+	    data->phbErrorLog0   || data->phbErrorLog1)
+		pr_info("  PhbErr:      %016llx %016llx %016llx %016llx\n",
+			data->phbErrorStatus, data->phbFirstErrorStatus,
+			data->phbErrorLog0, data->phbErrorLog1);
+	if (data->mmioErrorStatus || data->mmioFirstErrorStatus ||
+	    data->mmioErrorLog0   || data->mmioErrorLog1)
+		pr_info("  OutErr:      %016llx %016llx %016llx %016llx\n",
+			data->mmioErrorStatus, data->mmioFirstErrorStatus,
+			data->mmioErrorLog0, data->mmioErrorLog1);
+	if (data->dma0ErrorStatus || data->dma0FirstErrorStatus ||
+	    data->dma0ErrorLog0   || data->dma0ErrorLog1)
+		pr_info("  InAErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
+			data->dma0ErrorLog0, data->dma0ErrorLog1);
+	if (data->dma1ErrorStatus || data->dma1FirstErrorStatus ||
+	    data->dma1ErrorLog0   || data->dma1ErrorLog1)
+		pr_info("  InBErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
+			data->dma1ErrorLog0, data->dma1ErrorLog1);
 
 	for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
 		if ((data->pestA[i] >> 63) == 0 &&
 		    (data->pestB[i] >> 63) == 0)
 			continue;
 
-		pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
-		pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+		pr_info("  PE[%3d] A/B: %016llx %016llx\n",
+			i, data->pestA[i], data->pestB[i]);
 	}
 }
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 5/9] powerpc/eeh: Introduce eeh_ops->event()
From: Gavin Shan @ 2014-02-25  5:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1393306670-17435-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch introduces eeh_ops->event() so that we can pass various
events to the underlying platform. One reason to have that is to
allocate or free PHB diag-data for individual PEs on the PowerNV
platform in future, when the EEH core creates or destroys PE instances.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h |    6 ++++++
 arch/powerpc/kernel/eeh_pe.c   |   14 ++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index a61b06f..8fd1c2d 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -71,6 +71,7 @@ struct eeh_pe {
 	struct list_head child_list;	/* Link PE to the child list	*/
 	struct list_head edevs;		/* Link list of EEH devices	*/
 	struct list_head child;		/* Child PEs			*/
+	void *data;			/* Platform dependent data	*/
 };
 
 #define eeh_pe_for_each_dev(pe, edev, tmp) \
@@ -151,6 +152,10 @@ enum {
 #define EEH_LOG_TEMP		1	/* EEH temporary error log	*/
 #define EEH_LOG_PERM		2	/* EEH permanent error log	*/
 
+/* EEH events sent to platform */
+#define EEH_EVENT_PE_ALLOC	0
+#define EEH_EVENT_PE_FREE	1
+
 struct eeh_ops {
 	char *name;
 	int (*init)(void);
@@ -168,6 +173,7 @@ struct eeh_ops {
 	int (*write_config)(struct device_node *dn, int where, int size, u32 val);
 	int (*next_error)(struct eeh_pe **pe);
 	int (*restore_config)(struct device_node *dn);
+	int (*event)(int event, void *data);
 };
 
 extern struct eeh_ops *eeh_ops;
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 2add834..6cdc7a8 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -44,6 +44,7 @@ static LIST_HEAD(eeh_phb_pe);
 static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
 {
 	struct eeh_pe *pe;
+	int ret;
 
 	/* Allocate PHB PE */
 	pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL);
@@ -56,6 +57,16 @@ static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
 	INIT_LIST_HEAD(&pe->child);
 	INIT_LIST_HEAD(&pe->edevs);
 
+	if (eeh_ops->event) {
+		ret = eeh_ops->event(EEH_EVENT_PE_ALLOC, pe);
+		if (ret) {
+			pr_warn("%s: Can't alloc PE (%d)\n",
+				__func__, ret);
+			kfree(pe);
+			return NULL;
+		}
+	}
+
 	return pe;
 }
 
@@ -77,6 +88,9 @@ static void eeh_pe_free(struct eeh_pe *pe)
 		return;
 	}
 
+	if (eeh_ops->event)
+		eeh_ops->event(EEH_EVENT_PE_FREE, pe);
+
 	kfree(pe);
 }
 
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH 7/7] powerpc: Added PCI MSI support using the HSTA module
From: Alistair Popple @ 2014-02-25  5:54 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: devicetree, linuxppc-dev, linux-kernel, Arnd Bergmann
In-Reply-To: <1393015286.6771.110.camel@pasglop>

On Sat, 22 Feb 2014 07:41:26 Benjamin Herrenschmidt wrote:
> On Fri, 2014-02-21 at 15:33 +0100, Arnd Bergmann wrote:

[...]

> 
> Should we (provided it's possible in HW) create two ranges instead ? One
> covering RAM and one covering MSIs ? To avoid stray DMAs whacking random
> HW registers in the chip ...
> 

The thought occurred to me but I figured if we had stray DMAs then they could 
already whack random bits of system memory which would likely break your 
system anyway so I wasn't sure how much we'd gain. I guess whacking random HW 
registers is arguably a bit worse though.

I did a bit of digging into the HW documentation and it looks like it _may_ be 
possible to create a second range that would limit access to a subset of HW 
registers, although there doesn't seem to be much flexibility. Personally I'm 
not sure it justifies the work, but I'm happy to look into it a bit more if 
you feel it's important?

- Alistair

^ permalink raw reply

* [PATCH] powerpc/pci: Use of_pci_range_parser helper in pci_process_bridge_OF_ranges
From: Andrew Murray @ 2014-02-25  6:32 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: linux-pci, bhelgass, Andrew Murray

This patch updates the implementation of pci_process_bridge_OF_ranges to use
the of_pci_range_parser helpers.

Signed-off-by: Andrew Murray <amurray@embedded-bits.co.uk>
---
I've verified that this builds; however, I have no hardware to test it on.
---
 arch/powerpc/kernel/pci-common.c | 88 +++++++++++++---------------------------
 1 file changed, 29 insertions(+), 59 deletions(-)

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index d9476c1..a05fe18 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -666,60 +666,36 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,
 void pci_process_bridge_OF_ranges(struct pci_controller *hose,
 				  struct device_node *dev, int primary)
 {
-	const __be32 *ranges;
-	int rlen;
-	int pna = of_n_addr_cells(dev);
-	int np = pna + 5;
 	int memno = 0;
-	u32 pci_space;
-	unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size;
 	struct resource *res;
+	struct of_pci_range range;
+	struct of_pci_range_parser parser;
 
 	printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
 	       dev->full_name, primary ? "(primary)" : "");
 
-	/* Get ranges property */
-	ranges = of_get_property(dev, "ranges", &rlen);
-	if (ranges == NULL)
+	/* Check for ranges property */
+	if (of_pci_range_parser_init(&parser, dev))
 		return;
 
 	/* Parse it */
-	while ((rlen -= np * 4) >= 0) {
-		/* Read next ranges element */
-		pci_space = of_read_number(ranges, 1);
-		pci_addr = of_read_number(ranges + 1, 2);
-		cpu_addr = of_translate_address(dev, ranges + 3);
-		size = of_read_number(ranges + pna + 3, 2);
-		ranges += np;
-
+	for_each_of_pci_range(&parser, &range) {
 		/* If we failed translation or got a zero-sized region
 		 * (some FW try to feed us with non sensical zero sized regions
 		 * such as power3 which look like some kind of attempt at exposing
 		 * the VGA memory hole)
 		 */
-		if (cpu_addr == OF_BAD_ADDR || size == 0)
+		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
 			continue;
 
-		/* Now consume following elements while they are contiguous */
-		for (; rlen >= np * sizeof(u32);
-		     ranges += np, rlen -= np * 4) {
-			if (of_read_number(ranges, 1) != pci_space)
-				break;
-			pci_next = of_read_number(ranges + 1, 2);
-			cpu_next = of_translate_address(dev, ranges + 3);
-			if (pci_next != pci_addr + size ||
-			    cpu_next != cpu_addr + size)
-				break;
-			size += of_read_number(ranges + pna + 3, 2);
-		}
-
 		/* Act based on address space type */
 		res = NULL;
-		switch ((pci_space >> 24) & 0x3) {
-		case 1:		/* PCI IO space */
+		switch (range.flags & IORESOURCE_TYPE_BITS) {
+		case IORESOURCE_IO:
 			printk(KERN_INFO
 			       "  IO 0x%016llx..0x%016llx -> 0x%016llx\n",
-			       cpu_addr, cpu_addr + size - 1, pci_addr);
+			       range.cpu_addr, range.cpu_addr + range.size - 1,
+			       range.pci_addr);
 
 			/* We support only one IO range */
 			if (hose->pci_io_size) {
@@ -729,11 +705,12 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
 			}
 #ifdef CONFIG_PPC32
 			/* On 32 bits, limit I/O space to 16MB */
-			if (size > 0x01000000)
-				size = 0x01000000;
+			if (range.size > 0x01000000)
+				range.size = 0x01000000;
 
 			/* 32 bits needs to map IOs here */
-			hose->io_base_virt = ioremap(cpu_addr, size);
+			hose->io_base_virt = ioremap(range.cpu_addr,
+						range.size);
 
 			/* Expect trouble if pci_addr is not 0 */
 			if (primary)
@@ -743,20 +720,20 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
 			/* pci_io_size and io_base_phys always represent IO
 			 * space starting at 0 so we factor in pci_addr
 			 */
-			hose->pci_io_size = pci_addr + size;
-			hose->io_base_phys = cpu_addr - pci_addr;
+			hose->pci_io_size = range.pci_addr + range.size;
+			hose->io_base_phys = range.cpu_addr - range.pci_addr;
 
 			/* Build resource */
 			res = &hose->io_resource;
-			res->flags = IORESOURCE_IO;
-			res->start = pci_addr;
+			range.cpu_addr = range.pci_addr;
 			break;
-		case 2:		/* PCI Memory space */
-		case 3:		/* PCI 64 bits Memory space */
+		case IORESOURCE_MEM:
 			printk(KERN_INFO
 			       " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
-			       cpu_addr, cpu_addr + size - 1, pci_addr,
-			       (pci_space & 0x40000000) ? "Prefetch" : "");
+			       range.cpu_addr, range.cpu_addr + range.size - 1,
+			       range.pci_addr,
+			       (range.pci_space & 0x40000000) ?
+			       "Prefetch" : "");
 
 			/* We support only 3 memory ranges */
 			if (memno >= 3) {
@@ -765,28 +742,21 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
 				continue;
 			}
 			/* Handles ISA memory hole space here */
-			if (pci_addr == 0) {
+			if (range.pci_addr == 0) {
 				if (primary || isa_mem_base == 0)
-					isa_mem_base = cpu_addr;
-				hose->isa_mem_phys = cpu_addr;
-				hose->isa_mem_size = size;
+					isa_mem_base = range.cpu_addr;
+				hose->isa_mem_phys = range.cpu_addr;
+				hose->isa_mem_size = range.size;
 			}
 
 			/* Build resource */
-			hose->mem_offset[memno] = cpu_addr - pci_addr;
+			hose->mem_offset[memno] = range.cpu_addr -
+							range.pci_addr;
 			res = &hose->mem_resources[memno++];
-			res->flags = IORESOURCE_MEM;
-			if (pci_space & 0x40000000)
-				res->flags |= IORESOURCE_PREFETCH;
-			res->start = cpu_addr;
 			break;
 		}
 		if (res != NULL) {
-			res->name = dev->full_name;
-			res->end = res->start + size - 1;
-			res->parent = NULL;
-			res->sibling = NULL;
-			res->child = NULL;
+			of_pci_range_to_resource(&range, dev, res);
 		}
 	}
 }
-- 
1.8.3.2

^ permalink raw reply related

* Re: [PATCH v2 0/9] EEH improvement
From: Gavin Shan @ 2014-02-25  7:26 UTC (permalink / raw)
  To: Gavin Shan; +Cc: linuxppc-dev
In-Reply-To: <1393306670-17435-1-git-send-email-shangw@linux.vnet.ibm.com>

On Tue, Feb 25, 2014 at 01:37:41PM +0800, Gavin Shan wrote:
>The series of patches intends to improve reliability of EEH on PowerNV
>platform. First all, we have had multiple duplicate states (flags) for
>PHB and PE, so we remove those duplicate states to simplify the code.
>Besides, we had corrupted PHB diag-data for case of frozen PE. In order
>to solve the problem, we introduce eeh_ops->event() and notifications
>are sent from EEH core to (PowerNV) platform on creating or destroying
>PE instance so that we can allocate or free PHB diag-data backend. Then
>we cache the PHB diag-data on the first call to eeh_ops->get_state()
>and dump it afterwards, which helps to get correct PHB diag-data.
>
>With the patchset applied, we never dump PHB diag-data for INF errors.
>Instead, we just maintain statistics in /proc/powerpc/eeh_inf_err. Also,
>we changed the PHB diag-data dump format for a bit to have multiple
>fields per line and omits the line with all zero'd fields as Ben suggested.
>
>
>v1 -> v2:
>	* Amending commit logs
>	* Support eeh_ops->event() and maintain PHB diag-data on basis
>	  of PE instance
>	* When dumping PHB diag-data, to replace "-" with "00000000" and
>	  omit the line if the fields of it are all zeros.
>

Please ignore this series; I'm going to send out v3, where we just
grab and dump the PHB diag-data (without caching it any more), as
Ben suggested :-)

Thanks,
Gavin

>---
>
>arch/powerpc/include/asm/eeh.h               |    7 ++-
>arch/powerpc/kernel/eeh.c                    |   10 +---
>arch/powerpc/kernel/eeh_driver.c             |   10 ++--
>arch/powerpc/kernel/eeh_pe.c                 |   39 ++++++++++++-
>arch/powerpc/platforms/powernv/eeh-ioda.c    |  193 ++++++++++++++++++++++++++++++++++++-------------------------
>arch/powerpc/platforms/powernv/eeh-powernv.c |   74 +++++++++++++++++++-----
>arch/powerpc/platforms/powernv/pci.c         |  228 +++++++++++++++++++++++++++++++++++++++++-------------------------
>arch/powerpc/platforms/powernv/pci.h         |   11 ++--
>arch/powerpc/platforms/pseries/eeh_pseries.c |    3 +-
>9 files changed, 358 insertions(+), 217 deletions(-)
>
>Thanks,
>Gavin
>

^ permalink raw reply

* [PATCH 1/5] powerpc/eeh: Remove EEH_PE_PHB_DEAD
From: Gavin Shan @ 2014-02-25  7:28 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1393313318-6341-1-git-send-email-shangw@linux.vnet.ibm.com>

The PE state (for eeh_pe instance) EEH_PE_PHB_DEAD is a duplicate of
EEH_PE_ISOLATED. Originally, those PHBs (PHB PE) with EEH_PE_PHB_DEAD
would be removed from the system. However, it's safe to replace
that with EEH_PE_ISOLATED.

The patch also clears EEH_PE_RECOVERING after a fenced PHB has been
handled, on either failure or success. It makes the PHB PE state
consistent with:

	PHB functions normally		  NONE
	PHB has been removed		  EEH_PE_ISOLATED
	PHB fenced, recovery in progress  EEH_PE_ISOLATED | RECOVERING

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h   |    1 -
 arch/powerpc/kernel/eeh.c        |   10 ++--------
 arch/powerpc/kernel/eeh_driver.c |   10 +++++-----
 3 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d4dd41f..a61b06f 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -53,7 +53,6 @@ struct device_node;
 
 #define EEH_PE_ISOLATED		(1 << 0)	/* Isolated PE		*/
 #define EEH_PE_RECOVERING	(1 << 1)	/* Recovering PE	*/
-#define EEH_PE_PHB_DEAD		(1 << 2)	/* Dead PHB		*/
 
 #define EEH_PE_KEEP		(1 << 8)	/* Keep PE on hotplug	*/
 
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index e7b76a6..f167676 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -232,7 +232,6 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 {
 	size_t loglen = 0;
 	struct eeh_dev *edev, *tmp;
-	bool valid_cfg_log = true;
 
 	/*
 	 * When the PHB is fenced or dead, it's pointless to collect
@@ -240,12 +239,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 	 * 0xFF's. For ER, we still retrieve the data from the PCI
 	 * config space.
 	 */
-	if (eeh_probe_mode_dev() &&
-	    (pe->type & EEH_PE_PHB) &&
-	    (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)))
-		valid_cfg_log = false;
-
-	if (valid_cfg_log) {
+	if (!(pe->type & EEH_PE_PHB)) {
 		eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
 		eeh_ops->configure_bridge(pe);
 		eeh_pe_restore_bars(pe);
@@ -309,7 +303,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
 
 	/* If the PHB has been in problematic state */
 	eeh_serialize_lock(&flags);
-	if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) {
+	if (phb_pe->state & EEH_PE_ISOLATED) {
 		ret = 0;
 		goto out;
 	}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index fdc679d..4cf0467 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -665,8 +665,7 @@ static void eeh_handle_special_event(void)
 				phb_pe = eeh_phb_pe_get(hose);
 				if (!phb_pe) continue;
 
-				eeh_pe_state_mark(phb_pe,
-					EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+				eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
 			}
 
 			eeh_serialize_unlock(flags);
@@ -682,8 +681,7 @@ static void eeh_handle_special_event(void)
 			eeh_remove_event(pe);
 
 			if (rc == EEH_NEXT_ERR_DEAD_PHB)
-				eeh_pe_state_mark(pe,
-					EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+				eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 			else
 				eeh_pe_state_mark(pe,
 					EEH_PE_ISOLATED | EEH_PE_RECOVERING);
@@ -707,12 +705,14 @@ static void eeh_handle_special_event(void)
 		if (rc == EEH_NEXT_ERR_FROZEN_PE ||
 		    rc == EEH_NEXT_ERR_FENCED_PHB) {
 			eeh_handle_normal_event(pe);
+			eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 		} else {
 			pci_lock_rescan_remove();
 			list_for_each_entry(hose, &hose_list, list_node) {
 				phb_pe = eeh_phb_pe_get(hose);
 				if (!phb_pe ||
-				    !(phb_pe->state & EEH_PE_PHB_DEAD))
+				    !(phb_pe->state & EEH_PE_ISOLATED) ||
+				    (phb_pe->state & EEH_PE_RECOVERING))
 					continue;
 
 				/* Notify all devices to be down */
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH v3 0/5] EEH improvement
From: Gavin Shan @ 2014-02-25  7:28 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

The series of patches intends to improve reliability of EEH on PowerNV
platform. First of all, we have had multiple duplicate states (flags) for
PHB and PE, so we remove those duplicate states to simplify the code.
Besides, we had corrupted PHB diag-data for case of frozen PE. In order
to solve the problem, we introduce eeh_ops->event() and notifications
are sent from EEH core to (PowerNV) platform on creating or destroying
PE instance so that we can allocate or free PHB diag-data backend. Then
we cache the PHB diag-data on the first call to eeh_ops->get_state()
and dump it afterwards, which helps to get correct PHB diag-data.

With the patchset applied, we never dump PHB diag-data for INF errors.
Instead, we just maintain statistics in /proc/powerpc/eeh_inf_err. Also,
we changed the PHB diag-data dump format a bit to have multiple fields
per line and to omit any line whose fields are all zero, as Ben suggested.

v2 -> v3:
	* We don't cache the PHB diag-data, instead we just grab and
	  dump PHB diag-data on the first catch-up to avoid broken
	  PHB diag-data.
v1 -> v2:
	* Amending commit logs
	* Support eeh_ops->event() and maintain PHB diag-data on basis
	  of PE instance
	* When dumping PHB diag-data, to replace "-" with "00000000" and
	  omit the line if the fields of it are all zeros.

---

arch/powerpc/include/asm/eeh.h            |    1 -
arch/powerpc/kernel/eeh.c                 |   10 +---
arch/powerpc/kernel/eeh_driver.c          |   10 ++--
arch/powerpc/platforms/powernv/eeh-ioda.c |  137 ++++++++++++++++++++--------------------------
arch/powerpc/platforms/powernv/pci.c      |  228 ++++++++++++++++++++++++++++++++++++++++++---------------------------
arch/powerpc/platforms/powernv/pci.h      |    8 +--
6 files changed, 195 insertions(+), 199 deletions(-)

Thanks,
Gavin

^ permalink raw reply

* [PATCH 2/5] powerpc/powernv: Remove PNV_EEH_STATE_REMOVED
From: Gavin Shan @ 2014-02-25  7:28 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1393313318-6341-1-git-send-email-shangw@linux.vnet.ibm.com>

The PHB state PNV_EEH_STATE_REMOVED maintained in pnv_phb isn't
so useful any more and it duplicates EEH_PE_ISOLATED. The
patch replaces PNV_EEH_STATE_REMOVED with EEH_PE_ISOLATED.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c |   56 ++++++++---------------------
 arch/powerpc/platforms/powernv/pci.h      |    1 -
 2 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index f514743..0d1d424 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -662,22 +662,6 @@ static void ioda_eeh_phb_diag(struct pci_controller *hose)
 	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
 }
 
-static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
-			       struct eeh_pe **pe)
-{
-	struct eeh_pe *phb_pe;
-
-	phb_pe = eeh_phb_pe_get(hose);
-	if (!phb_pe) {
-		pr_warning("%s Can't find PE for PHB#%d\n",
-			   __func__, hose->global_number);
-		return -EEXIST;
-	}
-
-	*pe = phb_pe;
-	return 0;
-}
-
 static int ioda_eeh_get_pe(struct pci_controller *hose,
 			   u16 pe_no, struct eeh_pe **pe)
 {
@@ -685,7 +669,8 @@ static int ioda_eeh_get_pe(struct pci_controller *hose,
 	struct eeh_dev dev;
 
 	/* Find the PHB PE */
-	if (ioda_eeh_get_phb_pe(hose, &phb_pe))
+	phb_pe = eeh_phb_pe_get(hose);
+	if (!phb_pe)
 		return -EEXIST;
 
 	/* Find the PE according to PE# */
@@ -713,6 +698,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 {
 	struct pci_controller *hose;
 	struct pnv_phb *phb;
+	struct eeh_pe *phb_pe;
 	u64 frozen_pe_no;
 	u16 err_type, severity;
 	long rc;
@@ -729,10 +715,12 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 	list_for_each_entry(hose, &hose_list, list_node) {
 		/*
 		 * If the subordinate PCI buses of the PHB has been
-		 * removed, we needn't take care of it any more.
+		 * removed or is exactly under error recovery, we
+		 * needn't take care of it any more.
 		 */
 		phb = hose->private_data;
-		if (phb->eeh_state & PNV_EEH_STATE_REMOVED)
+		phb_pe = eeh_phb_pe_get(hose);
+		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
 			continue;
 
 		rc = opal_pci_next_error(phb->opal_id,
@@ -765,12 +753,6 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 		switch (err_type) {
 		case OPAL_EEH_IOC_ERROR:
 			if (severity == OPAL_EEH_SEV_IOC_DEAD) {
-				list_for_each_entry(hose, &hose_list,
-						    list_node) {
-					phb = hose->private_data;
-					phb->eeh_state |= PNV_EEH_STATE_REMOVED;
-				}
-
 				pr_err("EEH: dead IOC detected\n");
 				ret = EEH_NEXT_ERR_DEAD_IOC;
 			} else if (severity == OPAL_EEH_SEV_INF) {
@@ -783,17 +765,12 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 			break;
 		case OPAL_EEH_PHB_ERROR:
 			if (severity == OPAL_EEH_SEV_PHB_DEAD) {
-				if (ioda_eeh_get_phb_pe(hose, pe))
-					break;
-
+				*pe = phb_pe;
 				pr_err("EEH: dead PHB#%x detected\n",
 					hose->global_number);
-				phb->eeh_state |= PNV_EEH_STATE_REMOVED;
 				ret = EEH_NEXT_ERR_DEAD_PHB;
 			} else if (severity == OPAL_EEH_SEV_PHB_FENCED) {
-				if (ioda_eeh_get_phb_pe(hose, pe))
-					break;
-
+				*pe = phb_pe;
 				pr_err("EEH: fenced PHB#%x detected\n",
 					hose->global_number);
 				ret = EEH_NEXT_ERR_FENCED_PHB;
@@ -813,15 +790,12 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 			 * fenced PHB so that it can be recovered.
 			 */
 			if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) {
-				if (!ioda_eeh_get_phb_pe(hose, pe)) {
-					pr_err("EEH: Escalated fenced PHB#%x "
-					       "detected for PE#%llx\n",
-						hose->global_number,
-						frozen_pe_no);
-					ret = EEH_NEXT_ERR_FENCED_PHB;
-				} else {
-					ret = EEH_NEXT_ERR_NONE;
-				}
+				*pe = phb_pe;
+				pr_err("EEH: Escalated fenced PHB#%x "
+				       "detected for PE#%llx\n",
+					hose->global_number,
+					frozen_pe_no);
+				ret = EEH_NEXT_ERR_FENCED_PHB;
 			} else {
 				pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
 					(*pe)->addr, (*pe)->phb->global_number);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index cde1694..6870f60 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -83,7 +83,6 @@ struct pnv_eeh_ops {
 };
 
 #define PNV_EEH_STATE_ENABLED	(1 << 0)	/* EEH enabled	*/
-#define PNV_EEH_STATE_REMOVED	(1 << 1)	/* PHB removed	*/
 
 #endif /* CONFIG_EEH */
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 3/5] powerpc/powernv: Move PNV_EEH_STATE_ENABLED around
From: Gavin Shan @ 2014-02-25  7:28 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1393313318-6341-1-git-send-email-shangw@linux.vnet.ibm.com>

The flag PNV_EEH_STATE_ENABLED is put into pnv_phb::eeh_state,
which is protected by CONFIG_EEH. We don't need that. Instead, we
can have pnv_phb::flags and maintain all flags there, which is
the purpose of the patch. The patch also renames PNV_EEH_STATE_ENABLED
to PNV_PHB_FLAG_EEH.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c |    2 +-
 arch/powerpc/platforms/powernv/pci.c      |    8 ++------
 arch/powerpc/platforms/powernv/pci.h      |    7 +++----
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 0d1d424..04b4710 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -153,7 +153,7 @@ static int ioda_eeh_post_init(struct pci_controller *hose)
 	}
 #endif
 
-	phb->eeh_state |= PNV_EEH_STATE_ENABLED;
+	phb->flags |= PNV_PHB_FLAG_EEH;
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 95633d7..3955fc0 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -396,7 +396,7 @@ int pnv_pci_cfg_read(struct device_node *dn,
 	if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED))
 		return PCIBIOS_SUCCESSFUL;
 
-	if (phb->eeh_state & PNV_EEH_STATE_ENABLED) {
+	if (phb->flags & PNV_PHB_FLAG_EEH) {
 		if (*val == EEH_IO_ERROR_VALUE(size) &&
 		    eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
 			return PCIBIOS_DEVICE_NOT_FOUND;
@@ -434,12 +434,8 @@ int pnv_pci_cfg_write(struct device_node *dn,
 	}
 
 	/* Check if the PHB got frozen due to an error (no response) */
-#ifdef CONFIG_EEH
-	if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED))
+	if (!(phb->flags & PNV_PHB_FLAG_EEH))
 		pnv_pci_config_check_eeh(phb, dn);
-#else
-	pnv_pci_config_check_eeh(phb, dn);
-#endif
 
 	return PCIBIOS_SUCCESSFUL;
 }
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 6870f60..94e3495 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -81,24 +81,23 @@ struct pnv_eeh_ops {
 	int (*configure_bridge)(struct eeh_pe *pe);
 	int (*next_error)(struct eeh_pe **pe);
 };
-
-#define PNV_EEH_STATE_ENABLED	(1 << 0)	/* EEH enabled	*/
-
 #endif /* CONFIG_EEH */
 
+#define PNV_PHB_FLAG_EEH	(1 << 0)
+
 struct pnv_phb {
 	struct pci_controller	*hose;
 	enum pnv_phb_type	type;
 	enum pnv_phb_model	model;
 	u64			hub_id;
 	u64			opal_id;
+	int			flags;
 	void __iomem		*regs;
 	int			initialized;
 	spinlock_t		lock;
 
 #ifdef CONFIG_EEH
 	struct pnv_eeh_ops	*eeh_ops;
-	int			eeh_state;
 #endif
 
 #ifdef CONFIG_DEBUG_FS
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 4/5] powerpc/powernv: Dump PHB diag-data immediately
From: Gavin Shan @ 2014-02-25  7:28 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1393313318-6341-1-git-send-email-shangw@linux.vnet.ibm.com>

The PHB diag-data is useful for locating the root cause of a
frozen PE or fenced PHB. However, the EEH core enables the IO path by
clearing part of the HW registers before collecting it, so we end up
with broken PHB diag-data.

The patch intends to fix it by dumping the PHB diag-data immediately
when frozen/fenced state on PE or PHB is detected for the first time
in eeh_ops::get_state() or next_error() backend.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c |   79 +++++++++++++++--------------
 1 file changed, 42 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 04b4710..6dba684 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -114,6 +114,22 @@ DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
 			ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
 #endif /* CONFIG_DEBUG_FS */
 
+static void ioda_eeh_phb_diag(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	long rc;
+
+	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
+					 PNV_PCI_DIAG_BUF_SIZE);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failed to get diag-data for PHB#%x (%ld)\n",
+			__func__, hose->global_number, rc);
+		return;
+	}
+
+	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
+}
+
 /**
  * ioda_eeh_post_init - Chip dependent post initialization
  * @hose: PCI controller
@@ -272,6 +288,9 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
 			result |= EEH_STATE_DMA_ACTIVE;
 			result |= EEH_STATE_MMIO_ENABLED;
 			result |= EEH_STATE_DMA_ENABLED;
+		} else if (!(pe->state & EEH_PE_ISOLATED)) {
+			eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+			ioda_eeh_phb_diag(hose);
 		}
 
 		return result;
@@ -315,6 +334,15 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
 			   __func__, fstate, hose->global_number, pe_no);
 	}
 
+	/* Dump PHB diag-data for frozen PE */
+	if (result != EEH_STATE_NOT_SUPPORT &&
+	    (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) !=
+	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) &&
+	    !(pe->state & EEH_PE_ISOLATED)) {
+		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+		ioda_eeh_phb_diag(hose);
+	}
+
 	return result;
 }
 
@@ -541,27 +569,6 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
 static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
 			    char *drv_log, unsigned long len)
 {
-	s64 ret;
-	unsigned long flags;
-	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
-
-	spin_lock_irqsave(&phb->lock, flags);
-
-	ret = opal_pci_get_phb_diag_data2(phb->opal_id,
-			phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
-	if (ret) {
-		spin_unlock_irqrestore(&phb->lock, flags);
-		pr_warning("%s: Can't get log for PHB#%x-PE#%x (%lld)\n",
-			   __func__, hose->global_number, pe->addr, ret);
-		return -EIO;
-	}
-
-	/* The PHB diag-data is always indicative */
-	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
-
-	spin_unlock_irqrestore(&phb->lock, flags);
-
 	return 0;
 }
 
@@ -646,22 +653,6 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose)
 	}
 }
 
-static void ioda_eeh_phb_diag(struct pci_controller *hose)
-{
-	struct pnv_phb *phb = hose->private_data;
-	long rc;
-
-	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
-					 PNV_PCI_DIAG_BUF_SIZE);
-	if (rc != OPAL_SUCCESS) {
-		pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
-			    __func__, hose->global_number, rc);
-		return;
-	}
-
-	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
-}
-
 static int ioda_eeh_get_pe(struct pci_controller *hose,
 			   u16 pe_no, struct eeh_pe **pe)
 {
@@ -809,6 +800,20 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 		}
 
 		/*
+		 * EEH core will try recover from fenced PHB or
+		 * frozen PE. In the time for frozen PE, EEH core
+		 * enable IO path for that before collecting logs,
+		 * but it ruins the site. So we have to dump the
+		 * log in advance here.
+		 */
+		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
+		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
+		    !((*pe)->state & EEH_PE_ISOLATED)) {
+			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+			ioda_eeh_phb_diag(hose);
+		}
+
+		/*
 		 * If we have no errors on the specific PHB or only
 		 * informative error there, we continue poking it.
 		 * Otherwise, we need actions to be taken by upper
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 5/5] powerpc/powernv: Refactor PHB diag-data dump
From: Gavin Shan @ 2014-02-25  7:28 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1393313318-6341-1-git-send-email-shangw@linux.vnet.ibm.com>

As Ben suggested, the patch prints PHB diag-data with multiple
fields in one line and omits the line if the fields of that
line are all zero.

With the patch applied, the PHB3 diag-data dump looks like:

PHB3 PHB#3 Diag-data (Version: 1)

  brdgCtl:     00000002
  RootSts:     0000000f 00400000 b0830008 00100147 00002000
  nFir:        0000000000000000 0030006e00000000 0000000000000000
  PhbSts:      0000001c00000000 0000000000000000
  Lem:         0000000000100000 42498e327f502eae 0000000000000000
  InAErr:      8000000000000000 8000000000000000 0402030000000000 \
               0000000000000000
  PE[  8] A/B: 8480002b00000000 8000000000000000

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci.c |  220 +++++++++++++++++++---------------
 1 file changed, 125 insertions(+), 95 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 3955fc0..114e1a7 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -134,57 +134,72 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
 	pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n",
 		hose->global_number, common->version);
 
-	pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
-
-	pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
-	pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
-	pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
-
-	pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
-	pr_info("  slotStatus:           %08x\n", data->slotStatus);
-	pr_info("  linkStatus:           %08x\n", data->linkStatus);
-	pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
-	pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
-
-	pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
-	pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
-	pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
-	pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
-	pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
-	pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
-	pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
-	pr_info("  sourceId:             %08x\n", data->sourceId);
-	pr_info("  errorClass:           %016llx\n", data->errorClass);
-	pr_info("  correlator:           %016llx\n", data->correlator);
-	pr_info("  p7iocPlssr:           %016llx\n", data->p7iocPlssr);
-	pr_info("  p7iocCsr:             %016llx\n", data->p7iocCsr);
-	pr_info("  lemFir:               %016llx\n", data->lemFir);
-	pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
-	pr_info("  lemWOF:               %016llx\n", data->lemWOF);
-	pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
-	pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
-	pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
-	pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
-	pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
-	pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
-	pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
-	pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
-	pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
-	pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
-	pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
-	pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
-	pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
-	pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
-	pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
-	pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+	if (data->brdgCtl)
+		pr_info("  brdgCtl:     %08x\n",
+			data->brdgCtl);
+	if (data->portStatusReg || data->rootCmplxStatus ||
+	    data->busAgentStatus)
+		pr_info("  UtlSts:      %08x %08x %08x\n",
+			data->portStatusReg, data->rootCmplxStatus,
+			data->busAgentStatus);
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("  RootSts:     %08x %08x %08x %08x %08x\n",
+			data->deviceStatus, data->slotStatus,
+			data->linkStatus, data->devCmdStatus,
+			data->devSecStatus);
+	if (data->rootErrorStatus   || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("  RootErrSts:  %08x %08x %08x\n",
+			data->rootErrorStatus, data->uncorrErrorStatus,
+			data->corrErrorStatus);
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("  RootErrLog:  %08x %08x %08x %08x\n",
+			data->tlpHdr1, data->tlpHdr2,
+			data->tlpHdr3, data->tlpHdr4);
+	if (data->sourceId || data->errorClass ||
+	    data->correlator)
+		pr_info("  RootErrLog1: %08x %016llx %016llx\n",
+			data->sourceId, data->errorClass,
+			data->correlator);
+	if (data->p7iocPlssr || data->p7iocCsr)
+		pr_info("  PhbSts:      %016llx %016llx\n",
+			data->p7iocPlssr, data->p7iocCsr);
+	if (data->lemFir || data->lemErrorMask ||
+	    data->lemWOF)
+		pr_info("  Lem:         %016llx %016llx %016llx\n",
+			data->lemFir, data->lemErrorMask,
+			data->lemWOF);
+	if (data->phbErrorStatus || data->phbFirstErrorStatus ||
+	    data->phbErrorLog0   || data->phbErrorLog1)
+		pr_info("  PhbErr:      %016llx %016llx %016llx %016llx\n",
+			data->phbErrorStatus, data->phbFirstErrorStatus,
+			data->phbErrorLog0, data->phbErrorLog1);
+	if (data->mmioErrorStatus || data->mmioFirstErrorStatus ||
+	    data->mmioErrorLog0   || data->mmioErrorLog1)
+		pr_info("  OutErr:      %016llx %016llx %016llx %016llx\n",
+			data->mmioErrorStatus, data->mmioFirstErrorStatus,
+			data->mmioErrorLog0, data->mmioErrorLog1);
+	if (data->dma0ErrorStatus || data->dma0FirstErrorStatus ||
+	    data->dma0ErrorLog0   || data->dma0ErrorLog1)
+		pr_info("  InAErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
+			data->dma0ErrorLog0, data->dma0ErrorLog1);
+	if (data->dma1ErrorStatus || data->dma1FirstErrorStatus ||
+	    data->dma1ErrorLog0   || data->dma1ErrorLog1)
+		pr_info("  InBErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
+			data->dma1ErrorLog0, data->dma1ErrorLog1);
 
 	for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
 		if ((data->pestA[i] >> 63) == 0 &&
 		    (data->pestB[i] >> 63) == 0)
 			continue;
 
-		pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
-		pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+		pr_info("  PE[%3d] A/B: %016llx %016llx\n",
+			i, data->pestA[i], data->pestB[i]);
 	}
 }
 
@@ -197,62 +212,77 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
 	data = (struct OpalIoPhb3ErrorData*)common;
 	pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n",
 		hose->global_number, common->version);
-
-	pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
-
-	pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
-	pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
-	pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
-
-	pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
-	pr_info("  slotStatus:           %08x\n", data->slotStatus);
-	pr_info("  linkStatus:           %08x\n", data->linkStatus);
-	pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
-	pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
-
-	pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
-	pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
-	pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
-	pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
-	pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
-	pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
-	pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
-	pr_info("  sourceId:             %08x\n", data->sourceId);
-	pr_info("  errorClass:           %016llx\n", data->errorClass);
-	pr_info("  correlator:           %016llx\n", data->correlator);
-
-	pr_info("  nFir:                 %016llx\n", data->nFir);
-	pr_info("  nFirMask:             %016llx\n", data->nFirMask);
-	pr_info("  nFirWOF:              %016llx\n", data->nFirWOF);
-	pr_info("  PhbPlssr:             %016llx\n", data->phbPlssr);
-	pr_info("  PhbCsr:               %016llx\n", data->phbCsr);
-	pr_info("  lemFir:               %016llx\n", data->lemFir);
-	pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
-	pr_info("  lemWOF:               %016llx\n", data->lemWOF);
-	pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
-	pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
-	pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
-	pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
-	pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
-	pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
-	pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
-	pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
-	pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
-	pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
-	pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
-	pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
-	pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
-	pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
-	pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
-	pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+	if (data->brdgCtl)
+		pr_info("  brdgCtl:     %08x\n",
+			data->brdgCtl);
+	if (data->portStatusReg || data->rootCmplxStatus ||
+	    data->busAgentStatus)
+		pr_info("  UtlSts:      %08x %08x %08x\n",
+			data->portStatusReg, data->rootCmplxStatus,
+			data->busAgentStatus);
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("  RootSts:     %08x %08x %08x %08x %08x\n",
+			data->deviceStatus, data->slotStatus,
+			data->linkStatus, data->devCmdStatus,
+			data->devSecStatus);
+	if (data->rootErrorStatus || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("  RootErrSts:  %08x %08x %08x\n",
+			data->rootErrorStatus, data->uncorrErrorStatus,
+			data->corrErrorStatus);
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("  RootErrLog:  %08x %08x %08x %08x\n",
+			data->tlpHdr1, data->tlpHdr2,
+			data->tlpHdr3, data->tlpHdr4);
+	if (data->sourceId || data->errorClass ||
+	    data->correlator)
+		pr_info("  RootErrLog1: %08x %016llx %016llx\n",
+			data->sourceId, data->errorClass,
+			data->correlator);
+	if (data->nFir || data->nFirMask ||
+	    data->nFirWOF)
+		pr_info("  nFir:        %016llx %016llx %016llx\n",
+			data->nFir, data->nFirMask,
+			data->nFirWOF);
+	if (data->phbPlssr || data->phbCsr)
+		pr_info("  PhbSts:      %016llx %016llx\n",
+			data->phbPlssr, data->phbCsr);
+	if (data->lemFir || data->lemErrorMask ||
+	    data->lemWOF)
+		pr_info("  Lem:         %016llx %016llx %016llx\n",
+			data->lemFir, data->lemErrorMask,
+			data->lemWOF);
+	if (data->phbErrorStatus || data->phbFirstErrorStatus ||
+	    data->phbErrorLog0   || data->phbErrorLog1)
+		pr_info("  PhbErr:      %016llx %016llx %016llx %016llx\n",
+			data->phbErrorStatus, data->phbFirstErrorStatus,
+			data->phbErrorLog0, data->phbErrorLog1);
+	if (data->mmioErrorStatus || data->mmioFirstErrorStatus ||
+	    data->mmioErrorLog0   || data->mmioErrorLog1)
+		pr_info("  OutErr:      %016llx %016llx %016llx %016llx\n",
+			data->mmioErrorStatus, data->mmioFirstErrorStatus,
+			data->mmioErrorLog0, data->mmioErrorLog1);
+	if (data->dma0ErrorStatus || data->dma0FirstErrorStatus ||
+	    data->dma0ErrorLog0   || data->dma0ErrorLog1)
+		pr_info("  InAErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
+			data->dma0ErrorLog0, data->dma0ErrorLog1);
+	if (data->dma1ErrorStatus || data->dma1FirstErrorStatus ||
+	    data->dma1ErrorLog0   || data->dma1ErrorLog1)
+		pr_info("  InBErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
+			data->dma1ErrorLog0, data->dma1ErrorLog1);
 
 	for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
 		if ((data->pestA[i] >> 63) == 0 &&
 		    (data->pestB[i] >> 63) == 0)
 			continue;
 
-		pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
-		pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+		pr_info("  PE[%3d] A/B: %016llx %016llx\n",
+			i, data->pestA[i], data->pestB[i]);
 	}
 }
 
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH] powerpc: warn users of smt-snooze-delay that the API isn't there anymore
From: Deepthi Dharwar @ 2014-02-25  7:59 UTC (permalink / raw)
  To: Cody P Schafer
  Cc: Madhavan Srinivasan, Wang Dongsheng, linux-kernel, Paul Gortmaker,
	Paul Mackerras, Olof Johansson, linuxppc-dev
In-Reply-To: <1393028074-26797-1-git-send-email-cody@linux.vnet.ibm.com>

On 02/22/2014 05:44 AM, Cody P Schafer wrote:
> /sys/devices/system/cpu/cpu*/smt-snooze-delay was converted into a NOP
> in commit 3fa8cad82b94d0bed002571bd246f2299ffc876b, and now does
> nothing. Add a pr_warn() to convince any users that they should stop
> using it.
> 
> The commit message from the removing commit notes that this
> functionality should move into the cpuidle driver, essentially by
> adjusting target_residency to the specified value. At the moment,
> target_residency is not exposed by cpuidle's sysfs, so there isn't a
> drop in replacement for this.
> 
> Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>



smt-snooze-delay was used to delay an entry into NAP state
or disable NAP state completely. This was before we adopted cpuidle
framework for idle state management on powerpc. This is a per-cpu
tunable, where we could have cores with different target residencies
and idle states.

Now that we have moved towards the cpuidle framework, which provides a
better way of idle state management and expects a single target
residency for all the cpus, we can no longer honour the
smt-snooze-delay functionality of providing per-cpu target residency.
This was badly broken in the kernel before the patch to clean it up.
By removing this we would honour cpuidle framework through which we
carry out idle state management.

And generic cpuidle framework does not provide the flexibility to change
target residency on the go as there are multiple idle states supported
and trying to change target residency of one state (incorrectly) may
result in undefined behavior.

Also, the second functionality to disable/enable states can be done
using the cpuidle sysfs files. So this functionality is preserved.

We currently do not use smt-snooze-delay in the kernel.
The sysfs entries need to be retained until we clean up the ppc64_cpu
utility, which uses these entries to determine SMT;
a clean-up patch for this has already been posted by Prerna.
Once we have the ppc64_cpu changes in, we can look at cleaning up these
parts of the kernel.

Regards,
Deepthi





> ---
>  arch/powerpc/kernel/sysfs.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> index 97e1dc9..84097b4 100644
> --- a/arch/powerpc/kernel/sysfs.c
> +++ b/arch/powerpc/kernel/sysfs.c
> @@ -50,6 +50,9 @@ static ssize_t store_smt_snooze_delay(struct device *dev,
>  	if (ret != 1)
>  		return -EINVAL;
>  
> +	pr_warn_ratelimited("%s (%d): /sys/devices/system/cpu/cpu%d/smt-snooze-delay is deprecated and is a NOP\n",
> +		  current->comm, task_pid_nr(current), cpu->dev.id);
> +
>  	per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
>  	return count;
>  }
> @@ -60,6 +63,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev,
>  {
>  	struct cpu *cpu = container_of(dev, struct cpu, dev);
>  
> +	pr_warn_ratelimited("%s (%d): /sys/devices/system/cpu/cpu%d/smt-snooze-delay is deprecated and is a NOP\n",
> +		  current->comm, task_pid_nr(current), cpu->dev.id);
> +
>  	return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
>  }
>  
> 

^ permalink raw reply

* [PATCH v2 0/1] audit: Add CONFIG_HAVE_ARCH_AUDITSYSCALL
From: AKASHI Takahiro @ 2014-02-25  9:16 UTC (permalink / raw)
  To: viro, eparis, rgb, arndb
  Cc: linux-s390, linaro-kernel, linux-ia64, user-mode-linux-devel,
	linux-parisc, linux-sh, catalin.marinas, x86, will.deacon,
	linux-kernel, AKASHI Takahiro, linux-alpha, dsaxena,
	user-mode-linux-user, linux-audit, sparclinux, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <1391407232-4623-1-git-send-email-takahiro.akashi@linaro.org>

Currently AUDITSYSCALL has a long list of architecture dependencies:
       depends on AUDIT && (X86 || PARISC || PPC || S390 || IA64 || UML ||
                SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT) || ALPHA)
The purpose of this patch is to replace it with HAVE_ARCH_AUDITSYSCALL
for simplicity.

Changes v1 -> v2:
* rebased to 3.14-rcX, and so added a change on ALPHA

AKASHI Takahiro (1):
  audit: Add CONFIG_HAVE_ARCH_AUDITSYSCALL

 arch/alpha/Kconfig     |    1 +
 arch/arm/Kconfig       |    1 +
 arch/ia64/Kconfig      |    1 +
 arch/parisc/Kconfig    |    1 +
 arch/powerpc/Kconfig   |    1 +
 arch/s390/Kconfig      |    1 +
 arch/sh/Kconfig        |    1 +
 arch/sparc/Kconfig     |    1 +
 arch/um/Kconfig.common |    1 +
 arch/x86/Kconfig       |    1 +
 init/Kconfig           |    5 ++++-
 11 files changed, 14 insertions(+), 1 deletion(-)

-- 
1.7.9.5

^ permalink raw reply

* [PATCH v2 1/1] audit: Add CONFIG_HAVE_ARCH_AUDITSYSCALL
From: AKASHI Takahiro @ 2014-02-25  9:16 UTC (permalink / raw)
  To: viro, eparis, rgb, arndb
  Cc: linux-s390, linaro-kernel, linux-ia64, user-mode-linux-devel,
	linux-parisc, linux-sh, catalin.marinas, x86, will.deacon,
	linux-kernel, AKASHI Takahiro, linux-alpha, dsaxena,
	user-mode-linux-user, linux-audit, sparclinux, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <1393319784-2758-1-git-send-email-takahiro.akashi@linaro.org>

Currently AUDITSYSCALL has a long list of architecture dependencies:
       depends on AUDIT && (X86 || PARISC || PPC || S390 || IA64 || UML ||
		SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT) || ALPHA)
The purpose of this patch is to replace it with HAVE_ARCH_AUDITSYSCALL
for simplicity.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/alpha/Kconfig     |    1 +
 arch/arm/Kconfig       |    1 +
 arch/ia64/Kconfig      |    1 +
 arch/parisc/Kconfig    |    1 +
 arch/powerpc/Kconfig   |    1 +
 arch/s390/Kconfig      |    1 +
 arch/sh/Kconfig        |    1 +
 arch/sparc/Kconfig     |    1 +
 arch/um/Kconfig.common |    1 +
 arch/x86/Kconfig       |    1 +
 init/Kconfig           |    5 ++++-
 11 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index f6c6b34..b7ff9a3 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -22,6 +22,7 @@ config ALPHA
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
 	select ODD_RT_SIGACTION
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e254198..ca79340 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -24,6 +24,7 @@ config ARM
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select HARDIRQS_SW_RESEND
+	select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 0c8e553..5409bf4 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -44,6 +44,7 @@ config IA64
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
 	select ARCH_USE_CMPXCHG_LOCKREF
+	select HAVE_ARCH_AUDITSYSCALL
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index bb2a8ec..1faefed 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -28,6 +28,7 @@ config PARISC
 	select CLONE_BACKWARDS
 	select TTY # Needed for pdc_cons.c
 	select HAVE_DEBUG_STACKOVERFLOW
+	select HAVE_ARCH_AUDITSYSCALL
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 957bf34..7b3b8fe 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -141,6 +141,7 @@ config PPC
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK
 	select ARCH_USE_CMPXCHG_LOCKREF if PPC64
+	select HAVE_ARCH_AUDITSYSCALL
 
 config GENERIC_CSUM
 	def_bool CPU_LITTLE_ENDIAN
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 65a0775..1b58568 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -103,6 +103,7 @@ config S390
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 6357710..4addd87 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -42,6 +42,7 @@ config SUPERH
 	select MODULES_USE_ELF_RELA
 	select OLD_SIGSUSPEND
 	select OLD_SIGACTION
+	select HAVE_ARCH_AUDITSYSCALL
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index c51efdc..9c74d6b 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -77,6 +77,7 @@ config SPARC64
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select HAVE_C_RECORDMCOUNT
 	select NO_BOOTMEM
+	select HAVE_ARCH_AUDITSYSCALL
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index 21ca44c..6915d28 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -1,6 +1,7 @@
 config UML
 	bool
 	default y
+	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_UID16
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0af5250..2938365 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -127,6 +127,7 @@ config X86
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
 	select HAVE_CC_STACKPROTECTOR
+	select HAVE_ARCH_AUDITSYSCALL
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/init/Kconfig b/init/Kconfig
index 009a797..d4ec53d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -282,9 +282,12 @@ config AUDIT
 	  logging of avc messages output).  Does not do system-call
 	  auditing without CONFIG_AUDITSYSCALL.
 
+config HAVE_ARCH_AUDITSYSCALL
+	bool
+
 config AUDITSYSCALL
 	bool "Enable system-call auditing support"
-	depends on AUDIT && (X86 || PARISC || PPC || S390 || IA64 || UML || SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT) || ALPHA)
+	depends on AUDIT && HAVE_ARCH_AUDITSYSCALL
 	default y if SECURITY_SELINUX
 	help
 	  Enable low-overhead system-call auditing infrastructure that
-- 
1.7.9.5

^ permalink raw reply related

* Re: [PATCH v2 02/11] perf core: export swevent hrtimer helpers
From: Peter Zijlstra @ 2014-02-25 10:20 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Cody P Schafer, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo, Linux PPC
In-Reply-To: <20140225033326.7BB942C0228@ozlabs.org>

On Tue, Feb 25, 2014 at 02:33:26PM +1100, Michael Ellerman wrote:
> On Fri, 2014-14-02 at 22:02:06 UTC, Cody P Schafer wrote:
> > Export the swevent hrtimer helpers currently only used in events/core.c
> > to allow the addition of architecture specific sw-like pmus.
> 
> Peter, Ingo, can we get your ACK on this please?

How are they used? I saw some usage in patch 9 or so; but it's not
explained anywhere. All patches have non-existent Changelogs and the few
comments that are there are pretty hardware specific.

So please do tell; what do you need this for?

^ permalink raw reply

* Re: [PATCH] powerpc/pci: Use of_pci_range_parser helper in pci_process_bridge_OF_ranges
From: Benjamin Herrenschmidt @ 2014-02-25 13:25 UTC (permalink / raw)
  To: Andrew Murray; +Cc: linux-pci, bhelgass, linuxppc-dev
In-Reply-To: <1393309931-20405-1-git-send-email-amurray@embedded-bits.co.uk>

On Tue, 2014-02-25 at 06:32 +0000, Andrew Murray wrote:
> This patch updates the implementation of pci_process_bridge_OF_ranges to use
> the of_pci_range_parser helpers.
> 
> Signed-off-by: Andrew Murray <amurray@embedded-bits.co.uk>
> ---
> I've verified that this builds, however I have no hardware to test this.
> ---

Thanks. A cursory review looks good but I need to spend a bit more time
making sure our various special cases are handled properly.

It's tracked on patchwork so unless you have an update to the patch,
it won't be lost, but it might take a little while before I get to
actually merge it.

Cheers,
Ben.

>  arch/powerpc/kernel/pci-common.c | 88 +++++++++++++---------------------------
>  1 file changed, 29 insertions(+), 59 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
> index d9476c1..a05fe18 100644
> --- a/arch/powerpc/kernel/pci-common.c
> +++ b/arch/powerpc/kernel/pci-common.c
> @@ -666,60 +666,36 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,
>  void pci_process_bridge_OF_ranges(struct pci_controller *hose,
>  				  struct device_node *dev, int primary)
>  {
> -	const __be32 *ranges;
> -	int rlen;
> -	int pna = of_n_addr_cells(dev);
> -	int np = pna + 5;
>  	int memno = 0;
> -	u32 pci_space;
> -	unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size;
>  	struct resource *res;
> +	struct of_pci_range range;
> +	struct of_pci_range_parser parser;
>  
>  	printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
>  	       dev->full_name, primary ? "(primary)" : "");
>  
> -	/* Get ranges property */
> -	ranges = of_get_property(dev, "ranges", &rlen);
> -	if (ranges == NULL)
> +	/* Check for ranges property */
> +	if (of_pci_range_parser_init(&parser, dev))
>  		return;
>  
>  	/* Parse it */
> -	while ((rlen -= np * 4) >= 0) {
> -		/* Read next ranges element */
> -		pci_space = of_read_number(ranges, 1);
> -		pci_addr = of_read_number(ranges + 1, 2);
> -		cpu_addr = of_translate_address(dev, ranges + 3);
> -		size = of_read_number(ranges + pna + 3, 2);
> -		ranges += np;
> -
> +	for_each_of_pci_range(&parser, &range) {
>  		/* If we failed translation or got a zero-sized region
>  		 * (some FW try to feed us with non sensical zero sized regions
>  		 * such as power3 which look like some kind of attempt at exposing
>  		 * the VGA memory hole)
>  		 */
> -		if (cpu_addr == OF_BAD_ADDR || size == 0)
> +		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
>  			continue;
>  
> -		/* Now consume following elements while they are contiguous */
> -		for (; rlen >= np * sizeof(u32);
> -		     ranges += np, rlen -= np * 4) {
> -			if (of_read_number(ranges, 1) != pci_space)
> -				break;
> -			pci_next = of_read_number(ranges + 1, 2);
> -			cpu_next = of_translate_address(dev, ranges + 3);
> -			if (pci_next != pci_addr + size ||
> -			    cpu_next != cpu_addr + size)
> -				break;
> -			size += of_read_number(ranges + pna + 3, 2);
> -		}
> -
>  		/* Act based on address space type */
>  		res = NULL;
> -		switch ((pci_space >> 24) & 0x3) {
> -		case 1:		/* PCI IO space */
> +		switch (range.flags & IORESOURCE_TYPE_BITS) {
> +		case IORESOURCE_IO:
>  			printk(KERN_INFO
>  			       "  IO 0x%016llx..0x%016llx -> 0x%016llx\n",
> -			       cpu_addr, cpu_addr + size - 1, pci_addr);
> +			       range.cpu_addr, range.cpu_addr + range.size - 1,
> +			       range.pci_addr);
>  
>  			/* We support only one IO range */
>  			if (hose->pci_io_size) {
> @@ -729,11 +705,12 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
>  			}
>  #ifdef CONFIG_PPC32
>  			/* On 32 bits, limit I/O space to 16MB */
> -			if (size > 0x01000000)
> -				size = 0x01000000;
> +			if (range.size > 0x01000000)
> +				range.size = 0x01000000;
>  
>  			/* 32 bits needs to map IOs here */
> -			hose->io_base_virt = ioremap(cpu_addr, size);
> +			hose->io_base_virt = ioremap(range.cpu_addr,
> +						range.size);
>  
>  			/* Expect trouble if pci_addr is not 0 */
>  			if (primary)
> @@ -743,20 +720,20 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
>  			/* pci_io_size and io_base_phys always represent IO
>  			 * space starting at 0 so we factor in pci_addr
>  			 */
> -			hose->pci_io_size = pci_addr + size;
> -			hose->io_base_phys = cpu_addr - pci_addr;
> +			hose->pci_io_size = range.pci_addr + range.size;
> +			hose->io_base_phys = range.cpu_addr - range.pci_addr;
>  
>  			/* Build resource */
>  			res = &hose->io_resource;
> -			res->flags = IORESOURCE_IO;
> -			res->start = pci_addr;
> +			range.cpu_addr = range.pci_addr;
>  			break;
> -		case 2:		/* PCI Memory space */
> -		case 3:		/* PCI 64 bits Memory space */
> +		case IORESOURCE_MEM:
>  			printk(KERN_INFO
>  			       " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
> -			       cpu_addr, cpu_addr + size - 1, pci_addr,
> -			       (pci_space & 0x40000000) ? "Prefetch" : "");
> +			       range.cpu_addr, range.cpu_addr + range.size - 1,
> +			       range.pci_addr,
> +			       (range.pci_space & 0x40000000) ?
> +			       "Prefetch" : "");
>  
>  			/* We support only 3 memory ranges */
>  			if (memno >= 3) {
> @@ -765,28 +742,21 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
>  				continue;
>  			}
>  			/* Handles ISA memory hole space here */
> -			if (pci_addr == 0) {
> +			if (range.pci_addr == 0) {
>  				if (primary || isa_mem_base == 0)
> -					isa_mem_base = cpu_addr;
> -				hose->isa_mem_phys = cpu_addr;
> -				hose->isa_mem_size = size;
> +					isa_mem_base = range.cpu_addr;
> +				hose->isa_mem_phys = range.cpu_addr;
> +				hose->isa_mem_size = range.size;
>  			}
>  
>  			/* Build resource */
> -			hose->mem_offset[memno] = cpu_addr - pci_addr;
> +			hose->mem_offset[memno] = range.cpu_addr -
> +							range.pci_addr;
>  			res = &hose->mem_resources[memno++];
> -			res->flags = IORESOURCE_MEM;
> -			if (pci_space & 0x40000000)
> -				res->flags |= IORESOURCE_PREFETCH;
> -			res->start = cpu_addr;
>  			break;
>  		}
>  		if (res != NULL) {
> -			res->name = dev->full_name;
> -			res->end = res->start + size - 1;
> -			res->parent = NULL;
> -			res->sibling = NULL;
> -			res->child = NULL;
> +			of_pci_range_to_resource(&range, dev, res);
>  		}
>  	}
>  }

^ permalink raw reply

* Re: [PATCH] powerpc/pci: Use of_pci_range_parser helper in pci_process_bridge_OF_ranges
From: Andrew Murray @ 2014-02-25 14:12 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linux-pci@vger.kernel.org, bhelgass, linuxppc-dev
In-Reply-To: <1393334743.1282.8.camel@pasglop>

On 25 February 2014 13:25, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
> On Tue, 2014-02-25 at 06:32 +0000, Andrew Murray wrote:
>> This patch updates the implementation of pci_process_bridge_OF_ranges to use
>> the of_pci_range_parser helpers.
>>
>> Signed-off-by: Andrew Murray <amurray@embedded-bits.co.uk>
>> ---
>> I've verified that this builds, however I have no hardware to test this.
>> ---
>
> Thanks. A cursory review looks good but I need to spend a bit more time
> making sure our various special cases are handled properly.
>
> It's tracked on patchwork so unless you have an update to the patch,
> it won't be lost, but it might take a little while before I get to
> actually merge it.

Thanks for the response - Yes it's easy to screw this stuff up.

Please note that some of the special cases are handled by the parser
helper e.g. consumption of contiguous ranges, assignment to 'struct
resource', etc.

It should also be pointed out that this is now once again very similar
to the Microblaze implementation.

Thanks,

Andrew Murray

>
> Cheers,
> Ben.
>
>>  arch/powerpc/kernel/pci-common.c | 88 +++++++++++++---------------------------
>>  1 file changed, 29 insertions(+), 59 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
>> index d9476c1..a05fe18 100644
>> --- a/arch/powerpc/kernel/pci-common.c
>> +++ b/arch/powerpc/kernel/pci-common.c
>> @@ -666,60 +666,36 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,
>>  void pci_process_bridge_OF_ranges(struct pci_controller *hose,
>>                                 struct device_node *dev, int primary)
>>  {
>> -     const __be32 *ranges;
>> -     int rlen;
>> -     int pna = of_n_addr_cells(dev);
>> -     int np = pna + 5;
>>       int memno = 0;
>> -     u32 pci_space;
>> -     unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size;
>>       struct resource *res;
>> +     struct of_pci_range range;
>> +     struct of_pci_range_parser parser;
>>
>>       printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
>>              dev->full_name, primary ? "(primary)" : "");
>>
>> -     /* Get ranges property */
>> -     ranges = of_get_property(dev, "ranges", &rlen);
>> -     if (ranges == NULL)
>> +     /* Check for ranges property */
>> +     if (of_pci_range_parser_init(&parser, dev))
>>               return;
>>
>>       /* Parse it */
>> -     while ((rlen -= np * 4) >= 0) {
>> -             /* Read next ranges element */
>> -             pci_space = of_read_number(ranges, 1);
>> -             pci_addr = of_read_number(ranges + 1, 2);
>> -             cpu_addr = of_translate_address(dev, ranges + 3);
>> -             size = of_read_number(ranges + pna + 3, 2);
>> -             ranges += np;
>> -
>> +     for_each_of_pci_range(&parser, &range) {
>>               /* If we failed translation or got a zero-sized region
>>                * (some FW try to feed us with non sensical zero sized regions
>>                * such as power3 which look like some kind of attempt at exposing
>>                * the VGA memory hole)
>>                */
>> -             if (cpu_addr == OF_BAD_ADDR || size == 0)
>> +             if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
>>                       continue;
>>
>> -             /* Now consume following elements while they are contiguous */
>> -             for (; rlen >= np * sizeof(u32);
>> -                  ranges += np, rlen -= np * 4) {
>> -                     if (of_read_number(ranges, 1) != pci_space)
>> -                             break;
>> -                     pci_next = of_read_number(ranges + 1, 2);
>> -                     cpu_next = of_translate_address(dev, ranges + 3);
>> -                     if (pci_next != pci_addr + size ||
>> -                         cpu_next != cpu_addr + size)
>> -                             break;
>> -                     size += of_read_number(ranges + pna + 3, 2);
>> -             }
>> -
>>               /* Act based on address space type */
>>               res = NULL;
>> -             switch ((pci_space >> 24) & 0x3) {
>> -             case 1:         /* PCI IO space */
>> +             switch (range.flags & IORESOURCE_TYPE_BITS) {
>> +             case IORESOURCE_IO:
>>                       printk(KERN_INFO
>>                              "  IO 0x%016llx..0x%016llx -> 0x%016llx\n",
>> -                            cpu_addr, cpu_addr + size - 1, pci_addr);
>> +                            range.cpu_addr, range.cpu_addr + range.size - 1,
>> +                            range.pci_addr);
>>
>>                       /* We support only one IO range */
>>                       if (hose->pci_io_size) {
>> @@ -729,11 +705,12 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
>>                       }
>>  #ifdef CONFIG_PPC32
>>                       /* On 32 bits, limit I/O space to 16MB */
>> -                     if (size > 0x01000000)
>> -                             size = 0x01000000;
>> +                     if (range.size > 0x01000000)
>> +                             range.size = 0x01000000;
>>
>>                       /* 32 bits needs to map IOs here */
>> -                     hose->io_base_virt = ioremap(cpu_addr, size);
>> +                     hose->io_base_virt = ioremap(range.cpu_addr,
>> +                                             range.size);
>>
>>                       /* Expect trouble if pci_addr is not 0 */
>>                       if (primary)
>> @@ -743,20 +720,20 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
>>                       /* pci_io_size and io_base_phys always represent IO
>>                        * space starting at 0 so we factor in pci_addr
>>                        */
>> -                     hose->pci_io_size = pci_addr + size;
>> -                     hose->io_base_phys = cpu_addr - pci_addr;
>> +                     hose->pci_io_size = range.pci_addr + range.size;
>> +                     hose->io_base_phys = range.cpu_addr - range.pci_addr;
>>
>>                       /* Build resource */
>>                       res = &hose->io_resource;
>> -                     res->flags = IORESOURCE_IO;
>> -                     res->start = pci_addr;
>> +                     range.cpu_addr = range.pci_addr;
>>                       break;
>> -             case 2:         /* PCI Memory space */
>> -             case 3:         /* PCI 64 bits Memory space */
>> +             case IORESOURCE_MEM:
>>                       printk(KERN_INFO
>>                              " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
>> -                            cpu_addr, cpu_addr + size - 1, pci_addr,
>> -                            (pci_space & 0x40000000) ? "Prefetch" : "");
>> +                            range.cpu_addr, range.cpu_addr + range.size - 1,
>> +                            range.pci_addr,
>> +                            (range.pci_space & 0x40000000) ?
>> +                            "Prefetch" : "");
>>
>>                       /* We support only 3 memory ranges */
>>                       if (memno >= 3) {
>> @@ -765,28 +742,21 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
>>                               continue;
>>                       }
>>                       /* Handles ISA memory hole space here */
>> -                     if (pci_addr == 0) {
>> +                     if (range.pci_addr == 0) {
>>                               if (primary || isa_mem_base == 0)
>> -                                     isa_mem_base = cpu_addr;
>> -                             hose->isa_mem_phys = cpu_addr;
>> -                             hose->isa_mem_size = size;
>> +                                     isa_mem_base = range.cpu_addr;
>> +                             hose->isa_mem_phys = range.cpu_addr;
>> +                             hose->isa_mem_size = range.size;
>>                       }
>>
>>                       /* Build resource */
>> -                     hose->mem_offset[memno] = cpu_addr - pci_addr;
>> +                     hose->mem_offset[memno] = range.cpu_addr -
>> +                                                     range.pci_addr;
>>                       res = &hose->mem_resources[memno++];
>> -                     res->flags = IORESOURCE_MEM;
>> -                     if (pci_space & 0x40000000)
>> -                             res->flags |= IORESOURCE_PREFETCH;
>> -                     res->start = cpu_addr;
>>                       break;
>>               }
>>               if (res != NULL) {
>> -                     res->name = dev->full_name;
>> -                     res->end = res->start + size - 1;
>> -                     res->parent = NULL;
>> -                     res->sibling = NULL;
>> -                     res->child = NULL;
>> +                     of_pci_range_to_resource(&range, dev, res);
>>               }
>>       }
>>  }
>
>



-- 
Andrew Murray, Director
Embedded Bits Limited
www.embedded-bits.co.uk

Embedded Bits Limited is a company registered in England and Wales
with company number 08178608 and VAT number 140658911. Registered
office: Embedded Bits Limited c/o InTouch Accounting Ltd. Bristol and
West House Post Office Road Bournemouth Dorset BH1 1BL

^ permalink raw reply

* Re: [PATCH v2 1/1] audit: Add CONFIG_HAVE_ARCH_AUDITSYSCALL
From: Will Deacon @ 2014-02-25 14:53 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: linux-s390@vger.kernel.org, linaro-kernel@lists.linaro.org,
	linux-ia64@vger.kernel.org,
	user-mode-linux-devel@lists.sourceforge.net,
	linux-parisc@vger.kernel.org, linux-sh@vger.kernel.org,
	rgb@redhat.com, Catalin Marinas, x86@kernel.org, arndb@arndb.de,
	eparis@redhat.com, linux-kernel@vger.kernel.org,
	linux-alpha@vger.kernel.org, dsaxena@linaro.org,
	viro@zeniv.linux.org.uk,
	user-mode-linux-user@lists.sourceforge.net,
	linux-audit@redhat.com, sparclinux@vger.kernel.org,
	linuxppc-dev@lists.ozlabs.org,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <1393319784-2758-2-git-send-email-takahiro.akashi@linaro.org>

On Tue, Feb 25, 2014 at 09:16:24AM +0000, AKASHI Takahiro wrote:
> Currently AUDITSYSCALL has a long list of architecture depencency:
>        depends on AUDIT && (X86 || PARISC || PPC || S390 || IA64 || UML ||
> 		SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT) || ALPHA)
> The purpose of this patch is to replace it with HAVE_ARCH_AUDITSYSCALL
> for simplicity.

Looks sensible to me:

  Acked-by: Will Deacon <will.deacon@arm.com>

Will

^ permalink raw reply

* Re: [PATCH v2 1/1] audit: Add CONFIG_HAVE_ARCH_AUDITSYSCALL
From: Richard Guy Briggs @ 2014-02-25 15:25 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: linux-s390, linaro-kernel, linux-ia64, user-mode-linux-devel,
	linux-parisc, linux-sh, catalin.marinas, x86, will.deacon,
	linux-kernel, eparis, linux-audit, user-mode-linux-user,
	linux-alpha, sparclinux, linuxppc-dev, linux-arm-kernel
In-Reply-To: <1393319784-2758-2-git-send-email-takahiro.akashi@linaro.org>

On 14/02/25, AKASHI Takahiro wrote:
> Currently AUDITSYSCALL has a long list of architecture depencency:
>        depends on AUDIT && (X86 || PARISC || PPC || S390 || IA64 || UML ||
> 		SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT) || ALPHA)
> The purpose of this patch is to replace it with HAVE_ARCH_AUDITSYSCALL
> for simplicity.
> 
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>

Acked-by: Richard Guy Briggs <rgb@redhat.com>

> ---
>  arch/alpha/Kconfig     |    1 +
>  arch/arm/Kconfig       |    1 +
>  arch/ia64/Kconfig      |    1 +
>  arch/parisc/Kconfig    |    1 +
>  arch/powerpc/Kconfig   |    1 +
>  arch/s390/Kconfig      |    1 +
>  arch/sh/Kconfig        |    1 +
>  arch/sparc/Kconfig     |    1 +
>  arch/um/Kconfig.common |    1 +
>  arch/x86/Kconfig       |    1 +
>  init/Kconfig           |    5 ++++-
>  11 files changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
> index f6c6b34..b7ff9a3 100644
> --- a/arch/alpha/Kconfig
> +++ b/arch/alpha/Kconfig
> @@ -22,6 +22,7 @@ config ALPHA
>  	select GENERIC_SMP_IDLE_THREAD
>  	select GENERIC_STRNCPY_FROM_USER
>  	select GENERIC_STRNLEN_USER
> +	select HAVE_ARCH_AUDITSYSCALL
>  	select HAVE_MOD_ARCH_SPECIFIC
>  	select MODULES_USE_ELF_RELA
>  	select ODD_RT_SIGACTION
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index e254198..ca79340 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -24,6 +24,7 @@ config ARM
>  	select GENERIC_STRNCPY_FROM_USER
>  	select GENERIC_STRNLEN_USER
>  	select HARDIRQS_SW_RESEND
> +	select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
>  	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
>  	select HAVE_ARCH_KGDB
>  	select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
> diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
> index 0c8e553..5409bf4 100644
> --- a/arch/ia64/Kconfig
> +++ b/arch/ia64/Kconfig
> @@ -44,6 +44,7 @@ config IA64
>  	select HAVE_MOD_ARCH_SPECIFIC
>  	select MODULES_USE_ELF_RELA
>  	select ARCH_USE_CMPXCHG_LOCKREF
> +	select HAVE_ARCH_AUDITSYSCALL
>  	default y
>  	help
>  	  The Itanium Processor Family is Intel's 64-bit successor to
> diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
> index bb2a8ec..1faefed 100644
> --- a/arch/parisc/Kconfig
> +++ b/arch/parisc/Kconfig
> @@ -28,6 +28,7 @@ config PARISC
>  	select CLONE_BACKWARDS
>  	select TTY # Needed for pdc_cons.c
>  	select HAVE_DEBUG_STACKOVERFLOW
> +	select HAVE_ARCH_AUDITSYSCALL
>  
>  	help
>  	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 957bf34..7b3b8fe 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -141,6 +141,7 @@ config PPC
>  	select HAVE_DEBUG_STACKOVERFLOW
>  	select HAVE_IRQ_EXIT_ON_IRQ_STACK
>  	select ARCH_USE_CMPXCHG_LOCKREF if PPC64
> +	select HAVE_ARCH_AUDITSYSCALL
>  
>  config GENERIC_CSUM
>  	def_bool CPU_LITTLE_ENDIAN
> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
> index 65a0775..1b58568 100644
> --- a/arch/s390/Kconfig
> +++ b/arch/s390/Kconfig
> @@ -103,6 +103,7 @@ config S390
>  	select GENERIC_SMP_IDLE_THREAD
>  	select GENERIC_TIME_VSYSCALL
>  	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
> +	select HAVE_ARCH_AUDITSYSCALL
>  	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
>  	select HAVE_ARCH_SECCOMP_FILTER
>  	select HAVE_ARCH_TRACEHOOK
> diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
> index 6357710..4addd87 100644
> --- a/arch/sh/Kconfig
> +++ b/arch/sh/Kconfig
> @@ -42,6 +42,7 @@ config SUPERH
>  	select MODULES_USE_ELF_RELA
>  	select OLD_SIGSUSPEND
>  	select OLD_SIGACTION
> +	select HAVE_ARCH_AUDITSYSCALL
>  	help
>  	  The SuperH is a RISC processor targeted for use in embedded systems
>  	  and consumer electronics; it was also used in the Sega Dreamcast
> diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
> index c51efdc..9c74d6b 100644
> --- a/arch/sparc/Kconfig
> +++ b/arch/sparc/Kconfig
> @@ -77,6 +77,7 @@ config SPARC64
>  	select ARCH_HAVE_NMI_SAFE_CMPXCHG
>  	select HAVE_C_RECORDMCOUNT
>  	select NO_BOOTMEM
> +	select HAVE_ARCH_AUDITSYSCALL
>  
>  config ARCH_DEFCONFIG
>  	string
> diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
> index 21ca44c..6915d28 100644
> --- a/arch/um/Kconfig.common
> +++ b/arch/um/Kconfig.common
> @@ -1,6 +1,7 @@
>  config UML
>  	bool
>  	default y
> +	select HAVE_ARCH_AUDITSYSCALL
>  	select HAVE_UID16
>  	select GENERIC_IRQ_SHOW
>  	select GENERIC_CPU_DEVICES
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 0af5250..2938365 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -127,6 +127,7 @@ config X86
>  	select HAVE_DEBUG_STACKOVERFLOW
>  	select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
>  	select HAVE_CC_STACKPROTECTOR
> +	select HAVE_ARCH_AUDITSYSCALL
>  
>  config INSTRUCTION_DECODER
>  	def_bool y
> diff --git a/init/Kconfig b/init/Kconfig
> index 009a797..d4ec53d 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -282,9 +282,12 @@ config AUDIT
>  	  logging of avc messages output).  Does not do system-call
>  	  auditing without CONFIG_AUDITSYSCALL.
>  
> +config HAVE_ARCH_AUDITSYSCALL
> +	bool
> +
>  config AUDITSYSCALL
>  	bool "Enable system-call auditing support"
> -	depends on AUDIT && (X86 || PARISC || PPC || S390 || IA64 || UML || SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT) || ALPHA)
> +	depends on AUDIT && HAVE_ARCH_AUDITSYSCALL
>  	default y if SECURITY_SELINUX
>  	help
>  	  Enable low-overhead system-call auditing infrastructure that
> -- 
> 1.7.9.5
> 

- RGB

--
Richard Guy Briggs <rbriggs@redhat.com>
Senior Software Engineer, Kernel Security, AMER ENG Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635, Alt: +1.613.693.0684x3545

^ permalink raw reply

* Re: [PATCH] PPC: KVM: Introduce hypervisor call H_GET_TCE
From: Laurent Dufour @ 2014-02-25 16:00 UTC (permalink / raw)
  To: Alexander Graf
  Cc: kvm@vger.kernel.org mailing list, Gleb Natapov, kvm-ppc,
	Paul Mackerras, Paolo Bonzini, linuxppc-dev
In-Reply-To: <75FB1EEB-910A-49A9-A4CC-0A2E5403C54C@suse.de>

On 21/02/2014 16:57, Alexander Graf wrote:
> 
> On 21.02.2014, at 16:31, Laurent Dufour <ldufour@linux.vnet.ibm.com> wrote:
> 
>> This fix introduces the H_GET_TCE hypervisor call which is basically the
>> reverse of H_PUT_TCE, as defined in the Power Architecture Platform
>> Requirements (PAPR).
>>
>> The hcall H_GET_TCE is required by the kdump kernel which is calling it to
>> retrieve the TCE set up by the panicing kernel.
>>
>> Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
> 
> Thanks, applied to kvm-ppc-queue. Btw, why exactly are we using struct page pointers and alloc_page rather than __get_free_page() and simple page start pointers?

FWIW, I'm not so familiar with that part of the code, but it seems that this
is due to the page fault handler (kvm_spapr_tce_fault), which is part of the
mmap file operation handlers associated with the fd returned by
kvm_vm_ioctl_create_spapr_tce. The underlying vma operation requires the
page fault handler to return a struct page value in the vm_fault structure.

Cheers,
Laurent.

^ permalink raw reply

* Re: [PATCH v2 1/1] audit: Add CONFIG_HAVE_ARCH_AUDITSYSCALL
From: Matt Turner @ 2014-02-25 17:40 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: linux-s390, linaro-kernel, linux-ia64, user-mode-linux-devel,
	linux-parisc@vger.kernel.org, linux-sh, rgb, catalin.marinas, x86,
	Will Deacon, arndb, eparis, LKML, linux-alpha, dsaxena, Al Viro,
	user-mode-linux-user, linux-audit, sparclinux, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <1393319784-2758-2-git-send-email-takahiro.akashi@linaro.org>

On Tue, Feb 25, 2014 at 1:16 AM, AKASHI Takahiro
<takahiro.akashi@linaro.org> wrote:
> Currently AUDITSYSCALL has a long list of architecture depencency:
>        depends on AUDIT && (X86 || PARISC || PPC || S390 || IA64 || UML ||
>                 SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT) || ALPHA)
> The purpose of this patch is to replace it with HAVE_ARCH_AUDITSYSCALL
> for simplicity.
>
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> ---
>  arch/alpha/Kconfig     |    1 +
>  arch/arm/Kconfig       |    1 +
>  arch/ia64/Kconfig      |    1 +
>  arch/parisc/Kconfig    |    1 +
>  arch/powerpc/Kconfig   |    1 +
>  arch/s390/Kconfig      |    1 +
>  arch/sh/Kconfig        |    1 +
>  arch/sparc/Kconfig     |    1 +
>  arch/um/Kconfig.common |    1 +
>  arch/x86/Kconfig       |    1 +
>  init/Kconfig           |    5 ++++-
>  11 files changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
> index f6c6b34..b7ff9a3 100644
> --- a/arch/alpha/Kconfig
> +++ b/arch/alpha/Kconfig
> @@ -22,6 +22,7 @@ config ALPHA
>         select GENERIC_SMP_IDLE_THREAD
>         select GENERIC_STRNCPY_FROM_USER
>         select GENERIC_STRNLEN_USER
> +       select HAVE_ARCH_AUDITSYSCALL
>         select HAVE_MOD_ARCH_SPECIFIC
>         select MODULES_USE_ELF_RELA
>         select ODD_RT_SIGACTION

Thanks.

Acked-by: Matt Turner <mattst88@gmail.com>

^ permalink raw reply

* Re: [PATCH v2 01/11] perf: add PMU_RANGE_ATTR() helper for use by sw-like pmus
From: Cody P Schafer @ 2014-02-25 20:33 UTC (permalink / raw)
  To: Michael Ellerman, Linux PPC, Arnaldo Carvalho de Melo,
	Ingo Molnar, Paul Mackerras, Peter Zijlstra
  Cc: LKML
In-Reply-To: <20140225033326.135BB2C0227@ozlabs.org>

On 02/24/2014 07:33 PM, Michael Ellerman wrote:
> On Fri, 2014-14-02 at 22:02:05 UTC, Cody P Schafer wrote:
>> Add PMU_RANGE_ATTR() and PMU_RANGE_RESV() (for reserved areas) which
>> generate functions to extract the relevent bits from
>> event->attr.config{,1,2} for use by sw-like pmus where the
>> 'config{,1,2}' values don't map directly to hardware registers.
>>
>> Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
>> ---
>>   include/linux/perf_event.h | 17 +++++++++++++++++
>>   1 file changed, 17 insertions(+)
>>
>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> index e56b07f..2702e91 100644
>> --- a/include/linux/perf_event.h
>> +++ b/include/linux/perf_event.h
>> @@ -871,4 +871,21 @@ _name##_show(struct device *dev,					\
>>   									\
>>   static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
>>
>> +#define PMU_RANGE_ATTR(name, attr_var, bit_start, bit_end)		\
>> +PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end);		\
>> +PMU_RANGE_RESV(name, attr_var, bit_start, bit_end)
>> +
>> +#define PMU_RANGE_RESV(name, attr_var, bit_start, bit_end)		\
>> +static u64 event_get_##name##_max(void)					\
>> +{									\
>> +	int bits = (bit_end) - (bit_start) + 1;				\
>> +	return ((0x1ULL << (bits - 1ULL)) - 1ULL) |			\
>> +		(0xFULL << (bits - 4ULL));				\
>> +}									\
>> +static u64 event_get_##name(struct perf_event *event)			\
>> +{									\
>> +	return (event->attr.attr_var >> (bit_start)) &			\
>> +		event_get_##name##_max();				\
>> +}
>
> I still don't like the names.
>
> EVENT_GETTER_AND_FORMAT()

EVENT_RANGE()

I'd prefer to describe the intended usage rather than what is generated 
both in case we change some of the specifics later, and to provide 
additional information to the developers beyond what a simple code 
reading gives.

> EVENT_RESERVED()

Sure. The PMU_* naming was just based on the PMU_FORMAT_ATTR() naming, 
so I kept it for continuity with the existing API. Maybe 
EVENT_RANGE_RESERVED() would be more appropriate?

> ?
>
> It's not clear to me the max routine is useful in general. Can't we just do:
>
>> +#define EVENT_RESERVED(name, attr_var, bit_start, bit_end)		\
>> +static u64 event_get_##name(struct perf_event *event)		\
>> +{									\
>> +	return (event->attr.attr_var >> (bit_start)) &			\
>> +		((0x1ULL << ((bit_end) - (bit_start) + 1)) - 1ULL);	\
>> +}

I use event_get_*_max() for some checking of parameters in event_init().
Having it lets me avoid specifying the maximum explicitly (0xfffff for
bits 0-19, for example). Specifying it explicitly would mean we'd have the
bit width of the field in question encoded in two places instead of one,
and I'd prefer to avoid unneeded duplication.

^ permalink raw reply

* Re: [PATCH v2 05/11] powerpc: add hv_gpci interface header
From: Cody P Schafer @ 2014-02-25 20:35 UTC (permalink / raw)
  To: Michael Ellerman, Linux PPC
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo
In-Reply-To: <20140225033328.1D5652C030B@ozlabs.org>

On 02/24/2014 07:33 PM, Michael Ellerman wrote:
> On Fri, 2014-14-02 at 22:02:09 UTC, Cody P Schafer wrote:
>> "H_GetPerformanceCounterInfo" (refered to as hv_gpci or just gpci from
>> here on) is an interface to retrieve specific performance counters and
>> other data from the hypervisor. All outputs have a fixed format (and
>> are represented as structs in this patch).
>
> I still see unused stuff in here, can you strip it back to just what we need.
> Same goes for the next patch.
>

Sure, I can remove the unused structures and enum entries (hadn't 
realized you wanted that in the last review).

^ permalink raw reply

* Re: [PATCH v2 09/11] powerpc/perf: add support for the hv 24x7 interface
From: Cody P Schafer @ 2014-02-25 20:55 UTC (permalink / raw)
  To: Michael Ellerman, Linux PPC
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo
In-Reply-To: <20140225033329.BBB492C033B@ozlabs.org>

On 02/24/2014 07:33 PM, Michael Ellerman wrote:
> On Fri, 2014-14-02 at 22:02:13 UTC, Cody P Schafer wrote:
>> This provides a basic interface between hv_24x7 and perf. Similar to
>> the one provided for gpci, it lacks transaction support and does not
>> list any events.
>>
>> Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
>> ---
>>   arch/powerpc/perf/hv-24x7.c | 491 ++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 491 insertions(+)
>>   create mode 100644 arch/powerpc/perf/hv-24x7.c
>>
>> diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
>> new file mode 100644
>> index 0000000..13de140
>> --- /dev/null
>> +++ b/arch/powerpc/perf/hv-24x7.c
> ...
>> +
>> +/*
>> + * read_offset_data - copy data from one buffer to another while treating the
>> + *                    source buffer as a small view on the total avaliable
>> + *                    source data.
>> + *
>> + * @dest: buffer to copy into
>> + * @dest_len: length of @dest in bytes
>> + * @requested_offset: the offset within the source data we want. Must be > 0
>> + * @src: buffer to copy data from
>> + * @src_len: length of @src in bytes
>> + * @source_offset: the offset in the sorce data that (src,src_len) refers to.
>> + *                 Must be > 0
>> + *
>> + * returns the number of bytes copied.
>> + *
>> + * '.' areas in d are written to.
>> + *
>> + *                       u
>> + *   x         w	 v  z
>> + * d           |.........|
>> + * s |----------------------|
>> + *
>> + *                      u
>> + *   x         w	z     v
>> + * d           |........------|
>> + * s |------------------|
>> + *
>> + *   x         w        u,z,v
>> + * d           |........|
>> + * s |------------------|
>> + *
>> + *   x,w                u,v,z
>> + * d |------------------|
>> + * s |------------------|
>> + *
>> + *   x        u
>> + *   w        v		z
>> + * d |........|
>> + * s |------------------|
>> + *
>> + *   x      z   w      v
>> + * d            |------|
>> + * s |------|
>> + *
>> + * x = source_offset
>> + * w = requested_offset
>> + * z = source_offset + src_len
>> + * v = requested_offset + dest_len
>> + *
>> + * w_offset_in_s = w - x = requested_offset - source_offset
>> + * z_offset_in_s = z - x = src_len
>> + * v_offset_in_s = v - x = request_offset + dest_len - src_len
>> + * u_offset_in_s = min(z_offset_in_s, v_offset_in_s)
>> + *
>> + * copy_len = u_offset_in_s - w_offset_in_s = min(z_offset_in_s, v_offset_in_s)
>> + *						- w_offset_in_s
>
> Comments are great, especially for complicated code like this. But at a glance
> I don't actually understand what this comment is trying to tell me.

The function was composed via some number line logic. The comment tries
to explain what that logic is. The ascii art is various overlapping
buffers that we're copying between (the '+'s from the patch are messing
with the indenting of some of the labels). The only major omission I'm
seeing is that I failed to note that d=dest and s=src (though this could be
inferred from the comment about '.' indicating a write).

Is there anything specific that doesn't make sense in the comment? (It
may not be a comment that really can be read at a glance.)

>
>> + */
>> +static ssize_t read_offset_data(void *dest, size_t dest_len,
>> +				loff_t requested_offset, void *src,
>> +				size_t src_len, loff_t source_offset)
>> +{
>> +	size_t w_offset_in_s = requested_offset - source_offset;
>> +	size_t z_offset_in_s = src_len;
>> +	size_t v_offset_in_s = requested_offset + dest_len - src_len;
>> +	size_t u_offset_in_s = min(z_offset_in_s, v_offset_in_s);
>> +	size_t copy_len = u_offset_in_s - w_offset_in_s;
>> +
>> +	if (requested_offset < 0 || source_offset < 0)
>> +		return -EINVAL;
>> +
>> +	if (z_offset_in_s <= w_offset_in_s)
>> +		return 0;
>> +
>> +	memcpy(dest, src + w_offset_in_s, copy_len);
>> +	return copy_len;
>> +}
>> +
>> +static unsigned long h_get_24x7_catalog_page(char page[static 4096],
>> +					     u32 version, u32 index)
>> +{
>> +	WARN_ON(!IS_ALIGNED((unsigned long)page, 4096));
>> +	return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
>> +			virt_to_phys(page),
>> +			version,
>> +			index);
>> +}
>> +
>> +static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
>> +			    struct bin_attribute *bin_attr, char *buf,
>> +			    loff_t offset, size_t count)
>> +{
>> +	unsigned long hret;
>> +	ssize_t ret = 0;
>> +	size_t catalog_len = 0, catalog_page_len = 0, page_count = 0;
>> +	loff_t page_offset = 0;
>> +	uint32_t catalog_version_num = 0;
>> +	void *page = kmalloc(4096, GFP_USER);
>> +	struct hv_24x7_catalog_page_0 *page_0 = page;
>> +	if (!page)
>> +		return -ENOMEM;
>> +
>> +
>> +	hret = h_get_24x7_catalog_page(page, 0, 0);
>> +	if (hret) {
>> +		ret = -EIO;
>> +		goto e_free;
>> +	}
>> +
>> +	catalog_version_num = be32_to_cpu(page_0->version);
>> +	catalog_page_len = be32_to_cpu(page_0->length);
>> +	catalog_len = catalog_page_len * 4096;
>> +
>> +	page_offset = offset / 4096;
>> +	page_count  = count  / 4096;
>> +
>> +	if (page_offset >= catalog_page_len)
>> +		goto e_free;
>> +
>> +	if (page_offset != 0) {
>> +		hret = h_get_24x7_catalog_page(page, catalog_version_num,
>> +					       page_offset);
>> +		if (hret) {
>> +			ret = -EIO;
>> +			goto e_free;
>> +		}
>> +	}
>> +
>> +	ret = read_offset_data(buf, count, offset,
>> +				page, 4096, page_offset * 4096);
>> +e_free:
>> +	if (hret)
>> +		pr_err("h_get_24x7_catalog_page(ver=%d, page=%lld) failed: rc=%ld\n",
>> +				catalog_version_num, page_offset, hret);
>> +	kfree(page);
>> +
>> +	pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n",
>> +			offset, page_offset, count, page_count, catalog_len,
>> +			catalog_page_len, ret);
>> +
>> +	return ret;
>> +}
>> +
>> +#define PAGE_0_ATTR(_name, _fmt, _expr)				\
>> +static ssize_t _name##_show(struct device *dev,			\
>> +			    struct device_attribute *dev_attr,	\
>> +			    char *buf)				\
>> +{								\
>> +	unsigned long hret;					\
>> +	ssize_t ret = 0;					\
>> +	void *page = kmalloc(4096, GFP_USER);			\
>> +	struct hv_24x7_catalog_page_0 *page_0 = page;		\
>> +	if (!page)						\
>> +		return -ENOMEM;					\
>> +	hret = h_get_24x7_catalog_page(page, 0, 0);		\
>> +	if (hret) {						\
>> +		ret = -EIO;					\
>> +		goto e_free;					\
>> +	}							\
>> +	ret = sprintf(buf, _fmt, _expr);			\
>> +e_free:								\
>> +	kfree(page);						\
>> +	return ret;						\
>> +}								\
>> +static DEVICE_ATTR_RO(_name)
>> +
>> +PAGE_0_ATTR(catalog_version, "%lld\n",
>> +		(unsigned long long)be32_to_cpu(page_0->version));
>> +PAGE_0_ATTR(catalog_len, "%lld\n",
>> +		(unsigned long long)be32_to_cpu(page_0->length) * 4096);
>> +static BIN_ATTR_RO(catalog, 0/* real length varies */);
>
> So we're dumping the catalog out as a binary blob.

Yep

> Why do we want to do that?

Right now it's the only way to know what events are available.
Additionally, even when the kernel starts parsing events out (and
exposing them via sysfs), there is some additional powerpc specific
structuring ("groups" and "schemas") that some userspace applications may
want to take advantage of.

> It clearly violates the sysfs rule-of-sorts of ASCII and one value per file.
> Obviously there can be exceptions, but what's our justification?

Actual justification is above, but additionally:
I actually was looking at the acpi code that provides (among other 
binary tables) the dsdt as a binary blob in sysfs when I was putting 
this code together. The 24x7 catalog is, in the same manner, a binary 
blob provided by firmware.

>> +static struct bin_attribute *if_bin_attrs[] = {
>> +	&bin_attr_catalog,
>> +	NULL,
>> +};
>> +
>> +static struct attribute *if_attrs[] = {
>> +	&dev_attr_catalog_len.attr,
>> +	&dev_attr_catalog_version.attr,
>> +	NULL,
>> +};
>> +
>> +static struct attribute_group if_group = {
>> +	.name = "interface",
>> +	.bin_attrs = if_bin_attrs,
>> +	.attrs = if_attrs,
>> +};
>
> Both pmus have an "interface" directory, but they don't seem to have anything
> in common? Its feels a little ad-hoc.

It is absolutely ad-hoc. The only similarity is that both groups named 
"interface" provide some additional details about the firmware interface 
they're using to provide the perf data. We could easily call them both 
"misc", "details", put all the attributes in the device root, or call 
them some other generic name. I ended up choosing "interface" because
we're providing details on the firmware interface, and it feels just a
bit less generic. Having device specific names for the attribute group
("24x7" and "gpci", for example) doesn't get us anything because the 
devices themselves already have those names ("hv_24x7" and "hv_gpci"). I 
don't see any reason to make them different.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox