* [PATCH 1/3] powerpc/powernv: Supports PHB3
From: Gavin Shan @ 2013-04-19 9:32 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Gavin Shan
The patch intends to initialize PHB3 during system boot stage. The
flag "PNV_PHB_MODEL_PHB3" is introduced to differentiate IODA2
compatible PHB3 from other types of PHBs.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
arch/powerpc/platforms/powernv/pci-ioda.c | 62 +++++++++++++++--------------
arch/powerpc/platforms/powernv/pci.c | 7 +++-
arch/powerpc/platforms/powernv/pci.h | 8 ++-
3 files changed, 43 insertions(+), 34 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 8e90e89..8993242 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -852,18 +852,19 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}
-void __init pnv_pci_init_ioda1_phb(struct device_node *np)
+void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
{
struct pci_controller *hose;
static int primary = 1;
struct pnv_phb *phb;
unsigned long size, m32map_off, iomap_off, pemap_off;
const u64 *prop64;
+ const u32 *prop32;
u64 phb_id;
void *aux;
long rc;
- pr_info(" Initializing IODA OPAL PHB %s\n", np->full_name);
+ pr_info(" Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);
prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
if (!prop64) {
@@ -890,37 +891,34 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
hose->last_busno = 0xff;
hose->private_data = phb;
phb->opal_id = phb_id;
- phb->type = PNV_PHB_IODA1;
+ phb->type = ioda_type;
/* Detect specific models for error handling */
if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
phb->model = PNV_PHB_MODEL_P7IOC;
+ else if (of_device_is_compatible(np, "ibm,p8-pciex"))
+ phb->model = PNV_PHB_MODEL_PHB3;
else
phb->model = PNV_PHB_MODEL_UNKNOWN;
- /* We parse "ranges" now since we need to deduce the register base
- * from the IO base
- */
+ /* Parse 32-bit and IO ranges (if any) */
pci_process_bridge_OF_ranges(phb->hose, np, primary);
primary = 0;
- /* Magic formula from Milton */
+ /* Get registers */
phb->regs = of_iomap(np, 0);
if (phb->regs == NULL)
pr_err(" Failed to map registers !\n");
-
- /* XXX This is hack-a-thon. This needs to be changed so that:
- * - we obtain stuff like PE# etc... from device-tree
- * - we properly re-allocate M32 ourselves
- * (the OFW one isn't very good)
- */
-
/* Initialize more IODA stuff */
- phb->ioda.total_pe = 128;
+ prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
+ if (!prop32)
+ phb->ioda.total_pe = 1;
+ else
+ phb->ioda.total_pe = *prop32;
phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
- /* OFW Has already off top 64k of M32 space (MSI space) */
+ /* FW Has already off top 64k of M32 space (MSI space) */
phb->ioda.m32_size += 0x10000;
phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
@@ -930,7 +928,10 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
- /* Allocate aux data & arrays */
+ /* Allocate aux data & arrays
+ *
+ * XXX TODO: Don't allocate io segmap on PHB3
+ */
size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
m32map_off = size;
size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
@@ -960,7 +961,7 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
hose->mem_resources[2].start = 0;
hose->mem_resources[2].end = 0;
-#if 0
+#if 0 /* We should really do that ... */
rc = opal_pci_set_phb_mem_window(opal->phb_id,
window_type,
window_num,
@@ -974,16 +975,6 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
phb->ioda.m32_size, phb->ioda.m32_segsize,
phb->ioda.io_size, phb->ioda.io_segsize);
- if (phb->regs) {
- pr_devel(" BUID = 0x%016llx\n", in_be64(phb->regs + 0x100));
- pr_devel(" PHB2_CR = 0x%016llx\n", in_be64(phb->regs + 0x160));
- pr_devel(" IO_BAR = 0x%016llx\n", in_be64(phb->regs + 0x170));
- pr_devel(" IO_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x178));
- pr_devel(" IO_SAR = 0x%016llx\n", in_be64(phb->regs + 0x180));
- pr_devel(" M32_BAR = 0x%016llx\n", in_be64(phb->regs + 0x190));
- pr_devel(" M32_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x198));
- pr_devel(" M32_SAR = 0x%016llx\n", in_be64(phb->regs + 0x1a0));
- }
phb->hose->ops = &pnv_pci_ops;
/* Setup RID -> PE mapping function */
@@ -1011,7 +1002,18 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
if (rc)
pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
- opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
+
+ /*
+ * On IODA1 map everything to PE#0, on IODA2 we assume the IODA reset
+ * has cleared the RTT which has the same effect
+ */
+ if (ioda_type == PNV_PHB_IODA1)
+ opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
+}
+
+void pnv_pci_init_ioda2_phb(struct device_node *np)
+{
+ pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2);
}
void __init pnv_pci_init_ioda_hub(struct device_node *np)
@@ -1034,6 +1036,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np)
for_each_child_of_node(np, phbn) {
/* Look for IODA1 PHBs */
if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
- pnv_pci_init_ioda1_phb(phbn);
+ pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1);
}
}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b8b8e0b..e088dc7 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -525,12 +525,13 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
pnv_pci_dma_fallback_setup(hose, pdev);
}
-/* Fixup wrong class code in p7ioc root complex */
+/* Fixup wrong class code in p7ioc and p8 root complex */
static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
{
dev->class = PCI_CLASS_BRIDGE_PCI << 8;
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x2da, pnv_p7ioc_rc_quirk);
static int pnv_pci_probe_mode(struct pci_bus *bus)
{
@@ -591,6 +592,10 @@ void __init pnv_pci_init(void)
if (!found_ioda)
for_each_compatible_node(np, NULL, "ibm,p5ioc2")
pnv_pci_init_p5ioc2_hub(np);
+
+ /* Look for ioda2 built-in PHB3's */
+ for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
+ pnv_pci_init_ioda2_phb(np);
}
/* Setup the linkage between OF nodes and PHBs */
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 7cfb7c8..4ce91f5 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -4,9 +4,9 @@
struct pci_dn;
enum pnv_phb_type {
- PNV_PHB_P5IOC2,
- PNV_PHB_IODA1,
- PNV_PHB_IODA2,
+ PNV_PHB_P5IOC2 = 0,
+ PNV_PHB_IODA1 = 1,
+ PNV_PHB_IODA2 = 2,
};
/* Precise PHB model for error management */
@@ -14,6 +14,7 @@ enum pnv_phb_model {
PNV_PHB_MODEL_UNKNOWN,
PNV_PHB_MODEL_P5IOC2,
PNV_PHB_MODEL_P7IOC,
+ PNV_PHB_MODEL_PHB3,
};
#define PNV_PCI_DIAG_BUF_SIZE 4096
@@ -150,6 +151,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
u64 dma_offset);
extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
extern void pnv_pci_init_ioda_hub(struct device_node *np);
+extern void pnv_pci_init_ioda2_phb(struct device_node *np);
#endif /* __POWERNV_PCI_H */
--
1.7.5.4
^ permalink raw reply related
* [PATCH 3/3] powerpc/powernv: Patch MSI EOI handler on P8
From: Gavin Shan @ 2013-04-19 9:32 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1366363965-23281-1-git-send-email-shangw@linux.vnet.ibm.com>
The EOI handler of MSI/MSI-X interrupts for P8 (PHB3) need additional
steps to handle the P/Q bits in IVE before EOIing the corresponding
interrupt. The patch changes the EOI handler to cover that.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
arch/powerpc/platforms/powernv/pci-ioda.c | 33 +++++++++++++++++++++++++++++
arch/powerpc/platforms/powernv/pci.c | 19 ++++++++++++++++
arch/powerpc/platforms/powernv/pci.h | 1 +
arch/powerpc/sysdev/xics/icp-native.c | 33 ++++++++++++++++++++++++++++-
4 files changed, 85 insertions(+), 1 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index d8d5baa..de4a4a9 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -645,6 +645,37 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
return 0;
}
+static int pnv_pci_ioda_msi_eoi(struct pnv_phb *phb, unsigned int hw_irq)
+{
+ u8 p_bit = 1, q_bit = 1;
+ long rc;
+
+ while (p_bit || q_bit) {
+ rc = opal_pci_get_xive_reissue(phb->opal_id,
+ hw_irq - phb->msi_base, &p_bit, &q_bit);
+ if (rc) {
+ pr_warning("%s: Failed to get P/Q bits of IRQ#%d "
+ "on PHB#%d, rc=%ld\n", __func__, hw_irq,
+ phb->hose->global_number, rc);
+ return -EIO;
+ }
+ if (!p_bit && !q_bit)
+ break;
+
+ rc = opal_pci_set_xive_reissue(phb->opal_id,
+ hw_irq - phb->msi_base, p_bit, q_bit);
+ if (rc) {
+ pr_warning("%s: Failed to clear P/Q (%01d/%01d) of "
+ "IRQ#%d on PHB#%d, rc=%ld\n", __func__,
+ p_bit, q_bit, hw_irq,
+ phb->hose->global_number, rc);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
unsigned int bmap_size;
@@ -667,6 +698,8 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
return;
}
phb->msi_setup = pnv_pci_ioda_msi_setup;
+ if (phb->type == PNV_PHB_IODA2)
+ phb->msi_eoi = pnv_pci_ioda_msi_eoi;
phb->msi32_support = 1;
pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
phb->msi_count, phb->msi_base);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index e088dc7..439dfc5 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -148,6 +148,25 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
irq_dispose_mapping(entry->irq);
}
}
+
+int pnv_pci_msi_eoi(unsigned int hw_irq)
+{
+ struct pci_controller *hose, *tmp;
+ struct pnv_phb *phb = NULL;
+
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ phb = hose->private_data;
+ if (hw_irq >= phb->msi_base &&
+ hw_irq < phb->msi_base + phb->msi_count) {
+ if (!phb->msi_eoi)
+ return -EEXIST;
+ return phb->msi_eoi(phb, hw_irq);
+ }
+ }
+
+ /* For LSI interrupts, we needn't do it */
+ return 0;
+}
#endif /* CONFIG_PCI_MSI */
static void pnv_pci_dump_p7ioc_diag_data(struct pnv_phb *phb)
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index fcd5135..4ae015b 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -101,6 +101,7 @@ struct pnv_phb {
int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev,
unsigned int hwirq, unsigned int is_64,
struct msi_msg *msg);
+ int (*msi_eoi)(struct pnv_phb *phb, unsigned int hw_irq);
void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
void (*fixup_phb)(struct pci_controller *hose);
u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 48861d3..289355e 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -27,6 +27,10 @@
#include <asm/xics.h>
#include <asm/kvm_ppc.h>
+#if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_PCI_MSI)
+extern int pnv_pci_msi_eoi(unsigned int hw_irq);
+#endif
+
struct icp_ipl {
union {
u32 word;
@@ -89,6 +93,24 @@ static void icp_native_eoi(struct irq_data *d)
icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
}
+static void icp_p8_native_eoi(struct irq_data *d)
+{
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+ int ret;
+
+ /* Let firmware handle P/Q bits */
+#if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_PCI_MSI)
+ if (hw_irq != XICS_IPI) {
+ ret = pnv_pci_msi_eoi(hw_irq);
+ WARN_ON_ONCE(ret);
+ }
+#endif
+
+ /* EOI on ICP */
+ iosync();
+ icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
static void icp_native_teardown_cpu(void)
{
int cpu = smp_processor_id();
@@ -264,7 +286,7 @@ static int __init icp_native_init_one_node(struct device_node *np,
return 0;
}
-static const struct icp_ops icp_native_ops = {
+static struct icp_ops icp_native_ops = {
.get_irq = icp_native_get_irq,
.eoi = icp_native_eoi,
.set_priority = icp_native_set_cpu_priority,
@@ -296,6 +318,15 @@ int __init icp_native_init(void)
if (found == 0)
return -ENODEV;
+ /* Change the EOI handler for P8 */
+#if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_PCI_MSI)
+ np = of_find_compatible_node(NULL, NULL, "ibm,power8-xicp");
+ if (np) {
+ icp_native_ops.eoi = icp_p8_native_eoi;
+ of_node_put(np);
+ }
+#endif
+
icp_ops = &icp_native_ops;
return 0;
--
1.7.5.4
^ permalink raw reply related
* [PATCH] powerpc/fsl: Enable CONFIG_E1000E in mpc85xx_smp_defconfig
From: Chunhe Lan @ 2013-04-19 8:13 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Chunhe Lan
On the most boards of Freescale platform, they use the PCI-Express
Intel(R) PRO/1000 gigabit ethernet card to work. So enable the
corresponding driver for it.
Signed-off-by: Chunhe Lan <Chunhe.Lan@freescale.com>
---
arch/powerpc/configs/mpc85xx_smp_defconfig | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index 8d00ea5b..6996ea3 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -114,6 +114,7 @@ CONFIG_DUMMY=y
CONFIG_FS_ENET=y
CONFIG_UCC_GETH=y
CONFIG_GIANFAR=y
+CONFIG_E1000E=y
CONFIG_MARVELL_PHY=y
CONFIG_DAVICOM_PHY=y
CONFIG_CICADA_PHY=y
--
1.7.6.5
^ permalink raw reply related
* Re: [PATCH v7 3/3] of/pci: mips: convert to common of_pci_range_parser
From: Gabor Juhos @ 2013-04-19 7:19 UTC (permalink / raw)
To: Jason Cooper
Cc: linux-mips@linux-mips.org, siva.kallam@samsung.com,
linux-pci@vger.kernel.org, Linus Walleij, Thierry Reding,
Liviu Dudau, Paul Mackerras, linux-samsung-soc,
Russell King - ARM Linux, Jingoo Han, Jason Gunthorpe,
Thomas Abraham, Arnd Bergmann,
devicetree-discuss@lists.ozlabs.org, rob.herring@calxeda.com,
Kukjin Kim, bhelgaas@google.com,
linux-arm-kernel@lists.infradead.org, Thomas Petazzoni,
Michal Simek, linux-kernel@vger.kernel.org,
suren.reddy@samsung.com, Andrew Murray,
linuxppc-dev@lists.ozlabs.org list
In-Reply-To: <20130418130919.GI27197@titan.lakedaemon.net>
2013.04.18. 15:09 keltezéssel, Jason Cooper írta:
> On Thu, Apr 18, 2013 at 01:59:10PM +0100, Andrew Murray wrote:
>> On Wed, Apr 17, 2013 at 04:42:48PM +0100, Linus Walleij wrote:
>>> On Tue, Apr 16, 2013 at 12:18 PM, Andrew Murray <Andrew.Murray@arm.com> wrote:
>>>
>>>> This patch converts the pci_load_of_ranges function to use the new common
>>>> of_pci_range_parser.
>>>>
>>>> Signed-off-by: Andrew Murray <Andrew.Murray@arm.com>
>>>> Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
>>>> Reviewed-by: Rob Herring <rob.herring@calxeda.com>
>>>
>>> Tested-by: Linus Walleij <linus.walleij@linaro.org>
>>
>> Jason - you may not have seen this, but here (Linus Walleij) is another Tested-by
>> to add to this patch in your tree (if you can).
>
> Thanks, I saw it. Unfortunately, the PR was already sent, and the branch
> is now pulled into arm-soc.
Sorry I had no time earlier, but I have tested this now on MIPS. The patch
causes build errors unfortunately. Given the fact that this has been merged
already, I will send a fixup patch.
-Gabor
^ permalink raw reply
* Re: [RFC PATCH powerpc] make CONFIG_NUMA depends on CONFIG_SMP
From: Michael Ellerman @ 2013-04-19 7:20 UTC (permalink / raw)
To: Li Zhong; +Cc: PowerPC email list, Paul Mackerras
In-Reply-To: <1366337453.3250.16.camel@ThinkPad-T5421.cn.ibm.com>
On Fri, 2013-04-19 at 10:10 +0800, Li Zhong wrote:
> On Thu, 2013-04-18 at 11:46 +1000, Michael Ellerman wrote:
> > On Wed, May 30, 2012 at 05:31:58PM +0800, Li Zhong wrote:
> > > I'm not sure whether it makes sense to add this dependency to avoid
> > > CONFI_NUMA && !CONFIG_SMP.
> > >
> > > I want to do this because I saw some build errors on next-tree when
> > > compiling with CONFIG_SMP disabled, and it seems they are caused by some
> > > codes under the CONFIG_NUMA #ifdefs.
> >
> > This seems to make sense to me. Can you please repost with a better
> > changelog and a description of the actual build error you were seeing.
>
> I tried it today, but didn't find any build errors any more, guess those
> errors should have already been fixed.
>
> But it seems to me by disabling CONFIG_NUMA when CONFIG_SMP is disabled,
> could at least prevent some unnecessary code being compiled into the
> kernel. (After building a kernel with/without CONFIG_NUMA just now, it
> seems that the vmlinux is ~100K smaller without CONFIG_NUMA).
>
> I'm not sure whether this is still needed.
Yeah we'll leave your patch out. Unless someone cares deeply about the
size of the UP build, I think it's better to just leave them as separate
options.
cheers
^ permalink raw reply
* Re: [RFC PATCH powerpc] make CONFIG_NUMA depends on CONFIG_SMP
From: Li Zhong @ 2013-04-19 2:10 UTC (permalink / raw)
To: Michael Ellerman; +Cc: PowerPC email list, Paul Mackerras
In-Reply-To: <20130418014559.GB8261@concordia>
On Thu, 2013-04-18 at 11:46 +1000, Michael Ellerman wrote:
> On Wed, May 30, 2012 at 05:31:58PM +0800, Li Zhong wrote:
> > I'm not sure whether it makes sense to add this dependency to avoid
> > CONFI_NUMA && !CONFIG_SMP.
> >
> > I want to do this because I saw some build errors on next-tree when
> > compiling with CONFIG_SMP disabled, and it seems they are caused by some
> > codes under the CONFIG_NUMA #ifdefs.
>
> This seems to make sense to me. Can you please repost with a better
> changelog and a description of the actual build error you were seeing.
I tried it today, but didn't find any build errors any more, guess those
errors should have already been fixed.
But it seems to me by disabling CONFIG_NUMA when CONFIG_SMP is disabled,
could at least prevent some unnecessary code being compiled into the
kernel. (After building a kernel with/without CONFIG_NUMA just now, it
seems that the vmlinux is ~100K smaller without CONFIG_NUMA).
I'm not sure whether this is still needed.
Thanks, Zhong
>
> cheers
>
^ permalink raw reply
* Re: [PATCH -V5 00/25] THP support for PPC64
From: Simon Jeons @ 2013-04-19 1:55 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: paulus, linuxppc-dev, linux-mm
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Hi Aneesh,
On 04/04/2013 01:57 PM, Aneesh Kumar K.V wrote:
> Hi,
>
> This patchset adds transparent hugepage support for PPC64.
>
> TODO:
> * hash preload support in update_mmu_cache_pmd (we don't do that for hugetlb)
>
> Some numbers:
>
> The latency measurements code from Anton found at
> http://ozlabs.org/~anton/junkcode/latency2001.c
>
> THP disabled 64K page size
> ------------------------
> [root@llmp24l02 ~]# ./latency2001 8G
> 8589934592 731.73 cycles 205.77 ns
> [root@llmp24l02 ~]# ./latency2001 8G
> 8589934592 743.39 cycles 209.05 ns
> [root@llmp24l02 ~]#
>
> THP disabled large page via hugetlbfs
> -------------------------------------
> [root@llmp24l02 ~]# ./latency2001 -l 8G
> 8589934592 416.09 cycles 117.01 ns
> [root@llmp24l02 ~]# ./latency2001 -l 8G
> 8589934592 415.74 cycles 116.91 ns
>
> THP enabled 64K page size.
> ----------------
> [root@llmp24l02 ~]# ./latency2001 8G
> 8589934592 405.07 cycles 113.91 ns
> [root@llmp24l02 ~]# ./latency2001 8G
> 8589934592 411.82 cycles 115.81 ns
> [root@llmp24l02 ~]#
>
> We are close to hugetlbfs in latency and we can achieve this with zero
> config/page reservation. Most of the allocations above are fault allocated.
>
> Another test that does 50000000 random access over 1GB area goes from
> 2.65 seconds to 1.07 seconds with this patchset.
>
> split_huge_page impact:
> ---------------------
> To look at the performance impact of large page invalidate, I tried the below
> experiment. The test involved, accessing a large contiguous region of memory
> location as below
>
> for (i = 0; i < size; i += PAGE_SIZE)
> data[i] = i;
>
> We wanted to access the data in sequential order so that we look at the
> worst case THP performance. Accesing the data in sequential order implies
> we have the Page table cached and overhead of TLB miss is as minimal as
> possible. We also don't touch the entire page, because that can result in
> cache evict.
>
> After we touched the full range as above, we now call mprotect on each
> of that page. A mprotect will result in a hugepage split. This should
> allow us to measure the impact of hugepage split.
>
> for (i = 0; i < size; i += PAGE_SIZE)
> mprotect(&data[i], PAGE_SIZE, PROT_READ);
>
> Split hugepage impact:
> ---------------------
> THP enabled: 2.851561705 seconds for test completion
> THP disable: 3.599146098 seconds for test completion
>
> We are 20.7% better than non THP case even when we have all the large pages split.
>
> Detailed output:
>
> THP enabled:
> ---------------------------------------
> [root@llmp24l02 ~]# cat /proc/vmstat | grep thp
> thp_fault_alloc 0
> thp_fault_fallback 0
> thp_collapse_alloc 0
> thp_collapse_alloc_failed 0
> thp_split 0
> thp_zero_page_alloc 0
> thp_zero_page_alloc_failed 0
> [root@llmp24l02 ~]# /root/thp/tools/perf/perf stat -e page-faults,dTLB-load-misses ./split-huge-page-mpro 20G
> time taken to touch all the data in ns: 2763096913
>
> Performance counter stats for './split-huge-page-mpro 20G':
>
> 1,581 page-faults
> 3,159 dTLB-load-misses
>
> 2.851561705 seconds time elapsed
>
> [root@llmp24l02 ~]#
> [root@llmp24l02 ~]# cat /proc/vmstat | grep thp
> thp_fault_alloc 1279
> thp_fault_fallback 0
> thp_collapse_alloc 0
> thp_collapse_alloc_failed 0
> thp_split 1279
> thp_zero_page_alloc 0
> thp_zero_page_alloc_failed 0
> [root@llmp24l02 ~]#
>
> 77.05% split-huge-page [kernel.kallsyms] [k] .clear_user_page
> 7.10% split-huge-page [kernel.kallsyms] [k] .perf_event_mmap_ctx
> 1.51% split-huge-page split-huge-page-mpro [.] 0x0000000000000a70
> 0.96% split-huge-page [unknown] [H] 0x000000000157e3bc
> 0.81% split-huge-page [kernel.kallsyms] [k] .up_write
> 0.76% split-huge-page [kernel.kallsyms] [k] .perf_event_mmap
> 0.76% split-huge-page [kernel.kallsyms] [k] .down_write
> 0.74% split-huge-page [kernel.kallsyms] [k] .lru_add_page_tail
> 0.61% split-huge-page [kernel.kallsyms] [k] .split_huge_page
> 0.59% split-huge-page [kernel.kallsyms] [k] .change_protection
> 0.51% split-huge-page [kernel.kallsyms] [k] .release_pages
>
>
> 0.96% split-huge-page [unknown] [H] 0x000000000157e3bc
> |
> |--79.44%-- reloc_start
> | |
> | |--86.54%-- .__pSeries_lpar_hugepage_invalidate
> | | .pSeries_lpar_hugepage_invalidate
> | | .hpte_need_hugepage_flush
> | | .split_huge_page
> | | .__split_huge_page_pmd
> | | .vma_adjust
> | | .vma_merge
> | | .mprotect_fixup
> | | .SyS_mprotect
>
>
> THP disabled:
> ---------------
> [root@llmp24l02 ~]# echo never > /sys/kernel/mm/transparent_hugepage/enabled
> [root@llmp24l02 ~]# /root/thp/tools/perf/perf stat -e page-faults,dTLB-load-misses ./split-huge-page-mpro 20G
> time taken to touch all the data in ns: 3513767220
>
> Performance counter stats for './split-huge-page-mpro 20G':
>
> 3,27,726 page-faults
> 3,29,654 dTLB-load-misses
>
> 3.599146098 seconds time elapsed
>
> [root@llmp24l02 ~]#
Thanks for your great work. One question about page table of ppc64:
Why x86 use tree based page table and ppc64 use hash based page table?
>
> Changes from V4:
> * Fix bad page error in page_table_alloc
> BUG: Bad page state in process stream pfn:f1a59
> page:f0000000034dc378 count:1 mapcount:0 mapping: (null) index:0x0
> [c000000f322c77d0] [c00000000015e198] .bad_page+0xe8/0x140
> [c000000f322c7860] [c00000000015e3c4] .free_pages_prepare+0x1d4/0x1e0
> [c000000f322c7910] [c000000000160450] .free_hot_cold_page+0x50/0x230
> [c000000f322c79c0] [c00000000003ad18] .page_table_alloc+0x168/0x1c0
>
> Changes from V3:
> * PowerNV boot fixes
>
> Change from V2:
> * Change patch "powerpc: Reduce PTE table memory wastage" to use much simpler approach
> for PTE page sharing.
> * Changes to handle huge pages in KVM code.
> * Address other review comments
>
> Changes from V1
> * Address review comments
> * More patch split
> * Add batch hpte invalidate for hugepages.
>
> Changes from RFC V2:
> * Address review comments
> * More code cleanup and patch split
>
> Changes from RFC V1:
> * HugeTLB fs now works
> * Compile issues fixed
> * rebased to v3.8
> * Patch series reorded so that ppc64 cleanups and MM THP changes are moved
> early in the series. This should help in picking those patches early.
>
> Thanks,
> -aneesh
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply
* Re: [PATCH] [RFC] powerpc: Add VDSO version of time
From: Anton Blanchard @ 2013-04-18 22:38 UTC (permalink / raw)
To: Adhemerval Zanella; +Cc: linuxppc-dev
In-Reply-To: <5148C2B3.6010408@linux.vnet.ibm.com>
Hi Adhemerval,
> This patch implement the time syscall as vDSO. I have a glibc patch
> to use it as IFUNC (as latest gettimeofday patch). Below the perf
> numbers:
>
> Baseline PPC32: 380 nsec
> Baseline PPC64: 352 nsec
> vdso PPC32: 20 nsec
> vdso PPC64: 20 nsec
Very nice speedup. One small performance improvement:
+1: ld r8,CFG_TB_UPDATE_COUNT(r3)
+ ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- 1b
Since you are only reading one long you shouldn't need to check the
update count and loop, you will always see a consistent value. The
system call version of time() just does an unprotected load for example.
> I focused on 64 bit kernel, do I need to provide a scheme for 32 bits
> as well?
>
> Any tips, advices, comments?
With the above change and with Michael's comments covered (decent
changelog entry and Signed-off-by):
Acked-by: Anton Blanchard <anton@samba.org>
Anton
^ permalink raw reply
* Re: [PATCH 04/28] proc: Supply PDE attribute setting accessor functions [RFC]
From: Bjorn Helgaas @ 2013-04-18 16:42 UTC (permalink / raw)
To: David Howells
Cc: alsa-devel, netdev, linux-wireless, linux-kernel@vger.kernel.org,
netfilter-devel, Al Viro, linux-pci@vger.kernel.org,
linux-fsdevel, linuxppc-dev, linux-media
In-Reply-To: <20130416182606.27773.55054.stgit@warthog.procyon.org.uk>
On Tue, Apr 16, 2013 at 12:26 PM, David Howells <dhowells@redhat.com> wrote:
> Supply accessor functions to set attributes in proc_dir_entry structs.
>
> The following are supplied: proc_set_size() and proc_set_user().
>
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: linuxppc-dev@lists.ozlabs.org
> cc: linux-media@vger.kernel.org
> cc: netdev@vger.kernel.org
> cc: linux-wireless@vger.kernel.org
> cc: linux-pci@vger.kernel.org
> cc: netfilter-devel@vger.kernel.org
> cc: alsa-devel@alsa-project.org
> ---
>
> arch/powerpc/kernel/proc_powerpc.c | 2 +-
> arch/powerpc/platforms/pseries/reconfig.c | 2 +-
> drivers/media/pci/ttpci/av7110_ir.c | 2 +-
> drivers/net/irda/vlsi_ir.c | 2 +-
> drivers/net/wireless/airo.c | 34 +++++++++--------------------
> drivers/pci/proc.c | 2 +-
For the drivers/pci part:
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
> fs/proc/generic.c | 13 +++++++++++
> include/linux/proc_fs.h | 5 ++++
> kernel/configs.c | 2 +-
> kernel/profile.c | 2 +-
> net/netfilter/xt_recent.c | 3 +--
> sound/core/info.c | 2 +-
> 12 files changed, 38 insertions(+), 33 deletions(-)
>
> diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
> index 41d8ee9..feb8580 100644
> --- a/arch/powerpc/kernel/proc_powerpc.c
> +++ b/arch/powerpc/kernel/proc_powerpc.c
> @@ -83,7 +83,7 @@ static int __init proc_ppc64_init(void)
> &page_map_fops, vdso_data);
> if (!pde)
> return 1;
> - pde->size = PAGE_SIZE;
> + proc_set_size(pde, PAGE_SIZE);
>
> return 0;
> }
> diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
> index d6491bd..f93cdf5 100644
> --- a/arch/powerpc/platforms/pseries/reconfig.c
> +++ b/arch/powerpc/platforms/pseries/reconfig.c
> @@ -452,7 +452,7 @@ static int proc_ppc64_create_ofdt(void)
>
> ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops);
> if (ent)
> - ent->size = 0;
> + proc_set_size(ent, 0);
>
> return 0;
> }
> diff --git a/drivers/media/pci/ttpci/av7110_ir.c b/drivers/media/pci/ttpci/av7110_ir.c
> index eb82286..0e763a7 100644
> --- a/drivers/media/pci/ttpci/av7110_ir.c
> +++ b/drivers/media/pci/ttpci/av7110_ir.c
> @@ -375,7 +375,7 @@ int av7110_ir_init(struct av7110 *av7110)
> if (av_cnt == 1) {
> e = proc_create("av7110_ir", S_IWUSR, NULL, &av7110_ir_proc_fops);
> if (e)
> - e->size = 4 + 256 * sizeof(u16);
> + proc_set_size(e, 4 + 256 * sizeof(u16));
> }
>
> tasklet_init(&av7110->ir.ir_tasklet, av7110_emit_key, (unsigned long) &av7110->ir);
> diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
> index e22cd4e..5f47584 100644
> --- a/drivers/net/irda/vlsi_ir.c
> +++ b/drivers/net/irda/vlsi_ir.c
> @@ -1678,7 +1678,7 @@ vlsi_irda_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> IRDA_WARNING("%s: failed to create proc entry\n",
> __func__);
> } else {
> - ent->size = 0;
> + proc_set_size(ent, 0);
> }
> idev->proc_entry = ent;
> }
> diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
> index 66e398d..21d0233 100644
> --- a/drivers/net/wireless/airo.c
> +++ b/drivers/net/wireless/airo.c
> @@ -4507,73 +4507,63 @@ static int setup_proc_entry( struct net_device *dev,
> airo_entry);
> if (!apriv->proc_entry)
> goto fail;
> - apriv->proc_entry->uid = proc_kuid;
> - apriv->proc_entry->gid = proc_kgid;
> + proc_set_user(apriv->proc_entry, proc_kuid, proc_kgid);
>
> /* Setup the StatsDelta */
> entry = proc_create_data("StatsDelta", S_IRUGO & proc_perm,
> apriv->proc_entry, &proc_statsdelta_ops, dev);
> if (!entry)
> goto fail_stats_delta;
> - entry->uid = proc_kuid;
> - entry->gid = proc_kgid;
> + proc_set_user(entry, proc_kuid, proc_kgid);
>
> /* Setup the Stats */
> entry = proc_create_data("Stats", S_IRUGO & proc_perm,
> apriv->proc_entry, &proc_stats_ops, dev);
> if (!entry)
> goto fail_stats;
> - entry->uid = proc_kuid;
> - entry->gid = proc_kgid;
> + proc_set_user(entry, proc_kuid, proc_kgid);
>
> /* Setup the Status */
> entry = proc_create_data("Status", S_IRUGO & proc_perm,
> apriv->proc_entry, &proc_status_ops, dev);
> if (!entry)
> goto fail_status;
> - entry->uid = proc_kuid;
> - entry->gid = proc_kgid;
> + proc_set_user(entry, proc_kuid, proc_kgid);
>
> /* Setup the Config */
> entry = proc_create_data("Config", proc_perm,
> apriv->proc_entry, &proc_config_ops, dev);
> if (!entry)
> goto fail_config;
> - entry->uid = proc_kuid;
> - entry->gid = proc_kgid;
> + proc_set_user(entry, proc_kuid, proc_kgid);
>
> /* Setup the SSID */
> entry = proc_create_data("SSID", proc_perm,
> apriv->proc_entry, &proc_SSID_ops, dev);
> if (!entry)
> goto fail_ssid;
> - entry->uid = proc_kuid;
> - entry->gid = proc_kgid;
> + proc_set_user(entry, proc_kuid, proc_kgid);
>
> /* Setup the APList */
> entry = proc_create_data("APList", proc_perm,
> apriv->proc_entry, &proc_APList_ops, dev);
> if (!entry)
> goto fail_aplist;
> - entry->uid = proc_kuid;
> - entry->gid = proc_kgid;
> + proc_set_user(entry, proc_kuid, proc_kgid);
>
> /* Setup the BSSList */
> entry = proc_create_data("BSSList", proc_perm,
> apriv->proc_entry, &proc_BSSList_ops, dev);
> if (!entry)
> goto fail_bsslist;
> - entry->uid = proc_kuid;
> - entry->gid = proc_kgid;
> + proc_set_user(entry, proc_kuid, proc_kgid);
>
> /* Setup the WepKey */
> entry = proc_create_data("WepKey", proc_perm,
> apriv->proc_entry, &proc_wepkey_ops, dev);
> if (!entry)
> goto fail_wepkey;
> - entry->uid = proc_kuid;
> - entry->gid = proc_kgid;
> -
> + proc_set_user(entry, proc_kuid, proc_kgid);
> return 0;
>
> fail_wepkey:
> @@ -5695,10 +5685,8 @@ static int __init airo_init_module( void )
>
> airo_entry = proc_mkdir_mode("driver/aironet", airo_perm, NULL);
>
> - if (airo_entry) {
> - airo_entry->uid = proc_kuid;
> - airo_entry->gid = proc_kgid;
> - }
> + if (airo_entry)
> + proc_set_user(airo_entry, proc_kuid, proc_kgid);
>
> for (i = 0; i < 4 && io[i] && irq[i]; i++) {
> airo_print_info("", "Trying to configure ISA adapter at irq=%d "
> diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
> index 12e4fb5..7cde7c1 100644
> --- a/drivers/pci/proc.c
> +++ b/drivers/pci/proc.c
> @@ -419,7 +419,7 @@ int pci_proc_attach_device(struct pci_dev *dev)
> &proc_bus_pci_operations, dev);
> if (!e)
> return -ENOMEM;
> - e->size = dev->cfg_size;
> + proc_set_size(e, dev->cfg_size);
> dev->procent = e;
>
> return 0;
> diff --git a/fs/proc/generic.c b/fs/proc/generic.c
> index 1c07cad..5f6f6c3 100644
> --- a/fs/proc/generic.c
> +++ b/fs/proc/generic.c
> @@ -498,6 +498,19 @@ out:
> return NULL;
> }
> EXPORT_SYMBOL(proc_create_data);
> +
> +void proc_set_size(struct proc_dir_entry *de, loff_t size)
> +{
> + de->size = size;
> +}
> +EXPORT_SYMBOL(proc_set_size);
> +
> +void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid)
> +{
> + de->uid = uid;
> + de->gid = gid;
> +}
> +EXPORT_SYMBOL(proc_set_user);
>
> static void free_proc_entry(struct proc_dir_entry *de)
> {
> diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
> index 805edac..28a4d7e 100644
> --- a/include/linux/proc_fs.h
> +++ b/include/linux/proc_fs.h
> @@ -130,6 +130,9 @@ static inline struct proc_dir_entry *proc_create(const char *name, umode_t mode,
> extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
> struct proc_dir_entry *parent);
>
> +extern void proc_set_size(struct proc_dir_entry *, loff_t);
> +extern void proc_set_user(struct proc_dir_entry *, kuid_t, kgid_t);
> +
> extern struct file *proc_ns_fget(int fd);
> extern bool proc_ns_inode(struct inode *inode);
>
> @@ -158,6 +161,8 @@ static inline struct proc_dir_entry *proc_mkdir(const char *name,
> struct proc_dir_entry *parent) {return NULL;}
> static inline struct proc_dir_entry *proc_mkdir_mode(const char *name,
> umode_t mode, struct proc_dir_entry *parent) { return NULL; }
> +static inline void proc_set_size(struct proc_dir_entry *de, loff_t size) {}
> +static inline void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) {}
>
> struct tty_driver;
> static inline void proc_tty_register_driver(struct tty_driver *driver) {};
> diff --git a/kernel/configs.c b/kernel/configs.c
> index 42e8fa0..c18b1f1 100644
> --- a/kernel/configs.c
> +++ b/kernel/configs.c
> @@ -79,7 +79,7 @@ static int __init ikconfig_init(void)
> if (!entry)
> return -ENOMEM;
>
> - entry->size = kernel_config_data_size;
> + proc_set_size(entry, kernel_config_data_size);
>
> return 0;
> }
> diff --git a/kernel/profile.c b/kernel/profile.c
> index 524ce5e..0bf4007 100644
> --- a/kernel/profile.c
> +++ b/kernel/profile.c
> @@ -600,7 +600,7 @@ int __ref create_proc_profile(void) /* false positive from hotcpu_notifier */
> NULL, &proc_profile_operations);
> if (!entry)
> return 0;
> - entry->size = (1+prof_len) * sizeof(atomic_t);
> + proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t));
> hotcpu_notifier(profile_cpu_callback, 0);
> return 0;
> }
> diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
> index 3db2d38..1e657cf 100644
> --- a/net/netfilter/xt_recent.c
> +++ b/net/netfilter/xt_recent.c
> @@ -401,8 +401,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
> ret = -ENOMEM;
> goto out;
> }
> - pde->uid = uid;
> - pde->gid = gid;
> + proc_set_user(pde, uid, gid);
> #endif
> spin_lock_bh(&recent_lock);
> list_add_tail(&t->list, &recent_net->tables);
> diff --git a/sound/core/info.c b/sound/core/info.c
> index 3aa8864..c7f41c3 100644
> --- a/sound/core/info.c
> +++ b/sound/core/info.c
> @@ -970,7 +970,7 @@ int snd_info_register(struct snd_info_entry * entry)
> mutex_unlock(&info_mutex);
> return -ENOMEM;
> }
> - p->size = entry->size;
> + proc_set_size(p, entry->size);
> }
> entry->p = p;
> if (entry->parent)
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH v7 2/3] of/pci: Provide support for parsing PCI DT ranges property
From: Andrew Murray @ 2013-04-18 15:38 UTC (permalink / raw)
To: Grant Likely
Cc: linux-mips@linux-mips.org, siva.kallam@samsung.com,
linux-pci@vger.kernel.org, linus.walleij@linaro.org,
thierry.reding@avionic-design.de, Liviu Dudau, paulus@samba.org,
linux-samsung-soc@vger.kernel.org, linux@arm.linux.org.uk,
jg1.han@samsung.com, jgunthorpe@obsidianresearch.com,
thomas.abraham@linaro.org, arnd@arndb.de,
devicetree-discuss@lists.ozlabs.org, rob.herring@calxeda.com,
kgene.kim@samsung.com, bhelgaas@google.com,
linux-arm-kernel@lists.infradead.org,
thomas.petazzoni@free-electrons.com, monstr@monstr.eu,
linux-kernel@vger.kernel.org, suren.reddy@samsung.com,
linuxppc-dev@lists.ozlabs.org
In-Reply-To: <CACxGe6u7yoecyTR2r-EzpcRxLuSw-p9KC7jA12wEQZs4of3vFA@mail.gmail.com>
On Thu, Apr 18, 2013 at 04:29:54PM +0100, Grant Likely wrote:
> On Thu, Apr 18, 2013 at 3:24 PM, Andrew Murray <andrew.murray@arm.com> wrote:
> > On Thu, Apr 18, 2013 at 02:44:01PM +0100, Grant Likely wrote:
> >> On Tue, 16 Apr 2013 11:18:27 +0100, Andrew Murray <Andrew.Murray@arm.com> wrote:
> >> > /* Act based on address space type */
> >> > res = NULL;
> >> > - switch ((pci_space >> 24) & 0x3) {
> >> > - case 1: /* PCI IO space */
> >> > + res_type = range.flags & IORESOURCE_TYPE_BITS;
> >> > + if (res_type == IORESOURCE_IO) {
> >>
> >> Why the change from switch() to an if/else if sequence?
> >
> > Russell King suggested this change - see
> > https://patchwork.kernel.org/patch/2323941/
>
> Umm, that isn't what that link shows. That link shows the patch
> already changing to an if/else if construct, and rmk is commenting on
> that.
Arh yes sorry about that. I can't find or remember any good reason why I
changed it from a switch to an if/else :\
Andrew Murray
^ permalink raw reply
* Re: [PATCH v7 2/3] of/pci: Provide support for parsing PCI DT ranges property
From: Grant Likely @ 2013-04-18 15:29 UTC (permalink / raw)
To: Andrew Murray
Cc: linux-mips@linux-mips.org, siva.kallam@samsung.com,
linux-pci@vger.kernel.org, linus.walleij@linaro.org,
thierry.reding@avionic-design.de, Liviu Dudau, paulus@samba.org,
linux-samsung-soc@vger.kernel.org, linux@arm.linux.org.uk,
jg1.han@samsung.com, jgunthorpe@obsidianresearch.com,
thomas.abraham@linaro.org, arnd@arndb.de,
devicetree-discuss@lists.ozlabs.org, rob.herring@calxeda.com,
kgene.kim@samsung.com, bhelgaas@google.com,
linux-arm-kernel@lists.infradead.org,
thomas.petazzoni@free-electrons.com, monstr@monstr.eu,
linux-kernel@vger.kernel.org, suren.reddy@samsung.com,
linuxppc-dev@lists.ozlabs.org
In-Reply-To: <20130418142434.GA18881@arm.com>
On Thu, Apr 18, 2013 at 3:24 PM, Andrew Murray <andrew.murray@arm.com> wrote:
> On Thu, Apr 18, 2013 at 02:44:01PM +0100, Grant Likely wrote:
>> On Tue, 16 Apr 2013 11:18:27 +0100, Andrew Murray <Andrew.Murray@arm.com> wrote:
>> > /* Act based on address space type */
>> > res = NULL;
>> > - switch ((pci_space >> 24) & 0x3) {
>> > - case 1: /* PCI IO space */
>> > + res_type = range.flags & IORESOURCE_TYPE_BITS;
>> > + if (res_type == IORESOURCE_IO) {
>>
>> Why the change from switch() to an if/else if sequence?
>
> Russell King suggested this change - see
> https://patchwork.kernel.org/patch/2323941/
Umm, that isn't what that link shows. That link shows the patch
already changing to an if/else if construct, and rmk is commenting on
that.
g.
^ permalink raw reply
* Re: [PATCH v7 3/3] of/pci: mips: convert to common of_pci_range_parser
From: Andrew Murray @ 2013-04-18 14:26 UTC (permalink / raw)
To: Grant Likely
Cc: linux-mips@linux-mips.org, siva.kallam@samsung.com,
linux-pci@vger.kernel.org, linus.walleij@linaro.org,
thierry.reding@avionic-design.de, Liviu Dudau, paulus@samba.org,
linux-samsung-soc@vger.kernel.org, linux@arm.linux.org.uk,
jg1.han@samsung.com, jgunthorpe@obsidianresearch.com,
thomas.abraham@linaro.org, arnd@arndb.de,
devicetree-discuss@lists.ozlabs.org, rob.herring@calxeda.com,
kgene.kim@samsung.com, bhelgaas@google.com,
linux-arm-kernel@lists.infradead.org,
thomas.petazzoni@free-electrons.com, monstr@monstr.eu,
linux-kernel@vger.kernel.org, suren.reddy@samsung.com,
linuxppc-dev@lists.ozlabs.org
In-Reply-To: <20130418134535.5189E3E1319@localhost>
On Thu, Apr 18, 2013 at 02:45:35PM +0100, Grant Likely wrote:
> On Tue, 16 Apr 2013 11:18:28 +0100, Andrew Murray <Andrew.Murray@arm.com> wrote:
> > This patch converts the pci_load_of_ranges function to use the new common
> > of_pci_range_parser.
> >
> > Signed-off-by: Andrew Murray <Andrew.Murray@arm.com>
> > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> > Reviewed-by: Rob Herring <rob.herring@calxeda.com>
>
> Looks okay to me, and thank you very much for doing the legwork to merge
> the common code.
No problem, though I think there is more than can be done in this area -
there is a lot of similar PCI code floating about.
>
> Reviewed-by: Grant Likely <grant.likely@secretlab.ca>
Thanks for the review.
Andrew Murray
^ permalink raw reply
* Re: [PATCH v7 2/3] of/pci: Provide support for parsing PCI DT ranges property
From: Andrew Murray @ 2013-04-18 14:24 UTC (permalink / raw)
To: Grant Likely
Cc: linux-mips@linux-mips.org, siva.kallam@samsung.com,
linux-pci@vger.kernel.org, linus.walleij@linaro.org,
thierry.reding@avionic-design.de, Liviu Dudau, paulus@samba.org,
linux-samsung-soc@vger.kernel.org, linux@arm.linux.org.uk,
jg1.han@samsung.com, jgunthorpe@obsidianresearch.com,
thomas.abraham@linaro.org, arnd@arndb.de,
devicetree-discuss@lists.ozlabs.org, rob.herring@calxeda.com,
kgene.kim@samsung.com, bhelgaas@google.com,
linux-arm-kernel@lists.infradead.org,
thomas.petazzoni@free-electrons.com, monstr@monstr.eu,
linux-kernel@vger.kernel.org, suren.reddy@samsung.com,
linuxppc-dev@lists.ozlabs.org
In-Reply-To: <20130418134401.84AEE3E1319@localhost>
On Thu, Apr 18, 2013 at 02:44:01PM +0100, Grant Likely wrote:
> On Tue, 16 Apr 2013 11:18:27 +0100, Andrew Murray <Andrew.Murray@arm.com> wrote:
> > This patch factors out common implementation patterns to reduce overall kernel
> > code and provide a means for host bridge drivers to directly obtain struct
> > resources from the DT's ranges property without relying on architecture specific
> > DT handling. This will make it easier to write archiecture independent host bridge
> > drivers and mitigate against further duplication of DT parsing code.
> >
> > This patch can be used in the following way:
> >
> > struct of_pci_range_parser parser;
> > struct of_pci_range range;
> >
> > if (of_pci_range_parser(&parser, np))
> > ; //no ranges property
> >
> > for_each_of_pci_range(&parser, &range) {
> >
> > /*
> > directly access properties of the address range, e.g.:
> > range.pci_space, range.pci_addr, range.cpu_addr,
> > range.size, range.flags
> >
> > alternatively obtain a struct resource, e.g.:
> > struct resource res;
> > of_pci_range_to_resource(&range, np, &res);
> > */
> > }
> >
> > Additionally the implementation takes care of adjacent ranges and merges them
> > into a single range (as was the case with powerpc and microblaze).
> >
> > Signed-off-by: Andrew Murray <Andrew.Murray@arm.com>
> > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> > Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> > Reviewed-by: Rob Herring <rob.herring@calxeda.com>
> > Tested-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> > Tested-by: Linus Walleij <linus.walleij@linaro.org>
>
> Acked-by: Grant Likely <grant.likely@secretlab.ca>
>
> But comments below...
>
> > ---
> > drivers/of/address.c | 67 ++++++++++++++++++++++++++
> > drivers/of/of_pci.c | 113 ++++++++++++++++----------------------------
> > include/linux/of_address.h | 46 ++++++++++++++++++
> > 3 files changed, 154 insertions(+), 72 deletions(-)
> >
> > diff --git a/drivers/of/address.c b/drivers/of/address.c
> > index 04da786..6eec70c 100644
> > --- a/drivers/of/address.c
> > +++ b/drivers/of/address.c
> > @@ -227,6 +227,73 @@ int of_pci_address_to_resource(struct device_node *dev, int bar,
> > return __of_address_to_resource(dev, addrp, size, flags, NULL, r);
> > }
> > EXPORT_SYMBOL_GPL(of_pci_address_to_resource);
> > +
> > +int of_pci_range_parser(struct of_pci_range_parser *parser,
> > + struct device_node *node)
> > +{
> > + const int na = 3, ns = 2;
> > + int rlen;
> > +
> > + parser->node = node;
> > + parser->pna = of_n_addr_cells(node);
> > + parser->np = parser->pna + na + ns;
> > +
> > + parser->range = of_get_property(node, "ranges", &rlen);
> > + if (parser->range == NULL)
> > + return -ENOENT;
> > +
> > + parser->end = parser->range + rlen / sizeof(__be32);
> > +
> > + return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(of_pci_range_parser);
>
> "of_pci_range_parser_init" would be a clearer name
>
> > +struct of_pci_range *of_pci_process_ranges(struct of_pci_range_parser *parser,
> > + struct of_pci_range *range)
>
> Similarly, "of_pci_range_parser_one" would be more consistent.
>
> > +{
> > + const int na = 3, ns = 2;
> > +
> > + if (!range)
> > + return NULL;
> > +
> > + if (!parser->range || parser->range + parser->np > parser->end)
> > + return NULL;
> > +
> > + range->pci_space = parser->range[0];
> > + range->flags = of_bus_pci_get_flags(parser->range);
> > + range->pci_addr = of_read_number(parser->range + 1, ns);
> > + range->cpu_addr = of_translate_address(parser->node,
> > + parser->range + na);
> > + range->size = of_read_number(parser->range + parser->pna + na, ns);
> > +
> > + parser->range += parser->np;
> > +
> > + /* Now consume following elements while they are contiguous */
> > + while (parser->range + parser->np <= parser->end) {
> > + u32 flags, pci_space;
> > + u64 pci_addr, cpu_addr, size;
> > +
> > + pci_space = be32_to_cpup(parser->range);
> > + flags = of_bus_pci_get_flags(parser->range);
> > + pci_addr = of_read_number(parser->range + 1, ns);
> > + cpu_addr = of_translate_address(parser->node,
> > + parser->range + na);
> > + size = of_read_number(parser->range + parser->pna + na, ns);
> > +
> > + if (flags != range->flags)
> > + break;
> > + if (pci_addr != range->pci_addr + range->size ||
> > + cpu_addr != range->cpu_addr + range->size)
> > + break;
> > +
> > + range->size += size;
> > + parser->range += parser->np;
> > + }
> > +
> > + return range;
> > +}
> > +EXPORT_SYMBOL_GPL(of_pci_process_ranges);
> > +
> > #endif /* CONFIG_PCI */
> >
> > /*
> > diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
> > index 1626172..e5ab604 100644
> > --- a/drivers/of/of_pci.c
> > +++ b/drivers/of/of_pci.c
> > @@ -2,6 +2,7 @@
> > #include <linux/export.h>
> > #include <linux/of.h>
> > #include <linux/of_pci.h>
> > +#include <linux/of_address.h>
> > #include <asm/prom.h>
> >
> > #if defined(CONFIG_PPC32) || defined(CONFIG_PPC64) || defined(CONFIG_MICROBLAZE)
> > @@ -82,67 +83,43 @@ EXPORT_SYMBOL_GPL(of_pci_find_child_device);
> > void pci_process_bridge_OF_ranges(struct pci_controller *hose,
> > struct device_node *dev, int primary)
> > {
> > - const u32 *ranges;
> > - int rlen;
> > - int pna = of_n_addr_cells(dev);
> > - int np = pna + 5;
> > int memno = 0, isa_hole = -1;
> > - u32 pci_space;
> > - unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size;
> > unsigned long long isa_mb = 0;
> > struct resource *res;
> > + struct of_pci_range range;
> > + struct of_pci_range_parser parser;
> > + u32 res_type;
> >
> > pr_info("PCI host bridge %s %s ranges:\n",
> > dev->full_name, primary ? "(primary)" : "");
> >
> > - /* Get ranges property */
> > - ranges = of_get_property(dev, "ranges", &rlen);
> > - if (ranges == NULL)
> > + /* Check for ranges property */
> > + if (of_pci_range_parser(&parser, dev))
> > return;
> >
> > - /* Parse it */
> > pr_debug("Parsing ranges property...\n");
> > - while ((rlen -= np * 4) >= 0) {
> > + for_each_of_pci_range(&parser, &range) {
> > /* Read next ranges element */
> > - pci_space = ranges[0];
> > - pci_addr = of_read_number(ranges + 1, 2);
> > - cpu_addr = of_translate_address(dev, ranges + 3);
> > - size = of_read_number(ranges + pna + 3, 2);
>
> Tip: the diff on this function would be a whole lot simpler if the
> above locals were kept, but updated from the ranges structure. Not at
> all a big problem, but it is something that makes changes like this
> easier to review. The removal of the locals could also be split into a
> separate patch to end up with the same result.
>
> > -
> > - pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> > - pci_space, pci_addr);
> > - pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> > - cpu_addr, size);
> > -
> > - ranges += np;
> > + pr_debug("pci_space: 0x%08x pci_addr: 0x%016llx ",
> > + range.pci_space, range.pci_addr);
> > + pr_debug("cpu_addr: 0x%016llx size: 0x%016llx\n",
> > + range.cpu_addr, range.size);
>
> Nit: the patch changed whitespace on the pr_debug() statements, so even
> though the first line of each is identical, they look different in the
> patch.
>
> >
> > /* If we failed translation or got a zero-sized region
> > * (some FW try to feed us with non sensical zero sized regions
> > * such as power3 which look like some kind of attempt
> > * at exposing the VGA memory hole)
> > */
> > - if (cpu_addr == OF_BAD_ADDR || size == 0)
> > + if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> > continue;
>
> Can this also be rolled into the parsing iterator?
>
Yes I think that would make sense.
> >
> > - /* Now consume following elements while they are contiguous */
> > - for (; rlen >= np * sizeof(u32);
> > - ranges += np, rlen -= np * 4) {
> > - if (ranges[0] != pci_space)
> > - break;
> > - pci_next = of_read_number(ranges + 1, 2);
> > - cpu_next = of_translate_address(dev, ranges + 3);
> > - if (pci_next != pci_addr + size ||
> > - cpu_next != cpu_addr + size)
> > - break;
> > - size += of_read_number(ranges + pna + 3, 2);
> > - }
> > -
> > /* Act based on address space type */
> > res = NULL;
> > - switch ((pci_space >> 24) & 0x3) {
> > - case 1: /* PCI IO space */
> > + res_type = range.flags & IORESOURCE_TYPE_BITS;
> > + if (res_type == IORESOURCE_IO) {
>
> Why the change from switch() to an if/else if sequence?
Russell King suggested this change - see
https://patchwork.kernel.org/patch/2323941/
>
> But those are mostly nitpicks. If this is deferred to v3.10 then I would
> suggest fixing them up and posting for another round of review.
Thanks for the feedback. I'll update the patch in the future.
Andrew Murray
^ permalink raw reply
* Re: [PATCH v7 3/3] of/pci: mips: convert to common of_pci_range_parser
From: Grant Likely @ 2013-04-18 13:45 UTC (permalink / raw)
To: Andrew Murray, linux-mips, linuxppc-dev
Cc: siva.kallam, linux-pci, linus.walleij, thierry.reding,
Liviu.Dudau, paulus, linux-samsung-soc, linux, jg1.han,
jgunthorpe, thomas.abraham, arnd, devicetree-discuss, rob.herring,
kgene.kim, bhelgaas, linux-arm-kernel, thomas.petazzoni, monstr,
linux-kernel, suren.reddy, Andrew Murray
In-Reply-To: <1366107508-12672-4-git-send-email-Andrew.Murray@arm.com>
On Tue, 16 Apr 2013 11:18:28 +0100, Andrew Murray <Andrew.Murray@arm.com> wrote:
> This patch converts the pci_load_of_ranges function to use the new common
> of_pci_range_parser.
>
> Signed-off-by: Andrew Murray <Andrew.Murray@arm.com>
> Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> Reviewed-by: Rob Herring <rob.herring@calxeda.com>
Looks okay to me, and thank you very much for doing the legwork to merge
the common code.
Reviewed-by: Grant Likely <grant.likely@secretlab.ca>
^ permalink raw reply
* Re: [PATCH v7 2/3] of/pci: Provide support for parsing PCI DT ranges property
From: Grant Likely @ 2013-04-18 13:44 UTC (permalink / raw)
To: Andrew Murray, linux-mips, linuxppc-dev
Cc: siva.kallam, linux-pci, linus.walleij, thierry.reding,
Liviu.Dudau, paulus, linux-samsung-soc, linux, jg1.han,
jgunthorpe, thomas.abraham, arnd, devicetree-discuss, rob.herring,
kgene.kim, bhelgaas, linux-arm-kernel, thomas.petazzoni, monstr,
linux-kernel, suren.reddy, Andrew Murray
In-Reply-To: <1366107508-12672-3-git-send-email-Andrew.Murray@arm.com>
On Tue, 16 Apr 2013 11:18:27 +0100, Andrew Murray <Andrew.Murray@arm.com> wrote:
> This patch factors out common implementation patterns to reduce overall kernel
> code and provide a means for host bridge drivers to directly obtain struct
> resources from the DT's ranges property without relying on architecture specific
> DT handling. This will make it easier to write archiecture independent host bridge
> drivers and mitigate against further duplication of DT parsing code.
>
> This patch can be used in the following way:
>
> struct of_pci_range_parser parser;
> struct of_pci_range range;
>
> if (of_pci_range_parser(&parser, np))
> ; //no ranges property
>
> for_each_of_pci_range(&parser, &range) {
>
> /*
> directly access properties of the address range, e.g.:
> range.pci_space, range.pci_addr, range.cpu_addr,
> range.size, range.flags
>
> alternatively obtain a struct resource, e.g.:
> struct resource res;
> of_pci_range_to_resource(&range, np, &res);
> */
> }
>
> Additionally the implementation takes care of adjacent ranges and merges them
> into a single range (as was the case with powerpc and microblaze).
>
> Signed-off-by: Andrew Murray <Andrew.Murray@arm.com>
> Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> Reviewed-by: Rob Herring <rob.herring@calxeda.com>
> Tested-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> Tested-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
But comments below...
> ---
> drivers/of/address.c | 67 ++++++++++++++++++++++++++
> drivers/of/of_pci.c | 113 ++++++++++++++++----------------------------
> include/linux/of_address.h | 46 ++++++++++++++++++
> 3 files changed, 154 insertions(+), 72 deletions(-)
>
> diff --git a/drivers/of/address.c b/drivers/of/address.c
> index 04da786..6eec70c 100644
> --- a/drivers/of/address.c
> +++ b/drivers/of/address.c
> @@ -227,6 +227,73 @@ int of_pci_address_to_resource(struct device_node *dev, int bar,
> return __of_address_to_resource(dev, addrp, size, flags, NULL, r);
> }
> EXPORT_SYMBOL_GPL(of_pci_address_to_resource);
> +
> +int of_pci_range_parser(struct of_pci_range_parser *parser,
> + struct device_node *node)
> +{
> + const int na = 3, ns = 2;
> + int rlen;
> +
> + parser->node = node;
> + parser->pna = of_n_addr_cells(node);
> + parser->np = parser->pna + na + ns;
> +
> + parser->range = of_get_property(node, "ranges", &rlen);
> + if (parser->range == NULL)
> + return -ENOENT;
> +
> + parser->end = parser->range + rlen / sizeof(__be32);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(of_pci_range_parser);
"of_pci_range_parser_init" would be a clearer name
> +struct of_pci_range *of_pci_process_ranges(struct of_pci_range_parser *parser,
> + struct of_pci_range *range)
Similarly, "of_pci_range_parser_one" would be more consistent.
> +{
> + const int na = 3, ns = 2;
> +
> + if (!range)
> + return NULL;
> +
> + if (!parser->range || parser->range + parser->np > parser->end)
> + return NULL;
> +
> + range->pci_space = parser->range[0];
> + range->flags = of_bus_pci_get_flags(parser->range);
> + range->pci_addr = of_read_number(parser->range + 1, ns);
> + range->cpu_addr = of_translate_address(parser->node,
> + parser->range + na);
> + range->size = of_read_number(parser->range + parser->pna + na, ns);
> +
> + parser->range += parser->np;
> +
> + /* Now consume following elements while they are contiguous */
> + while (parser->range + parser->np <= parser->end) {
> + u32 flags, pci_space;
> + u64 pci_addr, cpu_addr, size;
> +
> + pci_space = be32_to_cpup(parser->range);
> + flags = of_bus_pci_get_flags(parser->range);
> + pci_addr = of_read_number(parser->range + 1, ns);
> + cpu_addr = of_translate_address(parser->node,
> + parser->range + na);
> + size = of_read_number(parser->range + parser->pna + na, ns);
> +
> + if (flags != range->flags)
> + break;
> + if (pci_addr != range->pci_addr + range->size ||
> + cpu_addr != range->cpu_addr + range->size)
> + break;
> +
> + range->size += size;
> + parser->range += parser->np;
> + }
> +
> + return range;
> +}
> +EXPORT_SYMBOL_GPL(of_pci_process_ranges);
> +
> #endif /* CONFIG_PCI */
>
> /*
> diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
> index 1626172..e5ab604 100644
> --- a/drivers/of/of_pci.c
> +++ b/drivers/of/of_pci.c
> @@ -2,6 +2,7 @@
> #include <linux/export.h>
> #include <linux/of.h>
> #include <linux/of_pci.h>
> +#include <linux/of_address.h>
> #include <asm/prom.h>
>
> #if defined(CONFIG_PPC32) || defined(CONFIG_PPC64) || defined(CONFIG_MICROBLAZE)
> @@ -82,67 +83,43 @@ EXPORT_SYMBOL_GPL(of_pci_find_child_device);
> void pci_process_bridge_OF_ranges(struct pci_controller *hose,
> struct device_node *dev, int primary)
> {
> - const u32 *ranges;
> - int rlen;
> - int pna = of_n_addr_cells(dev);
> - int np = pna + 5;
> int memno = 0, isa_hole = -1;
> - u32 pci_space;
> - unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size;
> unsigned long long isa_mb = 0;
> struct resource *res;
> + struct of_pci_range range;
> + struct of_pci_range_parser parser;
> + u32 res_type;
>
> pr_info("PCI host bridge %s %s ranges:\n",
> dev->full_name, primary ? "(primary)" : "");
>
> - /* Get ranges property */
> - ranges = of_get_property(dev, "ranges", &rlen);
> - if (ranges == NULL)
> + /* Check for ranges property */
> + if (of_pci_range_parser(&parser, dev))
> return;
>
> - /* Parse it */
> pr_debug("Parsing ranges property...\n");
> - while ((rlen -= np * 4) >= 0) {
> + for_each_of_pci_range(&parser, &range) {
> /* Read next ranges element */
> - pci_space = ranges[0];
> - pci_addr = of_read_number(ranges + 1, 2);
> - cpu_addr = of_translate_address(dev, ranges + 3);
> - size = of_read_number(ranges + pna + 3, 2);
Tip: the diff on this function would be a whole lot simpler if the
above locals were kept, but updated from the ranges structure. Not at
all a big problem, but it is something that makes changes like this
easier to review. The removal of the locals could also be split into a
separate patch to end up with the same result.
> -
> - pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> - pci_space, pci_addr);
> - pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> - cpu_addr, size);
> -
> - ranges += np;
> + pr_debug("pci_space: 0x%08x pci_addr: 0x%016llx ",
> + range.pci_space, range.pci_addr);
> + pr_debug("cpu_addr: 0x%016llx size: 0x%016llx\n",
> + range.cpu_addr, range.size);
Nit: the patch changed whitespace on the pr_debug() statements, so even
though the first line of each is identical, they look different in the
patch.
>
> /* If we failed translation or got a zero-sized region
> * (some FW try to feed us with non sensical zero sized regions
> * such as power3 which look like some kind of attempt
> * at exposing the VGA memory hole)
> */
> - if (cpu_addr == OF_BAD_ADDR || size == 0)
> + if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> continue;
Can this also be rolled into the parsing iterator?
>
> - /* Now consume following elements while they are contiguous */
> - for (; rlen >= np * sizeof(u32);
> - ranges += np, rlen -= np * 4) {
> - if (ranges[0] != pci_space)
> - break;
> - pci_next = of_read_number(ranges + 1, 2);
> - cpu_next = of_translate_address(dev, ranges + 3);
> - if (pci_next != pci_addr + size ||
> - cpu_next != cpu_addr + size)
> - break;
> - size += of_read_number(ranges + pna + 3, 2);
> - }
> -
> /* Act based on address space type */
> res = NULL;
> - switch ((pci_space >> 24) & 0x3) {
> - case 1: /* PCI IO space */
> + res_type = range.flags & IORESOURCE_TYPE_BITS;
> + if (res_type == IORESOURCE_IO) {
Why the change from switch() to an if/else if sequence?
But those are mostly nitpicks. If this is deferred to v3.10 then I would
suggest fixing them up and posting for another round of review.
g.
^ permalink raw reply
* Re: [PATCH v7 3/3] of/pci: mips: convert to common of_pci_range_parser
From: Jason Cooper @ 2013-04-18 13:09 UTC (permalink / raw)
To: Andrew Murray
Cc: linux-mips@linux-mips.org, siva.kallam@samsung.com,
linux-pci@vger.kernel.org, Linus Walleij, Thierry Reding,
Liviu Dudau, Paul Mackerras, linux-samsung-soc,
Russell King - ARM Linux, Jingoo Han, Jason Gunthorpe,
Thomas Abraham, Arnd Bergmann,
devicetree-discuss@lists.ozlabs.org, rob.herring@calxeda.com,
Kukjin Kim, bhelgaas@google.com,
linux-arm-kernel@lists.infradead.org, Thomas Petazzoni,
Michal Simek, linux-kernel@vger.kernel.org,
suren.reddy@samsung.com, linuxppc-dev@lists.ozlabs.org list
In-Reply-To: <20130418125910.GA17128@arm.com>
On Thu, Apr 18, 2013 at 01:59:10PM +0100, Andrew Murray wrote:
> On Wed, Apr 17, 2013 at 04:42:48PM +0100, Linus Walleij wrote:
> > On Tue, Apr 16, 2013 at 12:18 PM, Andrew Murray <Andrew.Murray@arm.com> wrote:
> >
> > > This patch converts the pci_load_of_ranges function to use the new common
> > > of_pci_range_parser.
> > >
> > > Signed-off-by: Andrew Murray <Andrew.Murray@arm.com>
> > > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> > > Reviewed-by: Rob Herring <rob.herring@calxeda.com>
> >
> > Tested-by: Linus Walleij <linus.walleij@linaro.org>
>
> Jason - you may not have seen this, but here (Linus Walleij) is another Tested-by
> to add to this patch in your tree (if you can).
Thanks, I saw it. Unfortunately, the PR was already sent, and the branch
is now pulled into arm-soc.
thx,
Jason.
^ permalink raw reply
* Re: [PATCH v7 1/3] of/pci: Unify pci_process_bridge_OF_ranges from Microblaze and PowerPC
From: Jason Cooper @ 2013-04-18 13:06 UTC (permalink / raw)
To: Grant Likely
Cc: linux-mips@linux-mips.org, siva.kallam@samsung.com,
linux-pci@vger.kernel.org, linus.walleij@linaro.org,
thierry.reding@avionic-design.de, Liviu Dudau, Gregory CLEMENT,
paulus@samba.org, linux-samsung-soc@vger.kernel.org,
linux@arm.linux.org.uk, jg1.han@samsung.com,
jgunthorpe@obsidianresearch.com, thomas.abraham@linaro.org,
arnd@arndb.de, devicetree-discuss@lists.ozlabs.org,
rob.herring@calxeda.com, kgene.kim@samsung.com,
bh elgaas@g oogle.com, linux-arm-kernel@lists.infradead.org,
thomas.petazzoni@free-electrons.com, monstr@monstr.eu,
linux-kernel@vger.kernel.org, Andrew Lunn,
suren.reddy@samsung.com, Olof Johansson, Andrew Murray,
linuxppc-dev@lists.ozlabs.org
In-Reply-To: <20130418124832.D2C733E118C@localhost>
On Thu, Apr 18, 2013 at 01:48:32PM +0100, Grant Likely wrote:
> On Wed, 17 Apr 2013 12:22:23 -0400, Jason Cooper <jason@lakedaemon.net> wrote:
> > On Wed, Apr 17, 2013 at 05:17:48PM +0100, Grant Likely wrote:
> > > On Wed, Apr 17, 2013 at 5:10 PM, Jason Cooper <jason@lakedaemon.net> wrote:
> > > > On Wed, Apr 17, 2013 at 05:00:15PM +0100, Grant Likely wrote:
> > > >> On Tue, 16 Apr 2013 11:30:06 +0100, Andrew Murray <andrew.murray@arm.com> wrote:
> > > >> > On Tue, Apr 16, 2013 at 11:18:26AM +0100, Andrew Murray wrote:
> > > >> > > The pci_process_bridge_OF_ranges function, used to parse the "ranges"
> > > >> > > property of a PCI host device, is found in both Microblaze and PowerPC
> > > >> > > architectures. These implementations are nearly identical. This patch
> > > >> > > moves this common code to a common place.
> > > >> > >
> > > >> > > Signed-off-by: Andrew Murray <Andrew.Murray@arm.com>
> > > >> > > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> > > >> > > Reviewed-by: Rob Herring <rob.herring@calxeda.com>
> > > >> > > Tested-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> > > >> > > Tested-by: Linus Walleij <linus.walleij@linaro.org>
> > > >> > > Acked-by: Michal Simek <monstr@monstr.eu>
> > > >> > > ---
> > > >> > > arch/microblaze/include/asm/pci-bridge.h | 5 +-
> > > >> > > arch/microblaze/pci/pci-common.c | 192 ----------------------------
> > > >> > > arch/powerpc/include/asm/pci-bridge.h | 5 +-
> > > >> > > arch/powerpc/kernel/pci-common.c | 192 ----------------------------
> > > >> >
> > > >> > Is there anyone on linuxppc-dev/linux-mips that can help test this patchset?
> > > >> >
> > > >> > I've tested that it builds on powerpc with a variety of configs (some which
> > > >> > include fsl_pci.c implementation). Though I don't have hardware to verify that
> > > >> > it works.
> > > >> >
> > > >> > I haven't tested this builds or runs on MIPS.
> > > >> >
> > > >> > You shouldn't see any difference in behaviour or new warnings and PCI devices
> > > >> > should continue to operate as before.
> > > >>
> > > >> I've got through a line-by-line comparison between powerpc, microblaze,
> > > >> and then new code. The differences are purely cosmetic, so I have
> > > >> absolutely no concerns about this patch. I've applied it to my tree.
> > > >
> > > > oops. Due to the number of dependencies the mvebu-pcie series has (this
> > > > being one of them, we (arm-soc/mvebu) asked if we could take this
> > > > through our tree. Rob Herring agreed to this several days ago. Is this
> > > > a problem for you?
> > > >
> > > > It would truly (dogs and cats living together) upset the apple cart for
> > > > us at this stage to pipe these through a different tree...
> > >
> > > Not a problem at all. I'll drop it.
> >
> > Great! Thanks.
>
> You can add my Acked-by: Grant Likely <grant.likely@linaro.org> to the
> first patch. I've not reviewed out the second or third patches yet.
>
> None of this appears to be in linux-next yet. I've boot tested on
> PowerPC, but that isn't the same as an ack by the PowerPC maintainers.
> It is probably too late for the whole now since we're after -rc7.
> However, if you ask me to, I have absolutely no problem putting the
> first patch into my tree for v3.10 merging. I went over the patch
> line-by-line and am convinced that it is functionally identical.
>
> Let me know if you need me to do this.
Thanks for the offer, Olof just pulled the branch last night (my PRs
were a little late trying to sort all this out), so it should be in
shortly.
FYI:
65ee348 of/pci: Unify pci_process_bridge_OF_ranges from Microblaze and PowerPC
is in arm-soc/mvebu/drivers as well as arm-soc/next/drivers (and
arm-soc/for-next if you want to test merging everything...)
thx,
Jason.
^ permalink raw reply
* Re: [PATCH v7 3/3] of/pci: mips: convert to common of_pci_range_parser
From: Andrew Murray @ 2013-04-18 12:59 UTC (permalink / raw)
To: Linus Walleij
Cc: linux-mips@linux-mips.org, siva.kallam@samsung.com,
linux-pci@vger.kernel.org, Thierry Reding, Liviu Dudau,
Paul Mackerras, linux-samsung-soc, Russell King - ARM Linux,
Jingoo Han, Jason Gunthorpe, Thomas Abraham, jason, Arnd Bergmann,
devicetree-discuss@lists.ozlabs.org, rob.herring@calxeda.com,
Kukjin Kim, bhelgaas@google.com,
linux-arm-kernel@lists.infradead.org, Thomas Petazzoni,
Michal Simek, linux-kernel@vger.kernel.org,
suren.reddy@samsung.com, linuxppc-dev@lists.ozlabs.org list
In-Reply-To: <CACRpkdbBaT1OKr5t8HW4+8y_wSDmGxmewAyVMekx8S-K9s3K8Q@mail.gmail.com>
On Wed, Apr 17, 2013 at 04:42:48PM +0100, Linus Walleij wrote:
> On Tue, Apr 16, 2013 at 12:18 PM, Andrew Murray <Andrew.Murray@arm.com> wrote:
>
> > This patch converts the pci_load_of_ranges function to use the new common
> > of_pci_range_parser.
> >
> > Signed-off-by: Andrew Murray <Andrew.Murray@arm.com>
> > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> > Reviewed-by: Rob Herring <rob.herring@calxeda.com>
>
> Tested-by: Linus Walleij <linus.walleij@linaro.org>
Jason - you may not have seen this, but here (Linus Walleij) is another Tested-by
to add to this patch in your tree (if you can).
Andrew Murray
^ permalink raw reply
* Re: [PATCH v7 1/3] of/pci: Unify pci_process_bridge_OF_ranges from Microblaze and PowerPC
From: Grant Likely @ 2013-04-18 12:48 UTC (permalink / raw)
To: Jason Cooper
Cc: linux-mips@linux-mips.org, siva.kallam@samsung.com,
linux-pci@vger.kernel.org, linus.walleij@linaro.org,
thierry.reding@avionic-design.de, Liviu Dudau, Gregory CLEMENT,
paulus@samba.org, linux-samsung-soc@vger.kernel.org,
linux@arm.linux.org.uk, jg1.han@samsung.com,
jgunthorpe@obsidianresearch.com, thomas.abraham@linaro.org,
arnd@arndb.de, devicetree-discuss@lists.ozlabs.org,
rob.herring@calxeda.com, kgene.kim@samsung.com,
bhelgaas@g oogle.com, linux-arm-kernel@lists.infradead.org,
thomas.petazzoni@free-electrons.com, monstr@monstr.eu,
linux-kernel@vger.kernel.org, Andrew Lunn,
suren.reddy@samsung.com, Olof Johansson, Andrew Murray,
linuxppc-dev@lists.ozlabs.org
In-Reply-To: <20130417162223.GC27197@titan.lakedaemon.net>
On Wed, 17 Apr 2013 12:22:23 -0400, Jason Cooper <jason@lakedaemon.net> wrote:
> On Wed, Apr 17, 2013 at 05:17:48PM +0100, Grant Likely wrote:
> > On Wed, Apr 17, 2013 at 5:10 PM, Jason Cooper <jason@lakedaemon.net> wrote:
> > > On Wed, Apr 17, 2013 at 05:00:15PM +0100, Grant Likely wrote:
> > >> On Tue, 16 Apr 2013 11:30:06 +0100, Andrew Murray <andrew.murray@arm.com> wrote:
> > >> > On Tue, Apr 16, 2013 at 11:18:26AM +0100, Andrew Murray wrote:
> > >> > > The pci_process_bridge_OF_ranges function, used to parse the "ranges"
> > >> > > property of a PCI host device, is found in both Microblaze and PowerPC
> > >> > > architectures. These implementations are nearly identical. This patch
> > >> > > moves this common code to a common place.
> > >> > >
> > >> > > Signed-off-by: Andrew Murray <Andrew.Murray@arm.com>
> > >> > > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> > >> > > Reviewed-by: Rob Herring <rob.herring@calxeda.com>
> > >> > > Tested-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> > >> > > Tested-by: Linus Walleij <linus.walleij@linaro.org>
> > >> > > Acked-by: Michal Simek <monstr@monstr.eu>
> > >> > > ---
> > >> > > arch/microblaze/include/asm/pci-bridge.h | 5 +-
> > >> > > arch/microblaze/pci/pci-common.c | 192 ----------------------------
> > >> > > arch/powerpc/include/asm/pci-bridge.h | 5 +-
> > >> > > arch/powerpc/kernel/pci-common.c | 192 ----------------------------
> > >> >
> > >> > Is there anyone on linuxppc-dev/linux-mips that can help test this patchset?
> > >> >
> > >> > I've tested that it builds on powerpc with a variety of configs (some which
> > >> > include fsl_pci.c implementation). Though I don't have hardware to verify that
> > >> > it works.
> > >> >
> > >> > I haven't tested this builds or runs on MIPS.
> > >> >
> > >> > You shouldn't see any difference in behaviour or new warnings and PCI devices
> > >> > should continue to operate as before.
> > >>
> > >> I've got through a line-by-line comparison between powerpc, microblaze,
> > >> and then new code. The differences are purely cosmetic, so I have
> > >> absolutely no concerns about this patch. I've applied it to my tree.
> > >
> > > oops. Due to the number of dependencies the mvebu-pcie series has (this
> > > being one of them, we (arm-soc/mvebu) asked if we could take this
> > > through our tree. Rob Herring agreed to this several days ago. Is this
> > > a problem for you?
> > >
> > > It would truly (dogs and cats living together) upset the apple cart for
> > > us at this stage to pipe these through a different tree...
> >
> > Not a problem at all. I'll drop it.
>
> Great! Thanks.
You can add my Acked-by: Grant Likely <grant.likely@linaro.org> to the
first patch. I've not reviewed out the second or third patches yet.
None of this appears to be in linux-next yet. I've boot tested on
PowerPC, but that isn't the same as an ack by the PowerPC maintainers.
It is probably too late for the whole now since we're after -rc7.
However, if you ask me to, I have absolutely no problem putting the
first patch into my tree for v3.10 merging. I went over the patch
line-by-line and am convinced that it is functionally identical.
Let me know if you need me to do this.
g.
^ permalink raw reply
* [PATCH V3 0/5] powerpc, perf: BHRB based branch stack enablement on POWER8
From: Anshuman Khandual @ 2013-04-18 12:26 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel; +Cc: mikey
Branch History Rolling Buffer (BHRB) is a new PMU feaure in IBM
POWER8 processor which records the branch instructions inside the execution
pipeline. This patchset enables the basic functionality of the feature through
generic perf branch stack sampling framework.
Sample output
-------------
$./perf record -b top
$./perf report
Overhead Command Source Shared Object Source Symbol Target Shared Object Target Symbol
# ........ ....... .................... ...................................... .................... ...................................
#
7.82% top libc-2.11.2.so [k] _IO_vfscanf libc-2.11.2.so [k] _IO_vfscanf
6.17% top libc-2.11.2.so [k] _IO_vfscanf [unknown] [k] 00000000
2.37% top [unknown] [k] 0xf7aafb30 [unknown] [k] 00000000
1.80% top [unknown] [k] 0x0fe07978 libc-2.11.2.so [k] _IO_vfscanf
1.60% top libc-2.11.2.so [k] _IO_vfscanf [kernel.kallsyms] [k] .do_task_stat
1.20% top [kernel.kallsyms] [k] .do_task_stat [kernel.kallsyms] [k] .do_task_stat
1.02% top libc-2.11.2.so [k] vfprintf libc-2.11.2.so [k] vfprintf
0.92% top top [k] _init [unknown] [k] 0x0fe037f4
Changes in V2
--------------
- Added copyright messages to the newly created files
- Modified couple of commit messages
Changes in V3
-------------
- Incorporated review comments from Segher https://lkml.org/lkml/2013/4/16/350
- Worked on a solution for review comment from Michael Ellerman https://lkml.org/lkml/2013/4/17/548
- Could not move updated cpu_hw_events structure from core-book3s.c file into perf_event_server.h
Because perf_event_server.h is pulled in first inside linux/perf_event.h before the definition of
perf_branch_entry structure. Thats the reason why perf_branch_entry definition is not available
inside perf_event_server.h where we define the array inside cpu_hw_events structure.
- Finally have pulled in the code from perf_event_bhrb.c into core-book3s.c
- Improved documentation for the patchset
Anshuman Khandual (5):
powerpc, perf: Add new BHRB related instructions for POWER8
powerpc, perf: Add basic assembly code to read BHRB entries on POWER8
powerpc, perf: Add new BHRB related generic functions, data and flags
powerpc, perf: Define BHRB generic functions, data and flags for POWER8
powerpc, perf: Enable branch stack sampling framework
arch/powerpc/include/asm/perf_event_server.h | 7 ++
arch/powerpc/include/asm/ppc-opcode.h | 7 ++
arch/powerpc/perf/Makefile | 2 +-
arch/powerpc/perf/bhrb.S | 44 +++++++
arch/powerpc/perf/core-book3s.c | 167 ++++++++++++++++++++++++++-
arch/powerpc/perf/power8-pmu.c | 57 ++++++++-
6 files changed, 279 insertions(+), 5 deletions(-)
create mode 100644 arch/powerpc/perf/bhrb.S
--
1.7.11.7
^ permalink raw reply
* [PATCH V3 2/5] powerpc, perf: Add basic assembly code to read BHRB entries on POWER8
From: Anshuman Khandual @ 2013-04-18 12:26 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel; +Cc: mikey
In-Reply-To: <1366287976-3900-1-git-send-email-khandual@linux.vnet.ibm.com>
This patch adds the basic assembly code to read BHRB buffer. BHRB entries
are valid only after a PMU interrupt has happened (when MMCR0[PMAO]=1)
and BHRB has been freezed. BHRB read should not be attempted when it is
still enabled (MMCR0[PMAE]=1) and getting updated, as this can produce
non-deterministic results.
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
arch/powerpc/perf/Makefile | 2 +-
arch/powerpc/perf/bhrb.S | 44 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 45 insertions(+), 1 deletion(-)
create mode 100644 arch/powerpc/perf/bhrb.S
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 472db18..510fae1 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -2,7 +2,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
obj-$(CONFIG_PERF_EVENTS) += callchain.o
-obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o
+obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
power8-pmu.o
diff --git a/arch/powerpc/perf/bhrb.S b/arch/powerpc/perf/bhrb.S
new file mode 100644
index 0000000..d85f9a5
--- /dev/null
+++ b/arch/powerpc/perf/bhrb.S
@@ -0,0 +1,44 @@
+/*
+ * Basic assembly code to read BHRB entries
+ *
+ * Copyright 2013 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+
+ .text
+
+.balign 8
+
+/* r3 = n (where n = [0-31])
+ * The maximum number of BHRB entries supported with PPC_MFBHRBE instruction
+ * is 1024. We have limited number of table entries here as POWER8 implements
+ * 32 BHRB entries.
+ */
+
+/* .global read_bhrb */
+_GLOBAL(read_bhrb)
+ cmpldi r3,31
+ bgt 1f
+ ld r4,bhrb_table@got(r2)
+ sldi r3,r3,3
+ add r3,r4,r3
+ mtctr r3
+ bctr
+1: li r3,0
+ blr
+
+#define MFBHRB_TABLE1(n) PPC_MFBHRBE(R3,n); blr
+#define MFBHRB_TABLE2(n) MFBHRB_TABLE1(n); MFBHRB_TABLE1(n+1)
+#define MFBHRB_TABLE4(n) MFBHRB_TABLE2(n); MFBHRB_TABLE2(n+2)
+#define MFBHRB_TABLE8(n) MFBHRB_TABLE4(n); MFBHRB_TABLE4(n+4)
+#define MFBHRB_TABLE16(n) MFBHRB_TABLE8(n); MFBHRB_TABLE8(n+8)
+#define MFBHRB_TABLE32(n) MFBHRB_TABLE16(n); MFBHRB_TABLE16(n+16)
+
+bhrb_table:
+ MFBHRB_TABLE32(0)
--
1.7.11.7
^ permalink raw reply related
* [PATCH V3 5/5] powerpc, perf: Enable branch stack sampling framework
From: Anshuman Khandual @ 2013-04-18 12:26 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel; +Cc: mikey
In-Reply-To: <1366287976-3900-1-git-send-email-khandual@linux.vnet.ibm.com>
Provides basic enablement for perf branch stack sampling framework on
POWER8 processor based platforms. Adds new BHRB related elements into
cpu_hw_event structure to represent current BHRB config, BHRB filter
configuration, manage context and to hold output BHRB buffer during
PMU interrupt before passing to the user space. This also enables
processing of BHRB data and converts them into generic perf branch
stack data format.
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/perf_event_server.h | 1 +
arch/powerpc/perf/core-book3s.c | 167 ++++++++++++++++++++++++++-
2 files changed, 165 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 3f0c15c..f265049 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -73,6 +73,7 @@ extern int register_power_pmu(struct power_pmu *);
struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs);
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long int read_bhrb(int n);
/*
* Only override the default definitions in include/linux/perf_event.h
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 4ac6e64..c627843 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -19,6 +19,11 @@
#include <asm/firmware.h>
#include <asm/ptrace.h>
+#define BHRB_MAX_ENTRIES 32
+#define BHRB_TARGET 0x0000000000000002
+#define BHRB_PREDICTION 0x0000000000000001
+#define BHRB_EA 0xFFFFFFFFFFFFFFFC
+
struct cpu_hw_events {
int n_events;
int n_percpu;
@@ -38,7 +43,15 @@ struct cpu_hw_events {
unsigned int group_flag;
int n_txn_start;
+
+ /* BHRB bits */
+ u64 bhrb_filter; /* BHRB HW branch filter */
+ int bhrb_users;
+ void *bhrb_context;
+ struct perf_branch_stack bhrb_stack;
+ struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES];
};
+
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
struct power_pmu *ppmu;
@@ -858,6 +871,9 @@ static void power_pmu_enable(struct pmu *pmu)
}
out:
+ if (cpuhw->bhrb_users)
+ ppmu->config_bhrb(cpuhw->bhrb_filter);
+
local_irq_restore(flags);
}
@@ -888,6 +904,47 @@ static int collect_events(struct perf_event *group, int max_count,
return n;
}
+/* Reset all possible BHRB entries */
+static void power_pmu_bhrb_reset(void)
+{
+ asm volatile(PPC_CLRBHRB);
+}
+
+void power_pmu_bhrb_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ if (!ppmu->bhrb_nr)
+ return;
+
+ /* Clear BHRB if we changed task context to avoid data leaks */
+ if (event->ctx->task && cpuhw->bhrb_context != event->ctx) {
+ power_pmu_bhrb_reset();
+ cpuhw->bhrb_context = event->ctx;
+ }
+ cpuhw->bhrb_users++;
+}
+
+void power_pmu_bhrb_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ if (!ppmu->bhrb_nr)
+ return;
+
+ cpuhw->bhrb_users--;
+ WARN_ON_ONCE(cpuhw->bhrb_users < 0);
+
+ if (!cpuhw->disabled && !cpuhw->bhrb_users) {
+ /* BHRB cannot be turned off when other
+ * events are active on the PMU.
+ */
+
+ /* avoid stale pointer */
+ cpuhw->bhrb_context = NULL;
+ }
+}
+
/*
* Add a event to the PMU.
* If all events are not already frozen, then we disable and
@@ -947,6 +1004,9 @@ nocheck:
ret = 0;
out:
+ if (has_branch_stack(event))
+ power_pmu_bhrb_enable(event);
+
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
return ret;
@@ -999,6 +1059,9 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
}
+ if (has_branch_stack(event))
+ power_pmu_bhrb_disable(event);
+
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
@@ -1117,6 +1180,15 @@ int power_pmu_commit_txn(struct pmu *pmu)
return 0;
}
+/* Called from ctxsw to prevent one process's branch entries to
+ * mingle with the other process's entries during context switch.
+ */
+void power_pmu_flush_branch_stack(void)
+{
+ if (ppmu->bhrb_nr)
+ power_pmu_bhrb_reset();
+}
+
/*
* Return 1 if we might be able to put event on a limited PMC,
* or 0 if not.
@@ -1231,9 +1303,11 @@ static int power_pmu_event_init(struct perf_event *event)
if (!ppmu)
return -ENOENT;
- /* does not support taken branch sampling */
- if (has_branch_stack(event))
- return -EOPNOTSUPP;
+ if (has_branch_stack(event)) {
+ /* PMU has BHRB enabled */
+ if (!(ppmu->flags & PPMU_BHRB))
+ return -EOPNOTSUPP;
+ }
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
@@ -1314,6 +1388,15 @@ static int power_pmu_event_init(struct perf_event *event)
cpuhw = &get_cpu_var(cpu_hw_events);
err = power_check_constraints(cpuhw, events, cflags, n + 1);
+
+ if (has_branch_stack(event)) {
+ cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+ event->attr.branch_sample_type);
+
+ if(cpuhw->bhrb_filter == -1)
+ return -EOPNOTSUPP;
+ }
+
put_cpu_var(cpu_hw_events);
if (err)
return -EINVAL;
@@ -1372,8 +1455,79 @@ struct pmu power_pmu = {
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
.event_idx = power_pmu_event_idx,
+ .flush_branch_stack = power_pmu_flush_branch_stack,
};
+/* Processing BHRB entries */
+void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
+{
+ u64 val;
+ u64 addr;
+ int r_index, u_index, target, pred;
+
+ r_index = 0;
+ u_index = 0;
+ while (r_index < ppmu->bhrb_nr) {
+ /* Assembly read function */
+ val = read_bhrb(r_index);
+
+ /* Terminal marker: End of valid BHRB entries */
+ if (val == 0) {
+ break;
+ } else {
+ /* BHRB field break up */
+ addr = val & BHRB_EA;
+ pred = val & BHRB_PREDICTION;
+ target = val & BHRB_TARGET;
+
+ /* Probable Missed entry: Not applicable for POWER8 */
+ if ((addr == 0) && (target == 0) && (pred == 1)) {
+ r_index++;
+ continue;
+ }
+
+ /* Real Missed entry: Power8 based missed entry */
+ if ((addr == 0) && (target == 1) && (pred == 1)) {
+ r_index++;
+ continue;
+ }
+
+ /* Reserved condition: Not a valid entry */
+ if ((addr == 0) && (target == 1) && (pred == 0)) {
+ r_index++;
+ continue;
+ }
+
+ /* Is a target address */
+ if (val & BHRB_TARGET) {
+ /* First address cannot be a target address */
+ if (r_index == 0) {
+ r_index++;
+ continue;
+ }
+
+ /* Update target address for the previous entry */
+ cpuhw->bhrb_entries[u_index - 1].to = addr;
+ cpuhw->bhrb_entries[u_index - 1].mispred = pred;
+ cpuhw->bhrb_entries[u_index - 1].predicted = ~pred;
+
+ /* Dont increment u_index */
+ r_index++;
+ } else {
+ /* Update address, flags for current entry */
+ cpuhw->bhrb_entries[u_index].from = addr;
+ cpuhw->bhrb_entries[u_index].mispred = pred;
+ cpuhw->bhrb_entries[u_index].predicted = ~pred;
+
+ /* Successfully popullated one entry */
+ u_index++;
+ r_index++;
+ }
+ }
+ }
+ cpuhw->bhrb_stack.nr = u_index;
+ return;
+}
/*
* A counter has overflowed; update its count and record
@@ -1433,6 +1587,13 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (event->attr.sample_type & PERF_SAMPLE_ADDR)
perf_get_data_addr(regs, &data.addr);
+ if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
+ struct cpu_hw_events *cpuhw;
+ cpuhw = &__get_cpu_var(cpu_hw_events);
+ power_pmu_bhrb_read(cpuhw);
+ data.br_stack = &cpuhw->bhrb_stack;
+ }
+
if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);
}
--
1.7.11.7
^ permalink raw reply related
* [PATCH V3 1/5] powerpc, perf: Add new BHRB related instructions for POWER8
From: Anshuman Khandual @ 2013-04-18 12:26 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel; +Cc: mikey
In-Reply-To: <1366287976-3900-1-git-send-email-khandual@linux.vnet.ibm.com>
This patch adds new POWER8 instruction encoding for reading
the BHRB buffer entries and also clearing it. Encoding for
"clrbhrb" instruction is straight forward. But "mfbhrbe"
encoding involves reading a certain index of BHRB buffer
into a particular GPR register.
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/ppc-opcode.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 8752bc8..93ae5a1 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -82,6 +82,7 @@
#define __REGA0_R31 31
/* sorted alphabetically */
+#define PPC_INST_BHRBE 0x7c00025c
#define PPC_INST_DCBA 0x7c0005ec
#define PPC_INST_DCBA_MASK 0xfc0007fe
#define PPC_INST_DCBAL 0x7c2005ec
@@ -297,6 +298,12 @@
#define PPC_NAP stringify_in_c(.long PPC_INST_NAP)
#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)
+/* BHRB instructions */
+#define PPC_CLRBHRB stringify_in_c(.long 0x7c00035c)
+#define PPC_MFBHRBE(r, n) stringify_in_c(.long PPC_INST_BHRBE | \
+ __PPC_RS(r) | \
+ (((n) & 0x1f) << 11))
+
/* Transactional memory instructions */
#define TRECHKPT stringify_in_c(.long PPC_INST_TRECHKPT)
#define TRECLAIM(r) stringify_in_c(.long PPC_INST_TRECLAIM \
--
1.7.11.7
^ permalink raw reply related
* [PATCH V3 3/5] powerpc, perf: Add new BHRB related generic functions, data and flags
From: Anshuman Khandual @ 2013-04-18 12:26 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel; +Cc: mikey
In-Reply-To: <1366287976-3900-1-git-send-email-khandual@linux.vnet.ibm.com>
This patch adds couple of generic functions to power_pmu structure
which would configure the BHRB and it's filters. It also adds
representation of the number of BHRB entries present on the PMU.
A new PMU flag PPMU_BHRB would indicate presence of BHRB feature.
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/perf_event_server.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 57b42da..3f0c15c 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -33,6 +33,8 @@ struct power_pmu {
unsigned long *valp);
int (*get_alternatives)(u64 event_id, unsigned int flags,
u64 alt[]);
+ u64 (*bhrb_filter_map)(u64 branch_sample_type);
+ void (*config_bhrb)(u64 pmu_bhrb_filter);
void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
int (*limited_pmc_event)(u64 event_id);
u32 flags;
@@ -42,6 +44,9 @@ struct power_pmu {
int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+ /* BHRB entries in the PMU */
+ int bhrb_nr;
};
/*
@@ -54,6 +59,7 @@ struct power_pmu {
#define PPMU_SIAR_VALID 0x00000010 /* Processor has SIAR Valid bit */
#define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */
#define PPMU_HAS_SIER 0x00000040 /* Has SIER */
+#define PPMU_BHRB 0x00000080 /* has BHRB feature enabled */
/*
* Values for flags to get_alternatives()
--
1.7.11.7
^ permalink raw reply related
* [PATCH V3 4/5] powerpc, perf: Define BHRB generic functions, data and flags for POWER8
From: Anshuman Khandual @ 2013-04-18 12:26 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel; +Cc: mikey
In-Reply-To: <1366287976-3900-1-git-send-email-khandual@linux.vnet.ibm.com>
This patch populates BHRB specific data for power_pmu structure. It
also implements POWER8 specific BHRB filter and configuration functions.
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
arch/powerpc/perf/power8-pmu.c | 57 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 56 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 106ae0b..153408c 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -109,6 +109,16 @@
#define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)
#define EVENT_PSEL_MASK 0xff /* PMCxSEL value */
+/* MMCRA IFM bits - POWER8 */
+#define POWER8_MMCRA_IFM1 0x0000000040000000UL
+#define POWER8_MMCRA_IFM2 0x0000000080000000UL
+#define POWER8_MMCRA_IFM3 0x00000000C0000000UL
+
+#define ONLY_PLM \
+ (PERF_SAMPLE_BRANCH_USER |\
+ PERF_SAMPLE_BRANCH_KERNEL |\
+ PERF_SAMPLE_BRANCH_HV)
+
/*
* Layout of constraint bits:
*
@@ -428,6 +438,48 @@ static int power8_generic_events[] = {
[PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
};
+static u64 power8_bhrb_filter_map(u64 branch_sample_type)
+{
+ u64 pmu_bhrb_filter = 0;
+ u64 br_privilege = branch_sample_type & ONLY_PLM;
+
+ /* BHRB and regular PMU events share the same prvillege state
+ * filter configuration. BHRB is always recorded along with a
+ * regular PMU event. So privilege state filter criteria for BHRB
+ * and the companion PMU events has to be the same. As a default
+ * "perf record" tool sets all privillege bits ON when no filter
+ * criteria is provided in the command line. So as along as all
+ * privillege bits are ON or they are OFF, we are good to go.
+ */
+ if ((br_privilege != 7) && (br_privilege != 0))
+ return -1;
+
+ /* No branch filter requested */
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
+ return pmu_bhrb_filter;
+
+ /* Invalid branch filter options - HW does not support */
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ return -1;
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ return -1;
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+ pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
+ return pmu_bhrb_filter;
+ }
+
+ /* Every thing else is unsupported */
+ return -1;
+}
+
+static void power8_config_bhrb(u64 pmu_bhrb_filter)
+{
+ /* Enable BHRB filter in PMU */
+ mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
+}
+
static struct power_pmu power8_pmu = {
.name = "POWER8",
.n_counter = 6,
@@ -435,12 +487,15 @@ static struct power_pmu power8_pmu = {
.add_fields = POWER8_ADD_FIELDS,
.test_adder = POWER8_TEST_ADDER,
.compute_mmcr = power8_compute_mmcr,
+ .config_bhrb = power8_config_bhrb,
+ .bhrb_filter_map = power8_bhrb_filter_map,
.get_constraint = power8_get_constraint,
.disable_pmc = power8_disable_pmc,
- .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER,
+ .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB,
.n_generic = ARRAY_SIZE(power8_generic_events),
.generic_events = power8_generic_events,
.attr_groups = power8_pmu_attr_groups,
+ .bhrb_nr = 32,
};
static int __init init_power8_pmu(void)
--
1.7.11.7
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox