LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 3/3] soc: fsl: add RCPM driver
From: Ran Wang @ 2019-05-20  6:58 UTC (permalink / raw)
  To: Li Yang, Rob Herring, Mark Rutland, Pavel Machek
  Cc: Len Brown, devicetree, Greg Kroah-Hartman, linux-pm,
	Rafael J . Wysocki, linux-kernel, Ran Wang, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20190520065816.32360-1-ran.wang_1@nxp.com>

The NXP's QorIQ Processors based on ARM Core have RCPM module
(Run Control and Power Management), which performs all device-level
tasks associated with power management such as wakeup source control.

This driver depends on PM wakeup source framework which help to
collect wake information.

Signed-off-by: Ran Wang <ran.wang_1@nxp.com>
---
Change in v3:
	- Some whitespace ajdustment.

Change in v2:
	- Rebase Kconfig and Makefile update to latest mainline.

 drivers/soc/fsl/Kconfig  |    8 +++
 drivers/soc/fsl/Makefile |    1 +
 drivers/soc/fsl/rcpm.c   |  124 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 133 insertions(+), 0 deletions(-)
 create mode 100644 drivers/soc/fsl/rcpm.c

diff --git a/drivers/soc/fsl/Kconfig b/drivers/soc/fsl/Kconfig
index 61f8e14..8e84e40 100644
--- a/drivers/soc/fsl/Kconfig
+++ b/drivers/soc/fsl/Kconfig
@@ -29,4 +29,12 @@ config FSL_MC_DPIO
 	  other DPAA2 objects. This driver does not expose the DPIO
 	  objects individually, but groups them under a service layer
 	  API.
+
+config FSL_RCPM
+	bool "Freescale RCPM support"
+	depends on PM_SLEEP
+	help
+	  The NXP's QorIQ Processors based on ARM Core have RCPM module
+	  (Run Control and Power Management), which performs all device-level
+	  tasks associated with power management, such as wakeup source control.
 endmenu
diff --git a/drivers/soc/fsl/Makefile b/drivers/soc/fsl/Makefile
index 803ef1b..c1be6ee 100644
--- a/drivers/soc/fsl/Makefile
+++ b/drivers/soc/fsl/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_QUICC_ENGINE)		+= qe/
 obj-$(CONFIG_CPM)			+= qe/
 obj-$(CONFIG_FSL_GUTS)			+= guts.o
 obj-$(CONFIG_FSL_MC_DPIO) 		+= dpio/
+obj-$(CONFIG_FSL_RCPM)		+= rcpm.o
diff --git a/drivers/soc/fsl/rcpm.c b/drivers/soc/fsl/rcpm.c
new file mode 100644
index 0000000..678d1cd
--- /dev/null
+++ b/drivers/soc/fsl/rcpm.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// rcpm.c - Freescale QorIQ RCPM driver
+//
+// Copyright 2019 NXP
+//
+// Author: Ran Wang <ran.wang_1@nxp.com>,
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
+#include <linux/kernel.h>
+
+#define RCPM_WAKEUP_CELL_MAX_SIZE	7
+
+struct rcpm {
+	unsigned int	wakeup_cells;
+	void __iomem	*ippdexpcr_base;
+	bool		little_endian;
+};
+
+static int rcpm_pm_prepare(struct device *dev)
+{
+	struct device_node	*np = dev->of_node;
+	struct wakeup_source	*ws;
+	struct rcpm		*rcpm;
+	u32 value[RCPM_WAKEUP_CELL_MAX_SIZE + 1], tmp;
+	int i, ret;
+
+	rcpm = dev_get_drvdata(dev);
+	if (!rcpm)
+		return -EINVAL;
+
+	/* Begin with first registered wakeup source */
+	ws = wakeup_source_get_next(NULL);
+	while (ws) {
+		ret = device_property_read_u32_array(ws->attached_dev,
+				"fsl,rcpm-wakeup", value, rcpm->wakeup_cells + 1);
+
+		/*  Wakeup source should refer to current rcpm device */
+		if (ret || (np->phandle != value[0])) {
+			dev_info(dev, "%s doesn't refer to this rcpm\n",
+					ws->name);
+			ws = wakeup_source_get_next(ws);
+			continue;
+		}
+
+		for (i = 0; i < rcpm->wakeup_cells; i++) {
+			/* We can only OR related bits */
+			if (value[i + 1]) {
+				if (rcpm->little_endian) {
+					tmp = ioread32(rcpm->ippdexpcr_base + i * 4);
+					tmp |= value[i + 1];
+					iowrite32(tmp, rcpm->ippdexpcr_base + i * 4);
+				} else {
+					tmp = ioread32be(rcpm->ippdexpcr_base + i * 4);
+					tmp |= value[i + 1];
+					iowrite32be(tmp, rcpm->ippdexpcr_base + i * 4);
+				}
+			}
+		}
+		ws = wakeup_source_get_next(ws);
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops rcpm_pm_ops = {
+	.prepare =  rcpm_pm_prepare,
+};
+
+static int rcpm_probe(struct platform_device *pdev)
+{
+	struct device	*dev = &pdev->dev;
+	struct resource *r;
+	struct rcpm	*rcpm;
+	int ret;
+
+	rcpm = devm_kzalloc(dev, sizeof(*rcpm), GFP_KERNEL);
+	if (!rcpm)
+		return -ENOMEM;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r)
+		return -ENODEV;
+
+	rcpm->ippdexpcr_base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(rcpm->ippdexpcr_base)) {
+		ret =  PTR_ERR(rcpm->ippdexpcr_base);
+		return ret;
+	}
+
+	rcpm->little_endian = device_property_read_bool(
+			&pdev->dev, "little-endian");
+
+	ret = device_property_read_u32(&pdev->dev,
+			"fsl,#rcpm-wakeup-cells", &rcpm->wakeup_cells);
+	if (ret)
+		return ret;
+
+	dev_set_drvdata(&pdev->dev, rcpm);
+
+	return 0;
+}
+
+static const struct of_device_id rcpm_of_match[] = {
+	{ .compatible = "fsl,qoriq-rcpm-2.1+", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, rcpm_of_match);
+
+static struct platform_driver rcpm_driver = {
+	.driver = {
+		.name = "rcpm",
+		.of_match_table = rcpm_of_match,
+		.pm	= &rcpm_pm_ops,
+	},
+	.probe = rcpm_probe,
+};
+
+module_platform_driver(rcpm_driver);
-- 
1.7.1


^ permalink raw reply related

* Re: PROBLEM: Power9: kernel oops on memory hotunplug from ppc64le guest
From: Nicholas Piggin @ 2019-05-20  7:00 UTC (permalink / raw)
  To: bharata
  Cc: bharata, linux-kernel, linux-next, aneesh.kumar, srikanth,
	linuxppc-dev
In-Reply-To: <20190520055622.GC22939@in.ibm.com>

Bharata B Rao's on May 20, 2019 3:56 pm:
> On Mon, May 20, 2019 at 02:48:35PM +1000, Nicholas Piggin wrote:
>> >> > git bisect points to
>> >> >
>> >> > commit 4231aba000f5a4583dd9f67057aadb68c3eca99d
>> >> > Author: Nicholas Piggin <npiggin@gmail.com>
>> >> > Date:   Fri Jul 27 21:48:17 2018 +1000
>> >> >
>> >> >     powerpc/64s: Fix page table fragment refcount race vs speculative references
>> >> >
>> >> >     The page table fragment allocator uses the main page refcount racily
>> >> >     with respect to speculative references. A customer observed a BUG due
>> >> >     to page table page refcount underflow in the fragment allocator. This
>> >> >     can be caused by the fragment allocator set_page_count stomping on a
>> >> >     speculative reference, and then the speculative failure handler
>> >> >     decrements the new reference, and the underflow eventually pops when
>> >> >     the page tables are freed.
>> >> >
>> >> >     Fix this by using a dedicated field in the struct page for the page
>> >> >     table fragment allocator.
>> >> >
>> >> >     Fixes: 5c1f6ee9a31c ("powerpc: Reduce PTE table memory wastage")
>> >> >     Cc: stable@vger.kernel.org # v3.10+
>> >> 
>> >> That's the commit that added the BUG_ON(), so prior to that you won't
>> >> see the crash.
>> > 
>> > Right, but the commit says it fixes page table page refcount underflow by
>> > introducing a new field &page->pt_frag_refcount. Now we are hitting the underflow
>> > for this pt_frag_refcount.
>> 
>> The fixed underflow is caused by a bug (race on page count) that got 
>> fixed by that patch. You are hitting a different underflow here. It's
>> not certain my patch caused it, I'm just trying to reproduce now.
> 
> Ok.

Can't reproduce I'm afraid, tried adding and removing 8GB memory from a
4GB guest (via host adding / removing memory device), and it just works.

It's likely to be an edge case like an off by one or rounding error
that just happens to trigger in your config. Might be easiest if you
could test with a debug patch.

Thanks,
Nick


^ permalink raw reply

* [PATCH v2] ocxl: Fix potential memory leak on context creation
From: Frederic Barrat @ 2019-05-20  7:16 UTC (permalink / raw)
  To: linuxppc-dev, andrew.donnellan, alastair; +Cc: clombard

If we couldn't fully init a context, we were leaking memory.

Fixes: b9721d275cc2 ("ocxl: Allow external drivers to use OpenCAPI contexts")
Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
---

Changelog:
v2: reset context pointer in case of allocation failure (Andrew)

 drivers/misc/ocxl/context.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
index bab9c9364184..24e4fb010275 100644
--- a/drivers/misc/ocxl/context.c
+++ b/drivers/misc/ocxl/context.c
@@ -22,6 +22,8 @@ int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu,
 			afu->pasid_base + afu->pasid_max, GFP_KERNEL);
 	if (pasid < 0) {
 		mutex_unlock(&afu->contexts_lock);
+		kfree(*context);
+		*context = NULL;
 		return pasid;
 	}
 	afu->pasid_count++;
-- 
2.21.0


^ permalink raw reply related

* Re: [PATCH v2] ocxl: Fix potential memory leak on context creation
From: Andrew Donnellan @ 2019-05-20  7:18 UTC (permalink / raw)
  To: Frederic Barrat, linuxppc-dev, andrew.donnellan, alastair; +Cc: clombard
In-Reply-To: <20190520071618.1722-1-fbarrat@linux.ibm.com>

Acked-by: Andrew Donnellan <ajd@linux.ibm.com>

On 20/5/19 5:16 pm, Frederic Barrat wrote:
> If we couldn't fully init a context, we were leaking memory.
> 
> Fixes: b9721d275cc2 ("ocxl: Allow external drivers to use OpenCAPI contexts")
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> ---
> 
> Changelog:
> v2: reset context pointer in case of allocation failure (Andrew)
> 
>   drivers/misc/ocxl/context.c | 2 ++
>   1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
> index bab9c9364184..24e4fb010275 100644
> --- a/drivers/misc/ocxl/context.c
> +++ b/drivers/misc/ocxl/context.c
> @@ -22,6 +22,8 @@ int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu,
>   			afu->pasid_base + afu->pasid_max, GFP_KERNEL);
>   	if (pasid < 0) {
>   		mutex_unlock(&afu->contexts_lock);
> +		kfree(*context);
> +		*context = NULL;
>   		return pasid;
>   	}
>   	afu->pasid_count++;
> 

-- 
Andrew Donnellan              OzLabs, ADL Canberra
ajd@linux.ibm.com             IBM Australia Limited


^ permalink raw reply

* Re: [PATCH v3 3/3] soc: fsl: add RCPM driver
From: Pavel Machek @ 2019-05-20  7:26 UTC (permalink / raw)
  To: Ran Wang
  Cc: Mark Rutland, Len Brown, devicetree, Greg Kroah-Hartman, linux-pm,
	Rafael J . Wysocki, linux-kernel, Li Yang, Rob Herring,
	linuxppc-dev, linux-arm-kernel
In-Reply-To: <20190520065816.32360-3-ran.wang_1@nxp.com>

[-- Attachment #1: Type: text/plain, Size: 866 bytes --]

Hi!

> The NXP's QorIQ Processors based on ARM Core have RCPM module
> (Run Control and Power Management), which performs all device-level
> tasks associated with power management such as wakeup source control.
> 
> This driver depends on PM wakeup source framework which help to
> collect wake information.
> 
> Signed-off-by: Ran Wang <ran.wang_1@nxp.com>

> +// Copyright 2019 NXP
> +//
> +// Author: Ran Wang <ran.wang_1@nxp.com>,

extra ,

> +	rcpm = dev_get_drvdata(dev);
> +	if (!rcpm)
> +		return -EINVAL;
> +
> +	/* Begin with first registered wakeup source */
> +	ws = wakeup_source_get_next(NULL);
> +	while (ws) {

while (ws = wakeup_source_get_next(NULL))

?

								Pavel
								
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply

* RE: [PATCH v3 3/3] soc: fsl: add RCPM driver
From: Ran Wang @ 2019-05-20  7:39 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Mark Rutland, Len Brown, devicetree@vger.kernel.org,
	Greg Kroah-Hartman, linux-pm@vger.kernel.org, Rafael J . Wysocki,
	linux-kernel@vger.kernel.org, Leo Li, Rob Herring,
	linuxppc-dev@lists.ozlabs.org,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <20190520072630.GA3674@amd>

Hi Pavel,

On Monday, May 20, 2019 15:27: Pavel Machek wrote:
> 
> Hi!
> 
> > The NXP's QorIQ Processors based on ARM Core have RCPM module (Run
> > Control and Power Management), which performs all device-level tasks
> > associated with power management such as wakeup source control.
> >
> > This driver depends on PM wakeup source framework which help to
> > collect wake information.
> >
> > Signed-off-by: Ran Wang <ran.wang_1@nxp.com>
> 
> > +// Copyright 2019 NXP
> > +//
> > +// Author: Ran Wang <ran.wang_1@nxp.com>,
> 
> extra ,

OK, will update.

> > +	rcpm = dev_get_drvdata(dev);
> > +	if (!rcpm)
> > +		return -EINVAL;
> > +
> > +	/* Begin with first registered wakeup source */
> > +	ws = wakeup_source_get_next(NULL);
> > +	while (ws) {
> 
> while (ws = wakeup_source_get_next(NULL))
> 
> ?
I just answered this in v2 mail thread: 
"Actually, we only pass NULL to wakeup_source_get_next()
at very first call to get 1st wakeup source. Then in the while
loop, we will fetch next source but not 1st, that's different.
I am afraid your suggestion is not quite correct."

Regards
Ran

^ permalink raw reply

* RE: [EXT] Re: [PATCH 2/3] arm64: dts: ls1028a: Add PCIe controller DT nodes
From: Xiaowei Bao @ 2019-05-20  8:12 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Mark Rutland, Roy Zang, Lorenzo Pieralisi, DTML, gregkh,
	Kate Stewart, linuxppc-dev, linux-pci, Linux Kernel Mailing List,
	Kishon, M.h. Lian, Rob Herring, Linux ARM, Philippe Ombredanne,
	Bjorn Helgaas, Leo Li, Shawn Guo, Shawn Lin, Mingkai Hu
In-Reply-To: <CAK8P3a0kKb7njiJvUkwJYwf-yc-hEyErSiWcvbdf0XnMoctzrg@mail.gmail.com>

Hi Arndt,

-----Original Message-----
From: Arnd Bergmann <arnd@arndb.de> 
Sent: 2019年5月17日 16:59
To: Xiaowei Bao <xiaowei.bao@nxp.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>; Rob Herring <robh+dt@kernel.org>; Mark Rutland <mark.rutland@arm.com>; Shawn Guo <shawnguo@kernel.org>; Leo Li <leoyang.li@nxp.com>; Kishon <kishon@ti.com>; Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>; gregkh <gregkh@linuxfoundation.org>; M.h. Lian <minghuan.lian@nxp.com>; Mingkai Hu <mingkai.hu@nxp.com>; Roy Zang <roy.zang@nxp.com>; Kate Stewart <kstewart@linuxfoundation.org>; Philippe Ombredanne <pombredanne@nexb.com>; Shawn Lin <shawn.lin@rock-chips.com>; linux-pci <linux-pci@vger.kernel.org>; DTML <devicetree@vger.kernel.org>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Linux ARM <linux-arm-kernel@lists.infradead.org>; linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Subject: Re: [EXT] Re: [PATCH 2/3] arm64: dts: ls1028a: Add PCIe controller DT nodes

Caution: EXT Email

On Fri, May 17, 2019 at 5:21 AM Xiaowei Bao <xiaowei.bao@nxp.com> wrote:
> -----Original Message-----
> From: Arnd Bergmann <arnd@arndb.de>
> On Wed, May 15, 2019 at 9:36 AM Xiaowei Bao <xiaowei.bao@nxp.com> wrote:
> > Signed-off-by: Xiaowei Bao <xiaowei.bao@nxp.com>
> > ---
> >  arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi |   52 ++++++++++++++++++++++++
> >  1 files changed, 52 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi 
> > b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
> > index b045812..50b579b 100644
> > --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
> > +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
> > @@ -398,6 +398,58 @@
> >                         status = "disabled";
> >                 };
> >
> > +               pcie@3400000 {
> > +                       compatible = "fsl,ls1028a-pcie";
> > +                       reg = <0x00 0x03400000 0x0 0x00100000   /* controller registers */
> > +                              0x80 0x00000000 0x0 0x00002000>; /* configuration space */
> > +                       reg-names = "regs", "config";
> > +                       interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, /* PME interrupt */
> > +                                    <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>; /* aer interrupt */
> > +                       interrupt-names = "pme", "aer";
> > +                       #address-cells = <3>;
> > +                       #size-cells = <2>;
> > +                       device_type = "pci";
> > +                       dma-coherent;
> > +                       num-lanes = <4>;
> > +                       bus-range = <0x0 0xff>;
> > +                       ranges = <0x81000000 0x0 0x00000000 0x80 0x00010000 0x0 0x00010000   /* downstream I/O */
> > +                                 0x82000000 0x0 0x40000000 0x80 
> > + 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
>
> Are you sure there is no support for 64-bit BARs or prefetchable memory?
> [Xiaowei Bao] sorry for late reply, Thought that our Layerscape platform has not added prefetchable memory support in DTS, so this platform has not been added, I will submit a separate patch to add prefetchable memory support for all Layerscape platforms.

Ok, thanks.

> Of course, the prefetchable PCIE device can work in our boards, 
> because the RC will assign non-prefetchable memory for this device. We 
> reserve 1G no-prefetchable memory for PCIE device, it is enough for general devices.

Sure, many devices work just fine, this is mostly a question of supporting those devices that do require multiple gigabytes, or that need prefetchable memory semantics to get the expected performance. GPUs are the obvious example, but I think there are others (infiniband?).
[Xiaowei Bao] sorry, I don't know much about infiniband and GPU, as you said, I think many devices works fine with this DTS, I will add the prefetchable memory entry in DTS future and submit another patch.

      Arnd

^ permalink raw reply

* [PATCH v2] mm/nvdimm: Pick the right alignment default when creating dax devices
From: Aneesh Kumar K.V @ 2019-05-20  8:14 UTC (permalink / raw)
  To: dan.j.williams; +Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V, linux-nvdimm

Allow arch to provide the supported alignments and use hugepage alignment only
if we support hugepage. Right now we depend on compile time configs whereas this
patch switch this to runtime discovery.

Architectures like ppc64 can have THP enabled in code, but then can have
hugepage size disabled by the hypervisor. This allows us to create dax devices
with PAGE_SIZE alignment in this case.

Existing dax namespace with alignment larger than PAGE_SIZE will fail to
initialize in this specific case. We still allow fsdax namespace initialization.

With respect to identifying whether to enable hugepage fault for a dax device,
if THP is enabled during compile, we default to taking hugepage fault and in dax
fault handler if we find the fault size > alignment we retry with PAGE_SIZE
fault size.

This also addresses the below failure scenario on ppc64

ndctl create-namespace --mode=devdax  | grep align
 "align":16777216,
 "align":16777216

cat /sys/devices/ndbus0/region0/dax0.0/supported_alignments
 65536 16777216

daxio.static-debug  -z -o /dev/dax0.0
  Bus error (core dumped)

  $ dmesg | tail
   lpar: Failed hash pte insert with error -4
   hash-mmu: mm: Hashing failure ! EA=0x7fff17000000 access=0x8000000000000006 current=daxio
   hash-mmu:     trap=0x300 vsid=0x22cb7a3 ssize=1 base psize=2 psize 10 pte=0xc000000501002b86
   daxio[3860]: bus error (7) at 7fff17000000 nip 7fff973c007c lr 7fff973bff34 code 2 in libpmem.so.1.0.0[7fff973b0000+20000]
   daxio[3860]: code: 792945e4 7d494b78 e95f0098 7d494b78 f93f00a0 4800012c e93f0088 f93f0120
   daxio[3860]: code: e93f00a0 f93f0128 e93f0120 e95f0128 <f9490000> e93f0088 39290008 f93f0110

The failure was due to guest kernel using wrong page size.

The namespaces created with 16M alignment will appear as below on a config with
16M page size disabled.

$ ndctl list -Ni
[
  {
    "dev":"namespace0.1",
    "mode":"fsdax",
    "map":"dev",
    "size":5351931904,
    "uuid":"fc6e9667-461a-4718-82b4-69b24570bddb",
    "align":16777216,
    "blockdev":"pmem0.1",
    "supported_alignments":[
      65536
    ]
  },
  {
    "dev":"namespace0.0",
    "mode":"fsdax",    <==== devdax 16M alignment marked disabled.
    "map":"mem",
    "size":5368709120,
    "uuid":"a4bdf81a-f2ee-4bc6-91db-7b87eddd0484",
    "state":"disabled"
  }
]

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---

Changes from V1:
* Return -EOPNOTSUPP if we find that we can't initialize the namespace
* Compare fix for DAX signature.

 arch/powerpc/include/asm/libnvdimm.h |  9 ++++++++
 arch/powerpc/mm/Makefile             |  1 +
 arch/powerpc/mm/nvdimm.c             | 34 ++++++++++++++++++++++++++++
 arch/x86/include/asm/libnvdimm.h     | 19 ++++++++++++++++
 drivers/nvdimm/nd.h                  |  6 -----
 drivers/nvdimm/pfn_devs.c            | 32 +++++++++++++++++++++++++-
 drivers/nvdimm/pmem.c                | 26 +++++++++++++++++----
 include/linux/huge_mm.h              |  7 +++++-
 8 files changed, 122 insertions(+), 12 deletions(-)
 create mode 100644 arch/powerpc/include/asm/libnvdimm.h
 create mode 100644 arch/powerpc/mm/nvdimm.c
 create mode 100644 arch/x86/include/asm/libnvdimm.h

diff --git a/arch/powerpc/include/asm/libnvdimm.h b/arch/powerpc/include/asm/libnvdimm.h
new file mode 100644
index 000000000000..d35fd7f48603
--- /dev/null
+++ b/arch/powerpc/include/asm/libnvdimm.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_LIBNVDIMM_H
+#define _ASM_POWERPC_LIBNVDIMM_H
+
+#define nd_pfn_supported_alignments nd_pfn_supported_alignments
+extern unsigned long *nd_pfn_supported_alignments(void);
+extern unsigned long nd_pfn_default_alignment(void);
+
+#endif
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 0f499db315d6..42e4a399ba5d 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -20,3 +20,4 @@ obj-$(CONFIG_HIGHMEM)		+= highmem.o
 obj-$(CONFIG_PPC_COPRO_BASE)	+= copro_fault.o
 obj-$(CONFIG_PPC_PTDUMP)	+= ptdump/
 obj-$(CONFIG_KASAN)		+= kasan/
+obj-$(CONFIG_NVDIMM_PFN)		+= nvdimm.o
diff --git a/arch/powerpc/mm/nvdimm.c b/arch/powerpc/mm/nvdimm.c
new file mode 100644
index 000000000000..a29a4510715e
--- /dev/null
+++ b/arch/powerpc/mm/nvdimm.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/pgtable.h>
+#include <asm/page.h>
+
+#include <linux/mm.h>
+/*
+ * We support only pte and pmd mappings for now.
+ */
+const unsigned long *nd_pfn_supported_alignments(void)
+{
+	static unsigned long supported_alignments[3];
+
+	supported_alignments[0] = PAGE_SIZE;
+
+	if (has_transparent_hugepage())
+		supported_alignments[1] = HPAGE_PMD_SIZE;
+	else
+		supported_alignments[1] = 0;
+
+	supported_alignments[2] = 0;
+	return supported_alignments;
+}
+
+/*
+ * Use pmd mapping if supported as default alignment
+ */
+unsigned long nd_pfn_default_alignment(void)
+{
+
+	if (has_transparent_hugepage())
+		return HPAGE_PMD_SIZE;
+	return PAGE_SIZE;
+}
diff --git a/arch/x86/include/asm/libnvdimm.h b/arch/x86/include/asm/libnvdimm.h
new file mode 100644
index 000000000000..3d5361db9164
--- /dev/null
+++ b/arch/x86/include/asm/libnvdimm.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_LIBNVDIMM_H
+#define _ASM_X86_LIBNVDIMM_H
+
+static inline unsigned long nd_pfn_default_alignment(void)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	return HPAGE_PMD_SIZE;
+#else
+	return PAGE_SIZE;
+#endif
+}
+
+static inline unsigned long nd_altmap_align_size(unsigned long nd_align)
+{
+	return PMD_SIZE;
+}
+
+#endif
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index a5ac3b240293..44fe923b2ee3 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -292,12 +292,6 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region)
 struct nd_pfn *to_nd_pfn(struct device *dev);
 #if IS_ENABLED(CONFIG_NVDIMM_PFN)
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PFN_DEFAULT_ALIGNMENT HPAGE_PMD_SIZE
-#else
-#define PFN_DEFAULT_ALIGNMENT PAGE_SIZE
-#endif
-
 int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns);
 bool is_nd_pfn(struct device *dev);
 struct device *nd_pfn_create(struct nd_region *nd_region);
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 01f40672507f..cd0969ccbe30 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <asm/libnvdimm.h>
 #include "nd-core.h"
 #include "pfn.h"
 #include "nd.h"
@@ -111,6 +112,8 @@ static ssize_t align_show(struct device *dev,
 	return sprintf(buf, "%ld\n", nd_pfn->align);
 }
 
+#ifndef nd_pfn_supported_alignments
+#define nd_pfn_supported_alignments nd_pfn_supported_alignments
 static const unsigned long *nd_pfn_supported_alignments(void)
 {
 	/*
@@ -133,6 +136,7 @@ static const unsigned long *nd_pfn_supported_alignments(void)
 
 	return data;
 }
+#endif
 
 static ssize_t align_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
@@ -310,7 +314,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
 		return NULL;
 
 	nd_pfn->mode = PFN_MODE_NONE;
-	nd_pfn->align = PFN_DEFAULT_ALIGNMENT;
+	nd_pfn->align = nd_pfn_default_alignment();
 	dev = &nd_pfn->dev;
 	device_initialize(&nd_pfn->dev);
 	if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
@@ -420,6 +424,20 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
 	return 0;
 }
 
+static bool nd_supported_alignment(unsigned long align)
+{
+	int i;
+	const unsigned long *supported = nd_pfn_supported_alignments();
+
+	if (align == 0)
+		return false;
+
+	for (i = 0; supported[i]; i++)
+		if (align == supported[i])
+			return true;
+	return false;
+}
+
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
 	u64 checksum, offset;
@@ -474,6 +492,18 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 		align = 1UL << ilog2(offset);
 	mode = le32_to_cpu(pfn_sb->mode);
 
+	/*
+	 * Check whether the we support the alignment. For Dax if the
+	 * superblock alignment is not matching, we won't initialize
+	 * the device.
+	 */
+	if (!nd_supported_alignment(align) &&
+	    !memcmp(pfn_sb->signature, DAX_SIG, PFN_SIG_LEN)) {
+		dev_err(&nd_pfn->dev, "init failed, settings mismatch\n");
+		dev_dbg(&nd_pfn->dev, "align: %lx:%lx\n", nd_pfn->align, align);
+		return -EOPNOTSUPP;
+	}
+
 	if (!nd_pfn->uuid) {
 		/*
 		 * When probing a namepace via nd_pfn_probe() the uuid
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 845c5b430cdd..406427c064d9 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -490,6 +490,7 @@ static int pmem_attach_disk(struct device *dev,
 
 static int nd_pmem_probe(struct device *dev)
 {
+	int ret;
 	struct nd_namespace_common *ndns;
 
 	ndns = nvdimm_namespace_common_probe(dev);
@@ -505,12 +506,29 @@ static int nd_pmem_probe(struct device *dev)
 	if (is_nd_pfn(dev))
 		return pmem_attach_disk(dev, ndns);
 
-	/* if we find a valid info-block we'll come back as that personality */
-	if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0
-			|| nd_dax_probe(dev, ndns) == 0)
+	ret = nd_btt_probe(dev, ndns);
+	if (ret == 0)
 		return -ENXIO;
+	else if (ret == -EOPNOTSUPP)
+		return ret;
 
-	/* ...otherwise we're just a raw pmem device */
+	ret = nd_pfn_probe(dev, ndns);
+	if (ret == 0)
+		return -ENXIO;
+	else if (ret == -EOPNOTSUPP)
+		return ret;
+
+	ret = nd_dax_probe(dev, ndns);
+	if (ret == 0)
+		return -ENXIO;
+	else if (ret == -EOPNOTSUPP)
+		return ret;
+	/*
+	 * We have two failure conditions here, there is no
+	 * info reserver block or we found a valid info reserve block
+	 * but failed to initialize the pfn superblock.
+	 * Don't create a raw pmem disk for the second case.
+	 */
 	return pmem_attach_disk(dev, ndns);
 }
 
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 381e872bfde0..d5cfea3d8b86 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -110,7 +110,12 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
 
 	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
 		return true;
-
+	/*
+	 * For dax let's try to do hugepage fault always. If we don't support
+	 * hugepages we will not have enabled namespaces with hugepage alignment.
+	 * This also means we try to handle hugepage fault on device with
+	 * smaller alignment. But for then we will return with VM_FAULT_FALLBACK
+	 */
 	if (vma_is_dax(vma))
 		return true;
 
-- 
2.21.0


^ permalink raw reply related

* Re: PROBLEM: Power9: kernel oops on memory hotunplug from ppc64le guest
From: Bharata B Rao @ 2019-05-20  8:20 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: bharata, linux-kernel, linux-next, aneesh.kumar, srikanth,
	linuxppc-dev
In-Reply-To: <1558335484.9inx69a7ea.astroid@bobo.none>

On Mon, May 20, 2019 at 05:00:21PM +1000, Nicholas Piggin wrote:
> Bharata B Rao's on May 20, 2019 3:56 pm:
> > On Mon, May 20, 2019 at 02:48:35PM +1000, Nicholas Piggin wrote:
> >> >> > git bisect points to
> >> >> >
> >> >> > commit 4231aba000f5a4583dd9f67057aadb68c3eca99d
> >> >> > Author: Nicholas Piggin <npiggin@gmail.com>
> >> >> > Date:   Fri Jul 27 21:48:17 2018 +1000
> >> >> >
> >> >> >     powerpc/64s: Fix page table fragment refcount race vs speculative references
> >> >> >
> >> >> >     The page table fragment allocator uses the main page refcount racily
> >> >> >     with respect to speculative references. A customer observed a BUG due
> >> >> >     to page table page refcount underflow in the fragment allocator. This
> >> >> >     can be caused by the fragment allocator set_page_count stomping on a
> >> >> >     speculative reference, and then the speculative failure handler
> >> >> >     decrements the new reference, and the underflow eventually pops when
> >> >> >     the page tables are freed.
> >> >> >
> >> >> >     Fix this by using a dedicated field in the struct page for the page
> >> >> >     table fragment allocator.
> >> >> >
> >> >> >     Fixes: 5c1f6ee9a31c ("powerpc: Reduce PTE table memory wastage")
> >> >> >     Cc: stable@vger.kernel.org # v3.10+
> >> >> 
> >> >> That's the commit that added the BUG_ON(), so prior to that you won't
> >> >> see the crash.
> >> > 
> >> > Right, but the commit says it fixes page table page refcount underflow by
> >> > introducing a new field &page->pt_frag_refcount. Now we are hitting the underflow
> >> > for this pt_frag_refcount.
> >> 
> >> The fixed underflow is caused by a bug (race on page count) that got 
> >> fixed by that patch. You are hitting a different underflow here. It's
> >> not certain my patch caused it, I'm just trying to reproduce now.
> > 
> > Ok.
> 
> Can't reproduce I'm afraid, tried adding and removing 8GB memory from a
> 4GB guest (via host adding / removing memory device), and it just works.

Boot, add 8G, reboot, remove 8G is the sequence to reproduce.

> 
> It's likely to be an edge case like an off by one or rounding error
> that just happens to trigger in your config. Might be easiest if you
> could test with a debug patch.

Sure, I will continue debugging.

Regards,
Bharata.


^ permalink raw reply

* Re: [PATCH V2 3/3] soc: fsl: add RCPM driver
From: Pavel Machek @ 2019-05-20  8:56 UTC (permalink / raw)
  To: Ran Wang
  Cc: Mark Rutland, Len Brown, devicetree@vger.kernel.org, Pavel Machek,
	linux-pm@vger.kernel.org, Rafael J . Wysocki,
	linux-kernel@vger.kernel.org, Leo Li, Rob Herring,
	Greg Kroah-Hartman, linuxppc-dev@lists.ozlabs.org,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <AM5PR0402MB2865EC5E1EF12C6C1D3C5566F1060@AM5PR0402MB2865.eurprd04.prod.outlook.com>

[-- Attachment #1: Type: text/plain, Size: 1172 bytes --]

Hi!

> > > +static int rcpm_pm_prepare(struct device *dev) {
> > > +	struct device_node *np = dev->of_node;
> > > +	struct wakeup_source *ws;
> > > +	struct rcpm *rcpm;
> > > +	u32 value[RCPM_WAKEUP_CELL_MAX_SIZE + 1], tmp;
> > > +	int i, ret;
> > > +
> > > +	rcpm = dev_get_drvdata(dev);
> > > +	if (!rcpm)
> > > +		return -EINVAL;
> > > +
> > > +	/* Begin with first registered wakeup source */
> > > +	ws = wakeup_source_get_next(NULL);
> > > +	while (ws) {
> > 
> > while (ws = wakeup_source_get_next(NULL)) ?
> 
> Actually, we only pass NULL to wakeup_source_get_next() at very first
> call to get 1st wakeup source. Then in the while loop, we will fetch
> next source but not 1st, that's different. I am afraid your suggestion
> is not quite correct.

Sorry, I seen your next version before seeing this explanation.

You are right, but the current code is "interesting". What about

    ws = NULL;
    while (ws = wakeup_source_get_next(NULL)) ...

then?

Best regards,
								Pavel
-- 
DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply

* Re: [RFC PATCH 4/4] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel
From: Naveen N. Rao @ 2019-05-20  8:57 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Steven Rostedt
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1558144594.f92d1rcmes.astroid@bobo.none>

Nicholas Piggin wrote:
> Naveen N. Rao's on May 18, 2019 5:02 am:
>> With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to
>> enable function tracing and profiling. So far, with dynamic ftrace, we
>> used to only patch out the branch to _mcount(). However, Nick Piggin
>> points out that "mflr is executed by the branch unit that can only
>> execute one per cycle on POWER9 and shared with branches, so it would be
>> nice to avoid it where possible."
>> 
>> We cannot simply nop out the mflr either. Michael Ellerman pointed out
>> that when enabling function tracing, there can be a race if tracing is
>> enabled when some thread was interrupted after executing a nop'ed out
>> mflr. In this case, the thread would execute the now-patched-in branch
>> to _mcount() without having executed the preceding mflr.
>> 
>> To solve this, we now enable function tracing in 2 steps: patch in the
>> mflr instruction, use synchronize_rcu_tasks() to ensure all existing
>> threads make progress, and then patch in the branch to _mcount(). We
>> override ftrace_replace_code() with a powerpc64 variant for this
>> purpose.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
> 
> Nice! Thanks for doing a real patch. You needn't add my SOB there: my
> hack was obviously garbage :) Suggested-by if anything, then for
> clarity of changelog you can write the motivation directly rather than
> quote me.

Thanks, I meant to call out the fact that I had added your SOB before
sending the patch, but missed doing so. Your patch was perfectly fine ;)

> 
> I don't know the ftrace subsystem well, but the powerpc instructions
> and patching sequence appears to match what we agreed is the right way
> to go.
> 
> As a suggestion, I would perhaps add most of information from the
> second and third paragraphs of the changelog into comments
> (and also explain that the lone mflr r0 is harmless).
> 
> But otherwise it looks good
> 
> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

Thanks, I will incorporate those changes.


- Naveen



^ permalink raw reply

* [PATCH] powerpc/powernv: Return for invalid IMC domain
From: Anju T Sudhakar @ 2019-05-20  8:57 UTC (permalink / raw)
  To: mpe; +Cc: pavsubra, maddy, linuxppc-dev, anju

Currently init_imc_pmu() can be failed either because
an IMC unit with invalid domain(i.e an IMC node not
supported by the kernel) is attempted a pmu-registration
or something went wrong while registering a valid IMC unit.
In both the cases kernel provides a 'Registration failed'
error message.

Example:
Log message, when trace-imc node is not supported by the kernel, and the
skiboot supports trace-imc node.

So for kernel, trace-imc node is now an unknown domain.

[    1.731870] nest_phb5_imc performance monitor hardware support registered
[    1.731944] nest_powerbus0_imc performance monitor hardware support registered
[    1.734458] thread_imc performance monitor hardware support registered
[    1.734460] IMC Unknown Device type
[    1.734462] IMC PMU (null) Register failed
[    1.734558] nest_xlink0_imc performance monitor hardware support registered
[    1.734614] nest_xlink1_imc performance monitor hardware support registered
[    1.734670] nest_xlink2_imc performance monitor hardware support registered
[    1.747043] Initialise system trusted keyrings
[    1.747054] Key type blacklist registered


To avoid ambiguity on the error message, return for invalid domain
before attempting a pmu registration. 

Fixes: 8f95faaac56c1 (`powerpc/powernv: Detect and create IMC device`)
Reported-by: Pavaman Subramaniyam <pavsubra@in.ibm.com>
Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/opal-imc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 58a0794..4e8b0e1 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -161,6 +161,10 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
 	struct imc_pmu *pmu_ptr;
 	u32 offset;
 
+	/* Return for unknown domain */
+	if (domain < 0)
+		return -EINVAL;
+
 	/* memory for pmu */
 	pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
 	if (!pmu_ptr)
-- 
1.8.3.1


^ permalink raw reply related

* RE: [PATCH V2 3/3] soc: fsl: add RCPM driver
From: Ran Wang @ 2019-05-20  9:03 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Mark Rutland, Len Brown, devicetree@vger.kernel.org,
	Greg Kroah-Hartman, linux-pm@vger.kernel.org, Rafael J . Wysocki,
	linux-kernel@vger.kernel.org, Leo Li, Rob Herring,
	linuxppc-dev@lists.ozlabs.org,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <20190520085647.GA9748@amd>

Hi Pavel,

On Monday, May 20, 2019 16:57, Pavel Machek wrote:
> 
> Hi!
> 
> > > > +static int rcpm_pm_prepare(struct device *dev) {
> > > > +	struct device_node *np = dev->of_node;
> > > > +	struct wakeup_source *ws;
> > > > +	struct rcpm *rcpm;
> > > > +	u32 value[RCPM_WAKEUP_CELL_MAX_SIZE + 1], tmp;
> > > > +	int i, ret;
> > > > +
> > > > +	rcpm = dev_get_drvdata(dev);
> > > > +	if (!rcpm)
> > > > +		return -EINVAL;
> > > > +
> > > > +	/* Begin with first registered wakeup source */
> > > > +	ws = wakeup_source_get_next(NULL);
> > > > +	while (ws) {
> > >
> > > while (ws = wakeup_source_get_next(NULL)) ?
> >
> > Actually, we only pass NULL to wakeup_source_get_next() at very first
> > call to get 1st wakeup source. Then in the while loop, we will fetch
> > next source but not 1st, that's different. I am afraid your suggestion
> > is not quite correct.
> 
> Sorry, I seen your next version before seeing this explanation.
> 
> You are right, but the current code is "interesting". What about
> 
>     ws = NULL;
>     while (ws = wakeup_source_get_next(NULL)) ...
> 
> then?

Did you mean:
     ws = NULL;
     while (ws = wakeup_source_get_next(ws)) ...

   Yes, that will be the same to my original logic, do you recommend to change
to this? :)

Regards,
Ran

^ permalink raw reply

* [PATCH] powerpc/perf: Use cpumask_last() to determine the
From: Anju T Sudhakar @ 2019-05-20  9:05 UTC (permalink / raw)
  To: mpe; +Cc: aravinda, maddy, linuxppc-dev, anju, ego

Nest and core imc(In-memory Collection counters) assigns a particular
cpu as the designated target for counter data collection.
During system boot, the first online cpu in a chip gets assigned as
the designated cpu for that chip(for nest-imc) and the first online cpu
in a core gets assigned as the designated cpu for that core(for core-imc).

If the designated cpu goes offline, the next online cpu from the same
chip(for nest-imc)/core(for core-imc) is assigned as the next target,
and the event context is migrated to the target cpu.
Currently, cpumask_any_but() function is used to find the target cpu.
Though this function is expected to return a `random` cpu, this always
returns the next online cpu.

If all cpus in a chip/core is offlined in a sequential manner, starting
from the first cpu, the event migration has to happen for all the cpus
which goes offline. Since the migration process involves a grace period,
the total time taken to offline all the cpus will be significantly high.

Example:
In a system which has 2 sockets, with
NUMA node0 CPU(s):     0-87
NUMA node8 CPU(s):     88-175

Time taken to offline cpu 88-175:
real    2m56.099s
user    0m0.191s
sys     0m0.000s

Use cpumask_last() to choose the target cpu, when the designated cpu
goes online, so the migration will happen only when the last_cpu in the
mask goes offline. This way the time taken to offline all cpus in a
chip/core can be reduced.

With the patch, 

Time taken  to offline cpu 88-175:
real    0m12.207s
user    0m0.171s
sys     0m0.000s

cpumask_last() is a better way to find the target cpu, since in most of the
cases cpuhotplug is performed in an increasing order(even in ppc64_cpu).

cpumask_any_but() can still be used to check the possibility of other
online cpus from the same chip/core if the last cpu in the mask goes
offline.

Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 31fa753..fbfd6e7 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -366,7 +366,14 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
 	 */
 	nid = cpu_to_node(cpu);
 	l_cpumask = cpumask_of_node(nid);
-	target = cpumask_any_but(l_cpumask, cpu);
+	target = cpumask_last(l_cpumask);
+
+	/*
+	 * If this(target) is the last cpu in the cpumask for this chip,
+	 * check for any possible online cpu in the chip.
+	 */
+	if (unlikely(target == cpu))
+		target = cpumask_any_but(l_cpumask, cpu);
 
 	/*
 	 * Update the cpumask with the target cpu and
@@ -671,7 +678,10 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
 		return 0;
 
 	/* Find any online cpu in that core except the current "cpu" */
-	ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
+	ncpu = cpumask_last(cpu_sibling_mask(cpu));
+
+	if (unlikely(ncpu == cpu))
+		ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
 
 	if (ncpu >= 0 && ncpu < nr_cpu_ids) {
 		cpumask_set_cpu(ncpu, &core_imc_cpumask);
-- 
1.8.3.1


^ permalink raw reply related

* Re: [PATCH V2 3/3] soc: fsl: add RCPM driver
From: Pavel Machek @ 2019-05-20  9:07 UTC (permalink / raw)
  To: Ran Wang
  Cc: Mark Rutland, Len Brown, devicetree@vger.kernel.org, Pavel Machek,
	linux-pm@vger.kernel.org, Rafael J . Wysocki,
	linux-kernel@vger.kernel.org, Leo Li, Rob Herring,
	Greg Kroah-Hartman, linuxppc-dev@lists.ozlabs.org,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <AM5PR0402MB2865F4574B19761848B001F9F1060@AM5PR0402MB2865.eurprd04.prod.outlook.com>

[-- Attachment #1: Type: text/plain, Size: 1690 bytes --]

On Mon 2019-05-20 09:03:50, Ran Wang wrote:
> Hi Pavel,
> 
> On Monday, May 20, 2019 16:57, Pavel Machek wrote:
> > 
> > Hi!
> > 
> > > > > +static int rcpm_pm_prepare(struct device *dev) {
> > > > > +	struct device_node *np = dev->of_node;
> > > > > +	struct wakeup_source *ws;
> > > > > +	struct rcpm *rcpm;
> > > > > +	u32 value[RCPM_WAKEUP_CELL_MAX_SIZE + 1], tmp;
> > > > > +	int i, ret;
> > > > > +
> > > > > +	rcpm = dev_get_drvdata(dev);
> > > > > +	if (!rcpm)
> > > > > +		return -EINVAL;
> > > > > +
> > > > > +	/* Begin with first registered wakeup source */
> > > > > +	ws = wakeup_source_get_next(NULL);
> > > > > +	while (ws) {
> > > >
> > > > while (ws = wakeup_source_get_next(NULL)) ?
> > >
> > > Actually, we only pass NULL to wakeup_source_get_next() at very first
> > > call to get 1st wakeup source. Then in the while loop, we will fetch
> > > next source but not 1st, that's different. I am afraid your suggestion
> > > is not quite correct.
> > 
> > Sorry, I seen your next version before seeing this explanation.
> > 
> > You are right, but the current code is "interesting". What about
> > 
> >     ws = NULL;
> >     while (ws = wakeup_source_get_next(NULL)) ...
> > 
> > then?
> 
> Did you mean:
>      ws = NULL;
>      while (ws = wakeup_source_get_next(ws)) ...
> 
>    Yes, that will be the same to my original logic, do you recommend to change
> to this? :)

Yes please. It will be less confusing to the reader.

Thanks (and sorry for cross-talk),
								Pavel
-- 
DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply

* Re: [PATCH v2] ocxl: Fix potential memory leak on context creation
From: Greg Kurz @ 2019-05-20  7:55 UTC (permalink / raw)
  To: Frederic Barrat; +Cc: clombard, linuxppc-dev, alastair, andrew.donnellan
In-Reply-To: <20190520071618.1722-1-fbarrat@linux.ibm.com>

On Mon, 20 May 2019 09:16:18 +0200
Frederic Barrat <fbarrat@linux.ibm.com> wrote:

> If we couldn't fully init a context, we were leaking memory.
> 
> Fixes: b9721d275cc2 ("ocxl: Allow external drivers to use OpenCAPI contexts")

Oops... missed that during review :-\

> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> ---
> 
> Changelog:
> v2: reset context pointer in case of allocation failure (Andrew)
> 

Alternatively you could change the code to do:

	ctx = kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	.
	.
	.
	if (pasid < 0) {
		mutex_unlock(&afu->contexts_lock);
		kfree(ctx);
		return pasid;
	}
	.
	.
	.
	*context = ctx;
	return 0;
}

This has the advantage of clearing any risk of side-effect with
*context forever, which is a safer practice IMHO.

Patch is correct anyway, so:

Reviewed-by: Greg Kurz <groug@kaod.org>

>  drivers/misc/ocxl/context.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
> index bab9c9364184..24e4fb010275 100644
> --- a/drivers/misc/ocxl/context.c
> +++ b/drivers/misc/ocxl/context.c
> @@ -22,6 +22,8 @@ int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu,
>  			afu->pasid_base + afu->pasid_max, GFP_KERNEL);
>  	if (pasid < 0) {
>  		mutex_unlock(&afu->contexts_lock);
> +		kfree(*context);
> +		*context = NULL;
>  		return pasid;
>  	}
>  	afu->pasid_count++;


^ permalink raw reply

* RE: [PATCH V2 3/3] soc: fsl: add RCPM driver
From: Ran Wang @ 2019-05-20  9:17 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Mark Rutland, Len Brown, devicetree@vger.kernel.org,
	Greg Kroah-Hartman, linux-pm@vger.kernel.org, Rafael J . Wysocki,
	linux-kernel@vger.kernel.org, Leo Li, Rob Herring,
	linuxppc-dev@lists.ozlabs.org,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <20190520090748.GB9748@amd>

Hi Pavel,

On Monday, May 20, 2019 17:08 Pavel Machek wrote:
> > > Hi!
> > >
> > > > > > +static int rcpm_pm_prepare(struct device *dev) {
> > > > > > +	struct device_node *np = dev->of_node;
> > > > > > +	struct wakeup_source *ws;
> > > > > > +	struct rcpm *rcpm;
> > > > > > +	u32 value[RCPM_WAKEUP_CELL_MAX_SIZE + 1], tmp;
> > > > > > +	int i, ret;
> > > > > > +
> > > > > > +	rcpm = dev_get_drvdata(dev);
> > > > > > +	if (!rcpm)
> > > > > > +		return -EINVAL;
> > > > > > +
> > > > > > +	/* Begin with first registered wakeup source */
> > > > > > +	ws = wakeup_source_get_next(NULL);
> > > > > > +	while (ws) {
> > > > >
> > > > > while (ws = wakeup_source_get_next(NULL)) ?
> > > >
> > > > Actually, we only pass NULL to wakeup_source_get_next() at very
> > > > first call to get 1st wakeup source. Then in the while loop, we
> > > > will fetch next source but not 1st, that's different. I am afraid
> > > > your suggestion is not quite correct.
> > >
> > > Sorry, I seen your next version before seeing this explanation.
> > >
> > > You are right, but the current code is "interesting". What about
> > >
> > >     ws = NULL;
> > >     while (ws = wakeup_source_get_next(NULL)) ...
> > >
> > > then?
> >
> > Did you mean:
> >      ws = NULL;
> >      while (ws = wakeup_source_get_next(ws)) ...
> >
> >    Yes, that will be the same to my original logic, do you recommend
> > to change to this? :)
> 
> Yes please. It will be less confusing to the reader.

OK, if no other comment, I will work out v4, fix this and extra ','
 
> Thanks (and sorry for cross-talk),

That's OK, thanks for your time.

Regards,
Ran

^ permalink raw reply

* Re: [PATCH] powerpc/perf: Use cpumask_last() to determine the
From: Anju T Sudhakar @ 2019-05-20  9:22 UTC (permalink / raw)
  To: mpe; +Cc: aravinda, maddy, linuxppc-dev, ego
In-Reply-To: <20190520090501.20415-1-anju@linux.vnet.ibm.com>

Hi,

Somehow the subject of this patch didn't appear completely here.

The Subject of this patch is as follows,

`Subject [PATCH] powerpc/perf: Use cpumask_last() to determine the 
designated cpu for nest/core units.`

Thanks,

Anju


On 5/20/19 2:35 PM, Anju T Sudhakar wrote:
> Nest and core imc(In-memory Collection counters) assigns a particular
> cpu as the designated target for counter data collection.
> During system boot, the first online cpu in a chip gets assigned as
> the designated cpu for that chip(for nest-imc) and the first online cpu
> in a core gets assigned as the designated cpu for that core(for core-imc).
>
> If the designated cpu goes offline, the next online cpu from the same
> chip(for nest-imc)/core(for core-imc) is assigned as the next target,
> and the event context is migrated to the target cpu.
> Currently, cpumask_any_but() function is used to find the target cpu.
> Though this function is expected to return a `random` cpu, this always
> returns the next online cpu.
>
> If all cpus in a chip/core is offlined in a sequential manner, starting
> from the first cpu, the event migration has to happen for all the cpus
> which goes offline. Since the migration process involves a grace period,
> the total time taken to offline all the cpus will be significantly high.
>


^ permalink raw reply

* Re: [PATCH V2 3/3] soc: fsl: add RCPM driver
From: Pavel Machek @ 2019-05-20  9:24 UTC (permalink / raw)
  To: Ran Wang
  Cc: Mark Rutland, Len Brown, devicetree@vger.kernel.org, Pavel Machek,
	linux-pm@vger.kernel.org, Rafael J . Wysocki,
	linux-kernel@vger.kernel.org, Leo Li, Rob Herring,
	Greg Kroah-Hartman, linuxppc-dev@lists.ozlabs.org,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <AM5PR0402MB2865E28B2E2296CB878ACEA2F1060@AM5PR0402MB2865.eurprd04.prod.outlook.com>

[-- Attachment #1: Type: text/plain, Size: 895 bytes --]

Hi!

> > > > You are right, but the current code is "interesting". What about
> > > >
> > > >     ws = NULL;
> > > >     while (ws = wakeup_source_get_next(NULL)) ...
> > > >
> > > > then?
> > >
> > > Did you mean:
> > >      ws = NULL;
> > >      while (ws = wakeup_source_get_next(ws)) ...
> > >
> > >    Yes, that will be the same to my original logic, do you recommend
> > > to change to this? :)
> > 
> > Yes please. It will be less confusing to the reader.
> 
> OK, if no other comment, I will work out v4, fix this and extra ','
>  
> > Thanks (and sorry for cross-talk),
> 
> That's OK, thanks for your time.

You can add

Acked-by: Pavel Machek <pavel@ucw.cz>

to that version.

Best regards,
								Pavel
-- 
DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply

* Re: [PATCH] kbuild: do not check name uniqueness of builtin modules
From: Arnd Bergmann @ 2019-05-20  9:28 UTC (permalink / raw)
  To: Masahiro Yamada
  Cc: Michael Schmitz, Stephen Rothwell, linuxppc-dev, Kees Cook,
	Linux Kbuild mailing list, Greg KH, Rusty Russell,
	Lucas De Marchi, Linux Kernel Mailing List, Lucas De Marchi,
	Linus Torvalds, Jessica Yu, Sam Ravnborg
In-Reply-To: <20190520025437.13825-1-yamada.masahiro@socionext.com>

On Mon, May 20, 2019 at 4:57 AM Masahiro Yamada
<yamada.masahiro@socionext.com> wrote:
>
> I just thought it was a good idea to scan builtin.modules in the name
> uniqueness checking, but Stephen reported a false positive.
>
> ppc64_defconfig produces:
>
>   warning: same basename if the following are built as modules:
>     arch/powerpc/platforms/powermac/nvram.ko
>     drivers/char/nvram.ko
>
> ..., which is a false positive because the former is never built as
> a module as you see in arch/powerpc/platforms/powermac/Makefile:
>
>   # CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really
>   # need this to be a bool.  Cheat here and pretend CONFIG_NVRAM=m is really
>   # CONFIG_NVRAM=y
>   obj-$(CONFIG_NVRAM:m=y)         += nvram.o
>
> Since we cannot predict how tricky Makefiles are written in wild,
> builtin.modules may potentially contain false positives. I do not
> think it is a big deal as far as kmod is concerned, but false positive
> warnings in the kernel build makes people upset. It is better to not
> do it.
>
> Even without checking builtin.modules, we have enough (and more solid)
> test coverage with allmodconfig.
>
> While I touched this part, I replaced the sed code with neater one
> provided by Stephen.
>
> Link: https://lkml.org/lkml/2019/5/19/120
> Link: https://lkml.org/lkml/2019/5/19/123
> Fixes: 3a48a91901c5 ("kbuild: check uniqueness of module names")
> Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>

Looks good to me

Acked-by: Arnd Bergmann <arnd@arndb.de>

> ---
>
>  scripts/modules-check.sh | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/scripts/modules-check.sh b/scripts/modules-check.sh
> index 2f659530e1ec..39e8cb36ba19 100755
> --- a/scripts/modules-check.sh
> +++ b/scripts/modules-check.sh
> @@ -6,10 +6,10 @@ set -e
>  # Check uniqueness of module names
>  check_same_name_modules()
>  {
> -       for m in $(sed 's:.*/::' modules.order modules.builtin | sort | uniq -d)
> +       for m in $(sed 's:.*/::' modules.order | sort | uniq -d)
>         do
> -               echo "warning: same basename if the following are built as modules:" >&2
> -               sed "/\/$m/!d;s:^kernel/:  :" modules.order modules.builtin >&2
> +               echo "warning: same module names found:" >&2
> +               sed -n "/\/$m/s:^kernel/:  :p" modules.order >&2
>         done
>  }
>
> --
> 2.17.1
>

^ permalink raw reply

* Re: [PATCH] kbuild: do not check name uniqueness of builtin modules
From: Greg KH @ 2019-05-20  9:40 UTC (permalink / raw)
  To: Masahiro Yamada
  Cc: Michael Schmitz, Stephen Rothwell, linuxppc-dev, Kees Cook,
	Arnd Bergmann, linux-kbuild, Linus Torvalds, Rusty Russell,
	Lucas De Marchi, linux-kernel, Lucas De Marchi, Jessica Yu,
	Sam Ravnborg
In-Reply-To: <20190520025437.13825-1-yamada.masahiro@socionext.com>

On Mon, May 20, 2019 at 11:54:37AM +0900, Masahiro Yamada wrote:
> I just thought it was a good idea to scan builtin.modules in the name
> uniqueness checking, but Stephen reported a false positive.
> 
> ppc64_defconfig produces:
> 
>   warning: same basename if the following are built as modules:
>     arch/powerpc/platforms/powermac/nvram.ko
>     drivers/char/nvram.ko
> 
> ..., which is a false positive because the former is never built as
> a module as you see in arch/powerpc/platforms/powermac/Makefile:
> 
>   # CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really
>   # need this to be a bool.  Cheat here and pretend CONFIG_NVRAM=m is really
>   # CONFIG_NVRAM=y
>   obj-$(CONFIG_NVRAM:m=y)         += nvram.o
> 
> Since we cannot predict how tricky Makefiles are written in wild,
> builtin.modules may potentially contain false positives. I do not
> think it is a big deal as far as kmod is concerned, but false positive
> warnings in the kernel build makes people upset. It is better to not
> do it.
> 
> Even without checking builtin.modules, we have enough (and more solid)
> test coverage with allmodconfig.
> 
> While I touched this part, I replaced the sed code with neater one
> provided by Stephen.
> 
> Link: https://lkml.org/lkml/2019/5/19/120
> Link: https://lkml.org/lkml/2019/5/19/123
> Fixes: 3a48a91901c5 ("kbuild: check uniqueness of module names")
> Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
> ---

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

^ permalink raw reply

* [PATCH v4 2/3] Documentation: dt: binding: fsl: Add 'little-endian' and update Chassis define
From: Ran Wang @ 2019-05-20  9:52 UTC (permalink / raw)
  To: Li Yang, Rob Herring, Mark Rutland, Pavel Machek
  Cc: Len Brown, devicetree, Greg Kroah-Hartman, linux-pm,
	Rafael J . Wysocki, linux-kernel, Ran Wang, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20190520095238.29210-1-ran.wang_1@nxp.com>

By default, QorIQ SoC's RCPM register block is Big Endian. But
there are some exceptions, such as LS1088A and LS2088A, are Little
Endian. So add this optional property to help identify them.

Actually LS2021A and other Layerscapes won't totally follow Chassis
2.1, so separate them from powerpc SoC.

Signed-off-by: Ran Wang <ran.wang_1@nxp.com>
---
Change in v4:
	- Adjust indectation of 'ls1021a, ls1012a, ls1043a, ls1046a'.

Change in v3:
	- None.

Change in v2:
	- None.

 Documentation/devicetree/bindings/soc/fsl/rcpm.txt |    8 +++++++-
 1 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/Documentation/devicetree/bindings/soc/fsl/rcpm.txt b/Documentation/devicetree/bindings/soc/fsl/rcpm.txt
index e284e4e..058154c 100644
--- a/Documentation/devicetree/bindings/soc/fsl/rcpm.txt
+++ b/Documentation/devicetree/bindings/soc/fsl/rcpm.txt
@@ -20,6 +20,7 @@ Required properites:
 	* "fsl,qoriq-rcpm-1.0": for chassis 1.0 rcpm
 	* "fsl,qoriq-rcpm-2.0": for chassis 2.0 rcpm
 	* "fsl,qoriq-rcpm-2.1": for chassis 2.1 rcpm
+	* "fsl,qoriq-rcpm-2.1+": for chassis 2.1+ rcpm
 
 All references to "1.0" and "2.0" refer to the QorIQ chassis version to
 which the chip complies.
@@ -27,7 +28,12 @@ Chassis Version		Example Chips
 ---------------		-------------------------------
 1.0				p4080, p5020, p5040, p2041, p3041
 2.0				t4240, b4860, b4420
-2.1				t1040, ls1021
+2.1				t1040,
+2.1+				ls1021a, ls1012a, ls1043a, ls1046a
+
+Optional properties:
+ - little-endian : RCPM register block is Little Endian. Without it RCPM
+   will be Big Endian (default case).
 
 Example:
 The RCPM node for T4240:
-- 
1.7.1


^ permalink raw reply related

* [PATCH v4 3/3] soc: fsl: add RCPM driver
From: Ran Wang @ 2019-05-20  9:52 UTC (permalink / raw)
  To: Li Yang, Rob Herring, Mark Rutland, Pavel Machek
  Cc: Len Brown, devicetree, Greg Kroah-Hartman, linux-pm,
	Rafael J . Wysocki, linux-kernel, Ran Wang, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20190520095238.29210-1-ran.wang_1@nxp.com>

The NXP's QorIQ Processors based on ARM Core have RCPM module
(Run Control and Power Management), which performs all device-level
tasks associated with power management such as wakeup source control.

This driver depends on PM wakeup source framework which help to
collect wake information.

Signed-off-by: Ran Wang <ran.wang_1@nxp.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
---
Change in v4:
	- Remove extra ',' in author line of rcpm.c
	- Update usage of wakeup_source_get_next() to be less confusing to the
	  reader, code logic remain the same.

Change in v3:
	- Some whitespace ajdustment.

Change in v2:
	- Rebase Kconfig and Makefile update to latest mainline.

 drivers/soc/fsl/Kconfig  |    8 +++
 drivers/soc/fsl/Makefile |    1 +
 drivers/soc/fsl/rcpm.c   |  122 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 0 deletions(-)
 create mode 100644 drivers/soc/fsl/rcpm.c

diff --git a/drivers/soc/fsl/Kconfig b/drivers/soc/fsl/Kconfig
index 61f8e14..8e84e40 100644
--- a/drivers/soc/fsl/Kconfig
+++ b/drivers/soc/fsl/Kconfig
@@ -29,4 +29,12 @@ config FSL_MC_DPIO
 	  other DPAA2 objects. This driver does not expose the DPIO
 	  objects individually, but groups them under a service layer
 	  API.
+
+config FSL_RCPM
+	bool "Freescale RCPM support"
+	depends on PM_SLEEP
+	help
+	  The NXP's QorIQ Processors based on ARM Core have RCPM module
+	  (Run Control and Power Management), which performs all device-level
+	  tasks associated with power management, such as wakeup source control.
 endmenu
diff --git a/drivers/soc/fsl/Makefile b/drivers/soc/fsl/Makefile
index 803ef1b..c1be6ee 100644
--- a/drivers/soc/fsl/Makefile
+++ b/drivers/soc/fsl/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_QUICC_ENGINE)		+= qe/
 obj-$(CONFIG_CPM)			+= qe/
 obj-$(CONFIG_FSL_GUTS)			+= guts.o
 obj-$(CONFIG_FSL_MC_DPIO) 		+= dpio/
+obj-$(CONFIG_FSL_RCPM)		+= rcpm.o
diff --git a/drivers/soc/fsl/rcpm.c b/drivers/soc/fsl/rcpm.c
new file mode 100644
index 0000000..ff27adc
--- /dev/null
+++ b/drivers/soc/fsl/rcpm.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// rcpm.c - Freescale QorIQ RCPM driver
+//
+// Copyright 2019 NXP
+//
+// Author: Ran Wang <ran.wang_1@nxp.com>
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
+#include <linux/kernel.h>
+
+#define RCPM_WAKEUP_CELL_MAX_SIZE	7
+
+struct rcpm {
+	unsigned int	wakeup_cells;
+	void __iomem	*ippdexpcr_base;
+	bool		little_endian;
+};
+
+static int rcpm_pm_prepare(struct device *dev)
+{
+	struct device_node	*np = dev->of_node;
+	struct wakeup_source	*ws;
+	struct rcpm		*rcpm;
+	u32 value[RCPM_WAKEUP_CELL_MAX_SIZE + 1], tmp;
+	int i, ret;
+
+	rcpm = dev_get_drvdata(dev);
+	if (!rcpm)
+		return -EINVAL;
+
+	/* Begin with first registered wakeup source */
+	ws = NULL;
+	while (wakeup_source_get_next(ws)) {
+		ret = device_property_read_u32_array(ws->attached_dev,
+				"fsl,rcpm-wakeup", value, rcpm->wakeup_cells + 1);
+
+		/*  Wakeup source should refer to current rcpm device */
+		if (ret || (np->phandle != value[0])) {
+			dev_info(dev, "%s doesn't refer to this rcpm\n",
+					ws->name);
+			continue;
+		}
+
+		for (i = 0; i < rcpm->wakeup_cells; i++) {
+			/* We can only OR related bits */
+			if (value[i + 1]) {
+				if (rcpm->little_endian) {
+					tmp = ioread32(rcpm->ippdexpcr_base + i * 4);
+					tmp |= value[i + 1];
+					iowrite32(tmp, rcpm->ippdexpcr_base + i * 4);
+				} else {
+					tmp = ioread32be(rcpm->ippdexpcr_base + i * 4);
+					tmp |= value[i + 1];
+					iowrite32be(tmp, rcpm->ippdexpcr_base + i * 4);
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops rcpm_pm_ops = {
+	.prepare =  rcpm_pm_prepare,
+};
+
+static int rcpm_probe(struct platform_device *pdev)
+{
+	struct device	*dev = &pdev->dev;
+	struct resource *r;
+	struct rcpm	*rcpm;
+	int ret;
+
+	rcpm = devm_kzalloc(dev, sizeof(*rcpm), GFP_KERNEL);
+	if (!rcpm)
+		return -ENOMEM;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r)
+		return -ENODEV;
+
+	rcpm->ippdexpcr_base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(rcpm->ippdexpcr_base)) {
+		ret =  PTR_ERR(rcpm->ippdexpcr_base);
+		return ret;
+	}
+
+	rcpm->little_endian = device_property_read_bool(
+			&pdev->dev, "little-endian");
+
+	ret = device_property_read_u32(&pdev->dev,
+			"fsl,#rcpm-wakeup-cells", &rcpm->wakeup_cells);
+	if (ret)
+		return ret;
+
+	dev_set_drvdata(&pdev->dev, rcpm);
+
+	return 0;
+}
+
+static const struct of_device_id rcpm_of_match[] = {
+	{ .compatible = "fsl,qoriq-rcpm-2.1+", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, rcpm_of_match);
+
+static struct platform_driver rcpm_driver = {
+	.driver = {
+		.name = "rcpm",
+		.of_match_table = rcpm_of_match,
+		.pm	= &rcpm_pm_ops,
+	},
+	.probe = rcpm_probe,
+};
+
+module_platform_driver(rcpm_driver);
-- 
1.7.1


^ permalink raw reply related

* [PATCH v4 1/3] PM: wakeup: Add routine to help fetch wakeup source object.
From: Ran Wang @ 2019-05-20  9:52 UTC (permalink / raw)
  To: Li Yang, Rob Herring, Mark Rutland, Pavel Machek
  Cc: Len Brown, devicetree, Greg Kroah-Hartman, linux-pm,
	Rafael J . Wysocki, linux-kernel, Ran Wang, linuxppc-dev,
	linux-arm-kernel

Some user might want to go through all registered wakeup sources
and doing things accordingly. For example, SoC PM driver might need to
do HW programming to prevent powering down specific IP which wakeup
source depending on. And is user's responsibility to identify if this
wakeup source he is interested in.

Signed-off-by: Ran Wang <ran.wang_1@nxp.com>
---
Change in v4:
	- None.

Change in v3:
	- Adjust indentation of *attached_dev;.

Change in v2:
	- None.

 drivers/base/power/wakeup.c |   18 ++++++++++++++++++
 include/linux/pm_wakeup.h   |    3 +++
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 5b2b6a0..6904485 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -14,6 +14,7 @@
 #include <linux/suspend.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/of_device.h>
 #include <linux/pm_wakeirq.h>
 #include <trace/events/power.h>
 
@@ -226,6 +227,22 @@ void wakeup_source_unregister(struct wakeup_source *ws)
 	}
 }
 EXPORT_SYMBOL_GPL(wakeup_source_unregister);
+/**
+ * wakeup_source_get_next - Get next wakeup source from the list
+ * @ws: Previous wakeup source object, null means caller want first one.
+ */
+struct wakeup_source *wakeup_source_get_next(struct wakeup_source *ws)
+{
+	struct list_head *ws_head = &wakeup_sources;
+
+	if (ws)
+		return list_next_or_null_rcu(ws_head, &ws->entry,
+				struct wakeup_source, entry);
+	else
+		return list_entry_rcu(ws_head->next,
+				struct wakeup_source, entry);
+}
+EXPORT_SYMBOL_GPL(wakeup_source_get_next);
 
 /**
  * device_wakeup_attach - Attach a wakeup source object to a device object.
@@ -242,6 +259,7 @@ static int device_wakeup_attach(struct device *dev, struct wakeup_source *ws)
 		return -EEXIST;
 	}
 	dev->power.wakeup = ws;
+	ws->attached_dev = dev;
 	if (dev->power.wakeirq)
 		device_wakeup_attach_irq(dev, dev->power.wakeirq);
 	spin_unlock_irq(&dev->power.lock);
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 0ff134d..913b2fb 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -50,6 +50,7 @@
  * @wakeup_count: Number of times the wakeup source might abort suspend.
  * @active: Status of the wakeup source.
  * @has_timeout: The wakeup source has been activated with a timeout.
+ * @attached_dev: The device it attached to
  */
 struct wakeup_source {
 	const char 		*name;
@@ -70,6 +71,7 @@ struct wakeup_source {
 	unsigned long		wakeup_count;
 	bool			active:1;
 	bool			autosleep_enabled:1;
+	struct device		*attached_dev;
 };
 
 #ifdef CONFIG_PM_SLEEP
@@ -101,6 +103,7 @@ static inline void device_set_wakeup_path(struct device *dev)
 extern void wakeup_source_remove(struct wakeup_source *ws);
 extern struct wakeup_source *wakeup_source_register(const char *name);
 extern void wakeup_source_unregister(struct wakeup_source *ws);
+extern struct wakeup_source *wakeup_source_get_next(struct wakeup_source *ws);
 extern int device_wakeup_enable(struct device *dev);
 extern int device_wakeup_disable(struct device *dev);
 extern void device_set_wakeup_capable(struct device *dev, bool capable);
-- 
1.7.1


^ permalink raw reply related

* [PATCH v2] selftests/powerpc: Add a test of bad (out-of-range) accesses
From: Michael Ellerman @ 2019-05-20 10:20 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: npigging

Userspace isn't allowed to access certain address ranges, make sure we
actually test that to at least some degree.

This would have caught the recent bug where the SLB fault handler was
incorrectly called on an out-of-range access when using the Radix MMU.
It also would have caught the bug we had in get_region_id() where we
were inserting SLB entries for bad addresses.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
v2: Incorporate changes from Nick's version. Check the faulted address,
    and make sure we get BNDERR for things that are completely out of
    bounds.

 tools/testing/selftests/powerpc/mm/.gitignore |   3 +-
 tools/testing/selftests/powerpc/mm/Makefile   |   4 +-
 .../selftests/powerpc/mm/bad_accesses.c       | 167 ++++++++++++++++++
 3 files changed, 172 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/mm/bad_accesses.c

diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index ba919308fe30..c189170ba041 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -3,4 +3,5 @@ subpage_prot
 tempfile
 prot_sao
 segv_errors
-wild_bctr
\ No newline at end of file
+wild_bctr
+bad_accesses
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 43d68420e363..f9667f980241 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,8 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors \
+		  wild_bctr bad_accesses
 TEST_GEN_FILES := tempfile
 
 top_srcdir = ../../../../..
@@ -13,6 +14,7 @@ $(TEST_GEN_PROGS): ../harness.c
 $(OUTPUT)/prot_sao: ../utils.c
 
 $(OUTPUT)/wild_bctr: CFLAGS += -m64
+$(OUTPUT)/bad_accesses: CFLAGS += -m64
 
 $(OUTPUT)/tempfile:
 	dd if=/dev/zero of=$@ bs=64k count=1
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
new file mode 100644
index 000000000000..f18b2b91b263
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019, Michael Ellerman, IBM Corp.
+//
+// Test that out-of-bounds reads/writes behave as expected.
+
+#include <setjmp.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+// 64-bit kernel is always here
+#define PAGE_OFFSET	(0xcul << 60)
+
+static unsigned long kernel_virt_end;
+
+static volatile int fault_code;
+static volatile unsigned long fault_addr;
+static jmp_buf setjmp_env;
+
+static void segv_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+	fault_code = info->si_code;
+	fault_addr = (unsigned long)info->si_addr;
+	siglongjmp(setjmp_env, 1);
+}
+
+int bad_access(char *p, bool write)
+{
+	char x;
+
+	fault_code = 0;
+	fault_addr = 0;
+
+	if (sigsetjmp(setjmp_env, 1) == 0) {
+		if (write)
+			*p = 1;
+		else
+			x = *p;
+
+		printf("Bad - no SEGV! (%c)\n", x);
+		return 1;
+	}
+
+	// If we see MAPERR that means we took a page fault rather than an SLB
+	// miss. We only expect to take page faults for addresses within the
+	// valid kernel range.
+	FAIL_IF(fault_code == SEGV_MAPERR && \
+		(fault_addr < PAGE_OFFSET || fault_addr >= kernel_virt_end));
+
+	FAIL_IF(fault_code != SEGV_MAPERR && fault_code != SEGV_BNDERR);
+
+	return 0;
+}
+
+static int using_hash_mmu(bool *using_hash)
+{
+	char line[128];
+	FILE *f;
+	int rc;
+
+	f = fopen("/proc/cpuinfo", "r");
+	FAIL_IF(!f);
+
+	rc = 0;
+	while (fgets(line, sizeof(line), f) != NULL) {
+		if (strcmp(line, "MMU		: Hash\n") == 0) {
+			*using_hash = true;
+			goto out;
+		}
+
+		if (strcmp(line, "MMU		: Radix\n") == 0) {
+			*using_hash = false;
+			goto out;
+		}
+	}
+
+	rc = -1;
+out:
+	fclose(f);
+	return rc;
+}
+
+static int test(void)
+{
+	unsigned long i, j, addr, region_shift, page_shift, page_size;
+	struct sigaction sig;
+	bool hash_mmu;
+
+	sig = (struct sigaction) {
+		.sa_sigaction = segv_handler,
+		.sa_flags = SA_SIGINFO,
+	};
+
+	FAIL_IF(sigaction(SIGSEGV, &sig, NULL) != 0);
+
+	FAIL_IF(using_hash_mmu(&hash_mmu));
+
+	page_size = sysconf(_SC_PAGESIZE);
+	if (page_size == (64 * 1024))
+		page_shift = 16;
+	else
+		page_shift = 12;
+
+	if (page_size == (64 * 1024) || !hash_mmu) {
+		region_shift = 52;
+
+		// We have 7 512T regions (4 kernel linear, vmalloc, io, vmemmap)
+		kernel_virt_end = PAGE_OFFSET + (7 * (512ul << 40));
+	} else if (page_size == (4 * 1024) && hash_mmu) {
+		region_shift = 46;
+
+		// We have 7 64T regions (4 kernel linear, vmalloc, io, vmemmap)
+		kernel_virt_end = PAGE_OFFSET + (7 * (64ul << 40));
+	} else
+		FAIL_IF(true);
+
+	printf("Using %s MMU, PAGE_SIZE = %dKB start address 0x%016lx\n",
+	       hash_mmu ? "hash" : "radix",
+	       (1 << page_shift) >> 10,
+	       1ul << region_shift);
+
+	// This generates access patterns like:
+	//   0x0010000000000000
+	//   0x0010000000010000
+	//   0x0010000000020000
+	//   ...
+	//   0x0014000000000000
+	//   0x0018000000000000
+	//   0x0020000000000000
+	//   0x0020000000010000
+	//   0x0020000000020000
+	//   ...
+	//   0xf400000000000000
+	//   0xf800000000000000
+
+	for (i = 1; i <= ((0xful << 60) >> region_shift); i++) {
+		for (j = page_shift - 1; j < 60; j++) {
+			unsigned long base, delta;
+
+			base  = i << region_shift;
+			delta = 1ul << j;
+
+			if (delta >= base)
+				break;
+
+			addr = (base | delta) & ~((1 << page_shift) - 1);
+
+			FAIL_IF(bad_access((char *)addr, false));
+			FAIL_IF(bad_access((char *)addr, true));
+		}
+	}
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test, "bad_accesses");
+}
-- 
2.20.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox