LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5 5/8] powerpc/64: Make meltdown reporting Book3S 64 specific
From: Michael Ellerman @ 2018-07-27 23:06 UTC (permalink / raw)
  To: linuxppc-dev, diana.craciun; +Cc: oss, leoyang.li, bharat.bhushan
In-Reply-To: <20180727230639.25413-1-mpe@ellerman.id.au>

From: Diana Craciun <diana.craciun@nxp.com>

In a subsequent patch we will enable building security.c for Book3E.
However the NXP platforms are not vulnerable to Meltdown, so make the
Meltdown vulnerability reporting PPC_BOOK3S_64 specific.

Signed-off-by: Diana Craciun <diana.craciun@nxp.com>
[mpe: Split out of larger patch]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/security.c | 2 ++
 1 file changed, 2 insertions(+)

v5: Split out.

diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 77f253a6f8c9..ef72161de474 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -92,6 +92,7 @@ static __init int barrier_nospec_debugfs_init(void)
 device_initcall(barrier_nospec_debugfs_init);
 #endif /* CONFIG_DEBUG_FS */
 
+#ifdef CONFIG_PPC_BOOK3S_64
 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	bool thread_priv;
@@ -124,6 +125,7 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha
 
 	return sprintf(buf, "Vulnerable\n");
 }
+#endif
 
 ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
 {
-- 
2.14.1

^ permalink raw reply related

* [PATCH v5 6/8] powerpc/fsl: Add barrier_nospec implementation for NXP PowerPC Book3E
From: Michael Ellerman @ 2018-07-27 23:06 UTC (permalink / raw)
  To: linuxppc-dev, diana.craciun; +Cc: oss, leoyang.li, bharat.bhushan
In-Reply-To: <20180727230639.25413-1-mpe@ellerman.id.au>

From: Diana Craciun <diana.craciun@nxp.com>

Implement the barrier_nospec as an isync; sync instruction sequence.
The implementation uses the infrastructure built for BOOK3S 64.

Signed-off-by: Diana Craciun <diana.craciun@nxp.com>
[mpe: Split out of larger patch]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig               |  2 +-
 arch/powerpc/include/asm/barrier.h |  8 +++++++-
 arch/powerpc/lib/feature-fixups.c  | 31 +++++++++++++++++++++++++++++++
 3 files changed, 39 insertions(+), 2 deletions(-)

v5: Split out.

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index aef1c4e049f1..a0e9946083f4 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -244,7 +244,7 @@ config PPC
 config PPC_BARRIER_NOSPEC
     bool
     default y
-    depends on PPC_BOOK3S_64
+    depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
 
 config GENERIC_CSUM
 	def_bool n
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index ec43375463ba..449474f667c4 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -76,12 +76,18 @@ do {									\
 	___p1;								\
 })
 
+#ifdef CONFIG_PPC_BOOK3S_64
+#define NOSPEC_BARRIER_SLOT   nop
+#elif defined(CONFIG_PPC_FSL_BOOK3E)
+#define NOSPEC_BARRIER_SLOT   nop; nop
+#endif
+
 #ifdef CONFIG_PPC_BARRIER_NOSPEC
 /*
  * Prevent execution of subsequent instructions until preceding branches have
  * been fully resolved and are no longer executing speculatively.
  */
-#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; nop
+#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; NOSPEC_BARRIER_SLOT
 
 // This also acts as a compiler barrier due to the memory clobber.
 #define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory")
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 0e604b41b5d1..e613b02bb2f0 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -318,6 +318,37 @@ void do_barrier_nospec_fixups(bool enable)
 }
 #endif /* CONFIG_PPC_BARRIER_NOSPEC */
 
+#ifdef CONFIG_PPC_FSL_BOOK3E
+void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
+{
+	unsigned int instr[2], *dest;
+	long *start, *end;
+	int i;
+
+	start = fixup_start;
+	end = fixup_end;
+
+	instr[0] = PPC_INST_NOP;
+	instr[1] = PPC_INST_NOP;
+
+	if (enable) {
+		pr_info("barrier-nospec: using isync; sync as speculation barrier\n");
+		instr[0] = PPC_INST_ISYNC;
+		instr[1] = PPC_INST_SYNC;
+	}
+
+	for (i = 0; start < end; start++, i++) {
+		dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+		patch_instruction(dest, instr[0]);
+		patch_instruction(dest + 1, instr[1]);
+	}
+
+	printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
+}
+#endif /* CONFIG_PPC_FSL_BOOK3E */
+
 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 {
 	long *start, *end;
-- 
2.14.1

^ permalink raw reply related

* [PATCH v5 7/8] powerpc/fsl: Sanitize the syscall table for NXP PowerPC 32 bit platforms
From: Michael Ellerman @ 2018-07-27 23:06 UTC (permalink / raw)
  To: linuxppc-dev, diana.craciun; +Cc: oss, leoyang.li, bharat.bhushan
In-Reply-To: <20180727230639.25413-1-mpe@ellerman.id.au>

From: Diana Craciun <diana.craciun@nxp.com>

Use barrier_nospec to sanitize the syscall table.

Signed-off-by: Diana Craciun <diana.craciun@nxp.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/entry_32.S | 10 ++++++++++
 1 file changed, 10 insertions(+)

v5: No change.

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 973577f2141c..8f05280c8d92 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -33,6 +33,7 @@
 #include <asm/unistd.h>
 #include <asm/ptrace.h>
 #include <asm/export.h>
+#include <asm/barrier.h>
 
 /*
  * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE.
@@ -358,6 +359,15 @@ syscall_dotrace_cont:
 	ori	r10,r10,sys_call_table@l
 	slwi	r0,r0,2
 	bge-	66f
+
+	barrier_nospec_asm
+	/*
+	 * Prevent the load of the handler below (based on the user-passed
+	 * system call number) being speculatively executed until the test
+	 * against NR_syscalls and branch to .66f above has
+	 * committed.
+	 */
+
 	lwzx	r10,r10,r0	/* Fetch system call handler [ptr] */
 	mtlr	r10
 	addi	r9,r1,STACK_FRAME_OVERHEAD
-- 
2.14.1

^ permalink raw reply related

* [PATCH v5 8/8] Documentation: Add nospectre_v1 parameter
From: Michael Ellerman @ 2018-07-27 23:06 UTC (permalink / raw)
  To: linuxppc-dev, diana.craciun; +Cc: oss, leoyang.li, bharat.bhushan
In-Reply-To: <20180727230639.25413-1-mpe@ellerman.id.au>

From: Diana Craciun <diana.craciun@nxp.com>

Currently only supported on powerpc.

Signed-off-by: Diana Craciun <diana.craciun@nxp.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/admin-guide/kernel-parameters.txt | 4 ++++
 1 file changed, 4 insertions(+)

v5: Change log and whitespace.

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index efc7aa7a0670..4167bbea51e1 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2687,6 +2687,10 @@
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
 
+	nospectre_v1	[PPC] Disable mitigations for Spectre Variant 1 (bounds
+			check bypass). With this option data leaks are possible
+			in the system.
+
 	nospectre_v2	[X86] Disable all mitigations for the Spectre variant 2
 			(indirect branch prediction) vulnerability. System may
 			allow data leaks with this option, which is equivalent
-- 
2.14.1

^ permalink raw reply related

* Re: [PATCH v3] PCI: Data corruption happening due to race condition
From: Benjamin Herrenschmidt @ 2018-07-28  0:45 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Hari Vyas, bhelgaas, linux-pci, ray.jui, Paul Mackerras,
	Michael Ellerman, linuxppc-dev
In-Reply-To: <20180727222540.GH173328@bhelgaas-glaptop.roam.corp.google.com>

On Fri, 2018-07-27 at 17:25 -0500, Bjorn Helgaas wrote:
> > As for the powerpc bits, I'm probably the one who wrote them, however,
> > I'm on vacation this week and right now, no bandwidth to context switch
> > all that back in :-) So give me a few days and/or ping me next week.
> 
> OK, here's a ping :)
> 
> Some powerpc cleanup would be ideal, but I'd like to fix the race for
> v4.19, so I'm fine with this patch as-is.  But I'd definitely want
> your ack before inserting the ugly #include path in the powerpc code.

Go for it. Looks like I got a last minute meeting in Austin next week
so I'll have no time to look at any of this for a while.

Cheers,
Ben.

^ permalink raw reply

* Re: [PATCH v4 00/11] macintosh: Resolve various PMU driver problems
From: Michael Schmitz @ 2018-07-28  7:08 UTC (permalink / raw)
  To: Finn Thain, Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-m68k, linux-kernel
In-Reply-To: <cover.1530519301.git.fthain@telegraphics.com.au>

No regressions on my PowerBook G4, so for this series:

Tested-by: Michael Schmitz <schmitzmic@gmail.com>

Am 02.07.2018 um 20:21 schrieb Finn Thain:
> This series of patches has the following aims.
>
> 1) Eliminate duplicated code. Linux presently has two drivers for
>    the 68HC05-based PMU devices found in Macs: via-pmu and via-pmu68k.
>    There's no value in having separate PMU drivers for each architecture.
>
> 2) Avoid further work on via-pmu68k that's not needed for via-pmu.
>
> 3) Fix some bugs in the via-pmu driver.
>
> 4) Enable the /dev/pmu and /proc/pmu/* userspace APIs on m68k Macs
>    by adopting via-pmu.
>
> 5) Improve stability on early 100-series PowerBooks by loading no PMU
>    driver at all. Neither via-pmu nor via-pmu68k supports the early
>    M50753-based PMU device found in these models.
>
> 6) Assist the out-of-tree NuBus PowerMac port to support PMU designs
>    shared with the m68k Mac port (e.g. PowerBooks 190 and 5300).
>
> This patch series has been regression tested on various PowerBooks
> (190, 520, 3400, Pismo G3) and PowerMacs (Beige G3, G5). These patches
> did not affect userland utilities. (Note that there is a userland-
> visible change to the contents of /proc/pmu/interrupts.)
>
> Changed since v1:
> 1) Added blank lines after 'break' statements in patch 10.
> 2) Improved patch description for patch 3.
> 3) Added reviewed-by tags.
> 4) Split patch 8 to make code review easier.
>
> Changed since v2:
> 1) Added reviewed-by tag.
> 2) Retained PMU_68K_V1 and PMU_68K_V2 symbols.
>
> Changed since v3:
> 1) Rebased on v4.18-rc2.
> 2) Omitted patch 10/12, since these RTC changes now conflict with mainline.
>    It will be reworked once the mainline m68k/powerpc RTC code stabilizes.
>
>
> Finn Thain (11):
>   macintosh/via-pmu: Fix section mismatch warning
>   macintosh/via-pmu: Add missing mmio accessors
>   macintosh/via-pmu: Don't clear shift register interrupt flag twice
>   macintosh/via-pmu: Enhance state machine with new 'uninitialized'
>     state
>   macintosh/via-pmu: Replace via pointer with via1 and via2 pointers
>   macintosh/via-pmu: Add support for m68k PowerBooks
>   macintosh/via-pmu: Explicitly specify CONFIG_PPC_PMAC dependencies
>   macintosh/via-pmu68k: Don't load driver on unsupported hardware
>   macintosh/via-pmu: Replace via-pmu68k driver with via-pmu driver
>   macintosh/via-pmu: Clean up interrupt statistics
>   macintosh/via-pmu: Disambiguate interrupt statistics
>
>  arch/m68k/configs/mac_defconfig   |   2 +-
>  arch/m68k/configs/multi_defconfig |   2 +-
>  arch/m68k/mac/config.c            |   2 +-
>  arch/m68k/mac/misc.c              |  54 +--
>  drivers/macintosh/Kconfig         |  19 +-
>  drivers/macintosh/Makefile        |   1 -
>  drivers/macintosh/adb.c           |   2 +-
>  drivers/macintosh/via-pmu.c       | 346 ++++++++++------
>  drivers/macintosh/via-pmu68k.c    | 850 --------------------------------------
>  include/uapi/linux/pmu.h          |   4 +-
>  10 files changed, 235 insertions(+), 1047 deletions(-)
>  delete mode 100644 drivers/macintosh/via-pmu68k.c
>

^ permalink raw reply

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Anshuman Khandual @ 2018-07-28  8:37 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: robh, srikar, aik, jasowang, linuxram, linux-kernel,
	virtualization, hch, paulus, joe, linuxppc-dev, elfring, haren,
	david
In-Reply-To: <20180727143008-mutt-send-email-mst@kernel.org>

On 07/27/2018 05:01 PM, Michael S. Tsirkin wrote:
> On Wed, Jul 25, 2018 at 08:56:23AM +0530, Anshuman Khandual wrote:
>> Results with and without the patches are similar.
> 
> Thanks! And another thing to try is virtio-net with
> a fast NIC backend (40G and up). Unfortunately
> at this point loopback tests stress the host
> scheduler too much.
> 

Sure. Will look around for a 40G NIC system. BTW I have been testing
virtio-net with a TAP device as back end.

ip tuntap add dev tap0 mode tap user $(whoami)
ip link set tap0 master virbr0
ip link set dev virbr0 up
ip link set dev tap0 up

which is exported into the guest as follows

-device virtio-net,netdev=network0,mac=52:55:00:d1:55:01 \
-netdev tap,id=network0,ifname=tap0,script=no,downscript=no \

But I have not run any network benchmarks on it though.

^ permalink raw reply

* Re: [RFC 2/4] virtio: Override device's DMA OPS with virtio_direct_dma_ops selectively
From: Anshuman Khandual @ 2018-07-28  8:56 UTC (permalink / raw)
  To: virtualization, linux-kernel
  Cc: linuxppc-dev, aik, robh, joe, elfring, david, jasowang, benh, mpe,
	mst, hch, linuxram, haren, paulus, srikar
In-Reply-To: <20180720035941.6844-3-khandual@linux.vnet.ibm.com>

On 07/20/2018 09:29 AM, Anshuman Khandual wrote:
> Now that virtio core always needs all virtio devices to have DMA OPS, we
> need to make sure that the structure it points is the right one. In the
> absence of VIRTIO_F_IOMMU_PLATFORM flag QEMU expects GPA from guest kernel.
> In such case, virtio device must use default virtio_direct_dma_ops DMA OPS
> structure which transforms scatter gather buffer addresses as GPA. This
> DMA OPS override must happen as early as possible during virtio device
> initialization sequence before virtio core starts using given device's DMA
> OPS callbacks for I/O transactions. This change detects device's IOMMU flag
> and does the override in case the flag is cleared.
> 
> Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
> ---
>  drivers/virtio/virtio.c | 5 +++++
>  1 file changed, 5 insertions(+)
> 
> diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> index 7907ad3..6b13987 100644
> --- a/drivers/virtio/virtio.c
> +++ b/drivers/virtio/virtio.c
> @@ -166,6 +166,8 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status)
>  }
>  EXPORT_SYMBOL_GPL(virtio_add_status);
> 
> +const struct dma_map_ops virtio_direct_dma_ops;
> +
>  int virtio_finalize_features(struct virtio_device *dev)
>  {
>  	int ret = dev->config->finalize_features(dev);
> @@ -174,6 +176,9 @@ int virtio_finalize_features(struct virtio_device *dev)
>  	if (ret)
>  		return ret;


The previous patch removed the code block for XEN guests which forced
the use of DMA API all the time irrespective of VIRTIO_F_IOMMU_PLATFORM
flag on the device. Here is what I have removed with patch 2/4 which
breaks the existing semantics on XEN guests.

-static bool vring_use_dma_api(struct virtio_device *vdev)
-{
-	if (!virtio_has_iommu_quirk(vdev))
-		return true;
-
-	/* Otherwise, we are left to guess. */
-	/*
-	 * In theory, it's possible to have a buggy QEMU-supposed
-	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
-	 * such a configuration, virtio has never worked and will
-	 * not work without an even larger kludge.  Instead, enable
-	 * the DMA API if we're a Xen guest, which at least allows
-	 * all of the sensible Xen configurations to work correctly.
-	 */
-	if (xen_domain())
-		return true;
-
-	return false;
-}

XEN guests would not like override with virtio_direct_dma_ops in any
case irrespective of the flag VIRTIO_F_IOMMU_PLATFORM. So the existing
semantics can be preserved with something like this. It just assumes
that dev->dma_ops is non-NULL and a valid one set by the architecture.
If required we can add those tests here before skipping the override.

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 7907ad3..6b13987 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -166,6 +166,8 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status)
 }
 EXPORT_SYMBOL_GPL(virtio_add_status);

+const struct dma_map_ops virtio_direct_dma_ops;
+
 int virtio_finalize_features(struct virtio_device *dev)
 {
 	int ret = dev->config->finalize_features(dev);
@@ -174,6 +176,9 @@ int virtio_finalize_features(struct virtio_device *dev)
 	if (ret)
 		return ret;
+
+	if (xen_domain())
+		goto skip_override;
+
+	if (virtio_has_iommu_quirk(dev))
+		set_dma_ops(dev->dev.parent, &virtio_direct_dma_ops);
+
+ skip_override:
+
 	if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1))
 		return 0

Will incorporate these changes in the next version.

^ permalink raw reply related

* Re: [RFC 2/4] virtio: Override device's DMA OPS with virtio_direct_dma_ops selectively
From: Michael S. Tsirkin @ 2018-07-28 21:16 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: virtualization, linux-kernel, linuxppc-dev, aik, robh, joe,
	elfring, david, jasowang, benh, mpe, hch, linuxram, haren, paulus,
	srikar
In-Reply-To: <c443ad8c-fb81-302d-edb2-5521831d38da@linux.vnet.ibm.com>

On Sat, Jul 28, 2018 at 02:26:24PM +0530, Anshuman Khandual wrote:
> On 07/20/2018 09:29 AM, Anshuman Khandual wrote:
> > Now that virtio core always needs all virtio devices to have DMA OPS, we
> > need to make sure that the structure it points is the right one. In the
> > absence of VIRTIO_F_IOMMU_PLATFORM flag QEMU expects GPA from guest kernel.
> > In such case, virtio device must use default virtio_direct_dma_ops DMA OPS
> > structure which transforms scatter gather buffer addresses as GPA. This
> > DMA OPS override must happen as early as possible during virtio device
> > initialization sequence before virtio core starts using given device's DMA
> > OPS callbacks for I/O transactions. This change detects device's IOMMU flag
> > and does the override in case the flag is cleared.
> > 
> > Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
> > ---
> >  drivers/virtio/virtio.c | 5 +++++
> >  1 file changed, 5 insertions(+)
> > 
> > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > index 7907ad3..6b13987 100644
> > --- a/drivers/virtio/virtio.c
> > +++ b/drivers/virtio/virtio.c
> > @@ -166,6 +166,8 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status)
> >  }
> >  EXPORT_SYMBOL_GPL(virtio_add_status);
> > 
> > +const struct dma_map_ops virtio_direct_dma_ops;
> > +
> >  int virtio_finalize_features(struct virtio_device *dev)
> >  {
> >  	int ret = dev->config->finalize_features(dev);
> > @@ -174,6 +176,9 @@ int virtio_finalize_features(struct virtio_device *dev)
> >  	if (ret)
> >  		return ret;
> 
> 
> The previous patch removed the code block for XEN guests which forced
> the use of DMA API all the time irrespective of VIRTIO_F_IOMMU_PLATFORM
> flag on the device. Here is what I have removed with patch 2/4 which
> breaks the existing semantics on XEN guests.
> 
> -static bool vring_use_dma_api(struct virtio_device *vdev)
> -{
> -	if (!virtio_has_iommu_quirk(vdev))
> -		return true;
> -
> -	/* Otherwise, we are left to guess. */
> -	/*
> -	 * In theory, it's possible to have a buggy QEMU-supposed
> -	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
> -	 * such a configuration, virtio has never worked and will
> -	 * not work without an even larger kludge.  Instead, enable
> -	 * the DMA API if we're a Xen guest, which at least allows
> -	 * all of the sensible Xen configurations to work correctly.
> -	 */
> -	if (xen_domain())
> -		return true;
> -
> -	return false;
> -}
> 
> XEN guests would not like override with virtio_direct_dma_ops in any
> case irrespective of the flag VIRTIO_F_IOMMU_PLATFORM. So the existing
> semantics can be preserved with something like this. It just assumes
> that dev->dma_ops is non-NULL and a valid one set by the architecture.
> If required we can add those tests here before skipping the override.
> 
> diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> index 7907ad3..6b13987 100644
> --- a/drivers/virtio/virtio.c
> +++ b/drivers/virtio/virtio.c
> @@ -166,6 +166,8 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status)
>  }
>  EXPORT_SYMBOL_GPL(virtio_add_status);
> 
> +const struct dma_map_ops virtio_direct_dma_ops;
> +
>  int virtio_finalize_features(struct virtio_device *dev)
>  {
>  	int ret = dev->config->finalize_features(dev);
> @@ -174,6 +176,9 @@ int virtio_finalize_features(struct virtio_device *dev)
>  	if (ret)
>  		return ret;
> +
> +	if (xen_domain())
> +		goto skip_override;
> +
> +	if (virtio_has_iommu_quirk(dev))
> +		set_dma_ops(dev->dev.parent, &virtio_direct_dma_ops);
> +
> + skip_override:
> +

I prefer normal if scoping as opposed to goto spaghetti pls.
Better yet move vring_use_dma_api here and use it.
Less of a chance something will break.

>  	if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1))
>  		return 0
> 
> Will incorporate these changes in the next version.

^ permalink raw reply

* [PATCH v02] powerpc/migration: Handle unitialized timer reset
From: Michael Bringmann @ 2018-07-29 13:10 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon

After migration of a powerpc LPAR, the kernel executes code to
update the system state to reflect new platform characteristics.
Such changes include modifications to device tree properties
provided to the system by PHYP.  Property notifications received
by the powerpc 'migration_store' code are passed along to the
kernel in general through a call to 'of_update_property' which
in turn passes such events back to all modules through entries like
the '.notifier_call' function within the NUMA module.  When the
NUMA module updates its state, it resets its event timer.  If this
occurs after a previous call to 'stop_topology_update' or on a
system without VPHN enabled, the code runs into an uninitialized
timer structure and crashes.  This patch adds a safety check
along this path toward the problem code.

Note: This crash was observed on every LPM in the 4.17-rc7 kernel
(and in the 4.18 kernel) of a system with dedicated CPUs enabled.
An example crash log without the patch is as follows.

  [ 2571.437467] ibmvscsi 30000081: Re-enabling adapter!
[ 2571.673850] ------------[ cut here ]------------
[ 2571.673863] kernel BUG at kernel/time/timer.c:958!
[ 2571.673875] Oops: Exception in kernel mode, sig: 5 [#1]
[ 2571.673877] LE SMP NR_CPUS=2048 NUMA pSeries
[ 2571.673886] Modules linked in: nfsv3 nfs_acl nfs tcp_diag udp_diag inet_diag lockd unix_diag af_packet_diag netlink_diag grace fscache sunrpc xts vmx_crypto pseries_rng sg binfmt_misc ip_tables xfs libcrc32c sd_mod ibmvscsi ibmveth scsi_transport_srp dm_mirror dm_region_hash dm_log dm_mod
[ 2571.673969] CPU: 11 PID: 3067 Comm: drmgr Not tainted 4.17.0+ #179
[ 2571.673972] NIP:  c000000000198a2c LR: c000000000075990 CTR: 0000000000000000
[ 2571.673975] REGS: c0000003f9407560 TRAP: 0700   Not tainted  (4.17.0+)
[ 2571.673977] MSR:  800000010282b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE,TM[E]>  CR: 44482824  XER: 00000000
[ 2571.673990] CFAR: c00000000007598c SOFTE: 0
[ 2571.673990] GPR00: c000000000075990 c0000003f94077e0 c00000000113a900 c0000000012e5968
[ 2571.673990] GPR04: 000000010003776b c0000003ffa05180 0000000000000020 c0000003f9407850
[ 2571.673990] GPR08: 0000000000000000 0000000000000001 ffffffffffffffff 0000000000000220
[ 2571.673990] GPR12: c000000000076560 c00000001ec90200 0000000040000000 0000000000000018
[ 2571.673990] GPR16: 0000000000000000 000000000000000d c0000003e9ea5015 0000000000000010
[ 2571.673990] GPR20: 000000000000000b 0000000000000050 c0000003e9ea4068 0000000000000001
[ 2571.673990] GPR24: 000000000000001c c0000003ffff3ab0 0000000000000000 c0000003f9407990
[ 2571.673990] GPR28: 0000000000000005 c0000003ffff3ab0 000000010003776b c0000000012e5968
[ 2571.674042] NIP [c000000000198a2c] mod_timer+0x4c/0x400
[ 2571.674051] LR [c000000000075990] reset_topology_timer+0x40/0x60
[ 2571.674053] Call Trace:
[ 2571.674056] [c0000003f94077e0] [c0000003f9407830] 0xc0000003f9407830 (unreliable)
[ 2571.674060] [c0000003f9407860] [c000000000075990] reset_topology_timer+0x40/0x60
[ 2571.674063] [c0000003f9407880] [c000000000076660] dt_update_callback+0x100/0x120
[ 2571.674072] [c0000003f94078d0] [c00000000012ada0] notifier_call_chain+0x90/0x100
[ 2571.674077] [c0000003f9407920] [c00000000012b270] __blocking_notifier_call_chain+0x60/0x90
[ 2571.674092] [c0000003f9407970] [c0000000007b9a60] of_property_notify+0x90/0xd0
[ 2571.674096] [c0000003f94079d0] [c0000000007b4644] of_update_property+0x104/0x150
[ 2571.674103] [c0000003f9407a30] [c0000000000c0ddc] update_dt_property+0xdc/0x1f0
[ 2571.674106] [c0000003f9407a90] [c0000000000c11c0] pseries_devicetree_update+0x2d0/0x510
[ 2571.674110] [c0000003f9407bc0] [c0000000000c147c] post_mobility_fixup+0x7c/0xf0
[ 2571.674113] [c0000003f9407c30] [c0000000000c1594] migration_store+0xa4/0xc0
[ 2571.674123] [c0000003f9407c70] [c000000000989940] kobj_attr_store+0x30/0x60
[ 2571.674133] [c0000003f9407c90] [c00000000040b294] sysfs_kf_write+0x64/0xa0
[ 2571.674136] [c0000003f9407cb0] [c00000000040a02c] kernfs_fop_write+0x16c/0x240
[ 2571.674146] [c0000003f9407d00] [c00000000034eeb0] __vfs_write+0x40/0x200
[ 2571.674149] [c0000003f9407d90] [c00000000034f288] vfs_write+0xc8/0x240
[ 2571.674152] [c0000003f9407de0] [c00000000034f5bc] ksys_write+0x5c/0x100
[ 2571.674158] [c0000003f9407e30] [c00000000000b284] system_call+0x58/0x6c
[ 2571.674161] Instruction dump:
[ 2571.674163] fb01ffc0 7c7f1b78 fb21ffc8 fb41ffd0 fb61ffd8 fb81ffe0 fba1ffe8 f8010010
[ 2571.674168] f821ff81 e9230018 7d290074 7929d182 <0b090000> e9230008 2fa90000 419e0278
[ 2571.674176] ---[ end trace 0c7939657d5522df ]---

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in patch:
  -- Revise patch description.
---
 arch/powerpc/mm/numa.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 0c7e05d..a789d57 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1457,7 +1457,8 @@ static void topology_timer_fn(struct timer_list *unused)
 
 static void reset_topology_timer(void)
 {
-	mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
+	if (vphn_enabled)
+		mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
 }
 
 #ifdef CONFIG_SMP

^ permalink raw reply related

* [PATCH] powerpc/mobility: Fix node detach/rename problem
From: Michael Bringmann @ 2018-07-29 13:11 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon

During LPAR migration, the content of the device tree/sysfs may
be updated including deletion and replacement of nodes in the
tree.  When nodes are added to the internal node structures, they
are appended in FIFO order to a list of nodes maintained by the
OF code APIs.  When nodes are removed from the device tree, they
are marked OF_DETACHED, but not actually deleted from the system
to allow for pointers cached elsewhere in the kernel.  The order
and content of the entries in the list of nodes is not altered,
though.

During LPAR migration some common nodes are deleted and re-added
e.g. "ibm,platform-facilities".  If a node is re-added to the OF
node lists, the of_attach_node function checks to make sure that
the name + ibm,phandle of the to-be-added data is unique.  As the
previous copy of a re-added node is not modified beyond the addition
of a bit flag, the code (1) finds the old copy, (2) prints a WARNING
notice to the console, (3) renames the to-be-added node to avoid
filename collisions within a directory, and (4) adds entries to
the sysfs/kernfs.

This patch fixes the 'migration add' problem by changing the
stored 'phandle' of the OF_DETACHed node to 0 (reserved value for
of_find_node_by_phandle), so that subsequent re-add operations,
such as those during migration, do not find the detached node,
do not observe duplicate names, do not rename them, and the
extra WARNING notices are removed from the console output.

In addition, it erases the 'name' field of the OF_DETACHED node,
to prevent any future calls to of_find_node_by_name() or
of_find_node_by_path() from matching this node.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/dlpar.c |    3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 2de0f0d..9d82c28 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -274,6 +274,9 @@ int dlpar_detach_node(struct device_node *dn)
 	if (rc)
 		return rc;
 
+	dn->phandle = 0;
+	memset(dn->name, 0, strlen(dn->name));
+
 	return 0;
 }
 

^ permalink raw reply related

* [PATCH v08 0/5] powerpc/hotplug: Update affinity for migrated CPUs
From: Michael Bringmann @ 2018-07-29 13:18 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon

The migration of LPARs across Power systems affects many attributes
including that of the associativity of CPUs.  The patches in this
set execute when a system is coming up fresh upon a migration target.
They are intended to,

* Recognize changes to the associativity of CPUs recorded in internal
  data structures when compared to the latest copies in the device tree.
* Generate calls to other code layers to reset the data structures
  related to associativity of the CPUs.
* Re-register the 'changed' entities into the target system.
  Re-registration of CPUs mostly entails acting as if they have been
  newly hot-added into the target system.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>

Michael Bringmann (9):
  hotplug/cpu: Conditionally acquire/release DRC index
  hotplug/cpu: Add operation queuing function
  hotplug/cpu: Provide CPU readd operation
  mobility/numa: Ensure numa update does not overlap
  numa: Disable/enable arch_update_cpu_topology
  pmt/numa: Disable arch_update_cpu_topology during CPU readd
  powerpc/rtas: Allow disabling rtas_event_scan
  hotplug/rtas: No rtas_event_scan during PMT update
  hotplug/pmt: Update topology after PMT
---
Changes in patch:
  -- Restructure and rearrange content of patches to co-locate
     similar or related modifications
  -- Rename pseries_update_drconf_cpu to pseries_update_processor
  -- Simplify code to update CPU nodes during mobility checks.
     Remove functions to generate extra HP_ELOG messages in favor
     of direct function calls to dlpar_cpu_readd_by_index.
  -- Revise code order in dlpar_cpu_readd_by_index() to present
     more appropriate error codes from underlying layers of the
     implementation.
  -- Add hotplug device lock around all property updates
  -- Add call to rebuild_sched_domains in case of changes
  -- Various code cleanups and compaction
  -- Rebase to 4.18 kernel
  -- Change operation to run CPU readd after end of migration store.
  -- Improve descriptive text
  -- Cleanup patch reference to outdated function
  -- Code cleanup a 'acquire_drc' check in dlpar_cpu_add.
  -- Code cleanup a 'release_drc' check in dlpar_cpu_remove.
  -- Add more information to patch descriptions.
  -- More code cleanup
  -- Rearrange call to rebuild_sched_domains to allow removal
     of some locking code.

^ permalink raw reply

* [PATCH v08 1/5] hotplug/cpu: Conditionally acquire/release DRC index
From: Michael Bringmann @ 2018-07-29 13:19 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <9573dc30-01ad-2650-c247-28df08875cef@linux.vnet.ibm.com>

powerpc/cpu: Modify dlpar_cpu_add and dlpar_cpu_remove to allow the
skipping of DRC index acquire or release operations during the CPU
add or remove operations.  This is intended to support subsequent
changes to provide a 'CPU readd' operation.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in patch:
  -- Move new validity check added to pseries_smp_notifier
     to another patch
  -- Revise one of checks for 'acquire_drc' in dlpar_cpu_add.
  -- Revise one of checks for 'release_drc' in dlpar_cpu_remove.
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c |   71 +++++++++++++++-----------
 1 file changed, 40 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6ef77ca..7ede3b0 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -432,7 +432,7 @@ static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
 	return found;
 }
 
-static ssize_t dlpar_cpu_add(u32 drc_index)
+static ssize_t dlpar_cpu_add(u32 drc_index, bool acquire_drc)
 {
 	struct device_node *dn, *parent;
 	int rc, saved_rc;
@@ -457,19 +457,22 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 		return -EINVAL;
 	}
 
-	rc = dlpar_acquire_drc(drc_index);
-	if (rc) {
-		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
-			rc, drc_index);
-		of_node_put(parent);
-		return -EINVAL;
+	if (acquire_drc) {
+		rc = dlpar_acquire_drc(drc_index);
+		if (rc) {
+			pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
+				rc, drc_index);
+			of_node_put(parent);
+			return -EINVAL;
+		}
 	}
 
 	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
 	if (!dn) {
 		pr_warn("Failed call to configure-connector, drc index: %x\n",
 			drc_index);
-		dlpar_release_drc(drc_index);
+		if (acquire_drc)
+			dlpar_release_drc(drc_index);
 		of_node_put(parent);
 		return -EINVAL;
 	}
@@ -484,9 +487,11 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 		pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
 			dn->name, rc, drc_index);
 
-		rc = dlpar_release_drc(drc_index);
-		if (!rc)
-			dlpar_free_cc_nodes(dn);
+		if (acquire_drc) {
+			rc = dlpar_release_drc(drc_index);
+			if (!rc)
+				dlpar_free_cc_nodes(dn);
+		}
 
 		return saved_rc;
 	}
@@ -498,7 +503,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 			dn->name, rc, drc_index);
 
 		rc = dlpar_detach_node(dn);
-		if (!rc)
+		if (!rc && acquire_drc)
 			dlpar_release_drc(drc_index);
 
 		return saved_rc;
@@ -566,7 +571,8 @@ static int dlpar_offline_cpu(struct device_node *dn)
 
 }
 
-static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
+static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index,
+				bool release_drc)
 {
 	int rc;
 
@@ -579,12 +585,14 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
 		return -EINVAL;
 	}
 
-	rc = dlpar_release_drc(drc_index);
-	if (rc) {
-		pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
-			drc_index, dn->name, rc);
-		dlpar_online_cpu(dn);
-		return rc;
+	if (release_drc) {
+		rc = dlpar_release_drc(drc_index);
+		if (rc) {
+			pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
+				drc_index, dn->name, rc);
+			dlpar_online_cpu(dn);
+			return rc;
+		}
 	}
 
 	rc = dlpar_detach_node(dn);
@@ -593,8 +601,9 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
 
 		pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);
 
-		rc = dlpar_acquire_drc(drc_index);
-		if (!rc)
+		if (release_drc)
+			rc = dlpar_acquire_drc(drc_index);
+		if (!release_drc || !rc)
 			dlpar_online_cpu(dn);
 
 		return saved_rc;
@@ -622,7 +631,7 @@ static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
 	return dn;
 }
 
-static int dlpar_cpu_remove_by_index(u32 drc_index)
+static int dlpar_cpu_remove_by_index(u32 drc_index, bool release_drc)
 {
 	struct device_node *dn;
 	int rc;
@@ -634,7 +643,7 @@ static int dlpar_cpu_remove_by_index(u32 drc_index)
 		return -ENODEV;
 	}
 
-	rc = dlpar_cpu_remove(dn, drc_index);
+	rc = dlpar_cpu_remove(dn, drc_index, release_drc);
 	of_node_put(dn);
 	return rc;
 }
@@ -699,7 +708,7 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
 	}
 
 	for (i = 0; i < cpus_to_remove; i++) {
-		rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
+		rc = dlpar_cpu_remove_by_index(cpu_drcs[i], true);
 		if (rc)
 			break;
 
@@ -710,7 +719,7 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
 		pr_warn("CPU hot-remove failed, adding back removed CPUs\n");
 
 		for (i = 0; i < cpus_removed; i++)
-			dlpar_cpu_add(cpu_drcs[i]);
+			dlpar_cpu_add(cpu_drcs[i], true);
 
 		rc = -EINVAL;
 	} else {
@@ -780,7 +789,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
 	}
 
 	for (i = 0; i < cpus_to_add; i++) {
-		rc = dlpar_cpu_add(cpu_drcs[i]);
+		rc = dlpar_cpu_add(cpu_drcs[i], true);
 		if (rc)
 			break;
 
@@ -791,7 +800,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
 		pr_warn("CPU hot-add failed, removing any added CPUs\n");
 
 		for (i = 0; i < cpus_added; i++)
-			dlpar_cpu_remove_by_index(cpu_drcs[i]);
+			dlpar_cpu_remove_by_index(cpu_drcs[i], true);
 
 		rc = -EINVAL;
 	} else {
@@ -817,7 +826,7 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
 			rc = dlpar_cpu_remove_by_count(count);
 		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
-			rc = dlpar_cpu_remove_by_index(drc_index);
+			rc = dlpar_cpu_remove_by_index(drc_index, true);
 		else
 			rc = -EINVAL;
 		break;
@@ -825,7 +834,7 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
 			rc = dlpar_cpu_add_by_count(count);
 		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
-			rc = dlpar_cpu_add(drc_index);
+			rc = dlpar_cpu_add(drc_index, true);
 		else
 			rc = -EINVAL;
 		break;
@@ -850,7 +859,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 	if (rc)
 		return -EINVAL;
 
-	rc = dlpar_cpu_add(drc_index);
+	rc = dlpar_cpu_add(drc_index, true);
 
 	return rc ? rc : count;
 }
@@ -871,7 +880,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count)
 		return -EINVAL;
 	}
 
-	rc = dlpar_cpu_remove(dn, drc_index);
+	rc = dlpar_cpu_remove(dn, drc_index, true);
 	of_node_put(dn);
 
 	return rc ? rc : count;

^ permalink raw reply related

* [PATCH v08 2/5] hotplug/cpu: Add operation queuing function
From: Michael Bringmann @ 2018-07-29 13:19 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon

In-Reply-To: <9573dc30-01ad-2650-c247-28df08875cef@linux.vnet.ibm.com>

hotplug/cpu: This patch adds function dlpar_queue_action() which
will queue up information about a CPU/Memory 'readd' operation
according to resource type, action code, and DRC index.  Examples
of such operations include 'readd' of CPU and Memory blocks
identified as having changed their associativity during an LPAR
migration event.

At a subsequent point, the list of operations can be run/played
in series from a worker function added to the pseries work queue.
In the case of 'migration_store', the code has identified a set
of post migration topology changes to be applied.  There may be
a small number of CPU and memory changes to apply on small systems.
On large SAP HANA systems though, there may be changes to thousands
and thousands of CPUs and memory blocks.  Applying these changes
may take a significant amount of time -- much longer than the
timeouts used by an associated HMC.  In order to avoid such
timeout errors, the worker queue implementation allows the code
to exit 'migration_store' with a good status, and then apply the
changes to CPUs and memory blocks afterward.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in patch:
  -- Correct drc_index before adding to pseries_hp_errorlog struct
  -- Correct text of notice
  -- Revise queuing model to save up all of the DLPAR actions for
     later execution.
  -- Restore list init statement missing from patch
  -- Move call to apply queued operations into 'mobility.c'
  -- Compress some code
  -- Rename some of queueing function APIs
  -- Revise implementation to push execution of queued operations
     to a workqueue task.
  -- Cleanup reference to outdated queuing operation.
  -- Add more information to patch description.
  -- Remove sleep calls from new code.
---
 arch/powerpc/include/asm/rtas.h           |    2 +
 arch/powerpc/platforms/pseries/dlpar.c    |   57 +++++++++++++++++++++++++++++
 arch/powerpc/platforms/pseries/mobility.c |    4 ++
 arch/powerpc/platforms/pseries/pseries.h  |    2 +
 4 files changed, 65 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 71e393c..4f601c7 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -310,12 +310,14 @@ struct pseries_hp_errorlog {
 		struct { __be32 count, index; } ic;
 		char	drc_name[1];
 	} _drc_u;
+	struct list_head list;
 };
 
 #define PSERIES_HP_ELOG_RESOURCE_CPU	1
 #define PSERIES_HP_ELOG_RESOURCE_MEM	2
 #define PSERIES_HP_ELOG_RESOURCE_SLOT	3
 #define PSERIES_HP_ELOG_RESOURCE_PHB	4
+#define PSERIES_HP_ELOG_RESOURCE_PMT	5
 
 #define PSERIES_HP_ELOG_ACTION_ADD	1
 #define PSERIES_HP_ELOG_ACTION_REMOVE	2
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index a0b20c0..13ac1cc 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -25,6 +25,7 @@
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <linux/uaccess.h>
+#include <linux/delay.h>
 #include <asm/rtas.h>
 
 static struct workqueue_struct *pseries_hp_wq;
@@ -329,6 +330,8 @@ int dlpar_release_drc(u32 drc_index)
 	return 0;
 }
 
+static int dlpar_pmt(struct pseries_hp_errorlog *work);
+
 static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 {
 	int rc;
@@ -357,6 +360,9 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 	case PSERIES_HP_ELOG_RESOURCE_CPU:
 		rc = dlpar_cpu(hp_elog);
 		break;
+	case PSERIES_HP_ELOG_RESOURCE_PMT:
+		rc = dlpar_pmt(hp_elog);
+		break;
 	default:
 		pr_warn_ratelimited("Invalid resource (%d) specified\n",
 				    hp_elog->resource);
@@ -407,6 +413,57 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
 	}
 }
 
+LIST_HEAD(dlpar_delayed_list);
+
+int dlpar_queue_action(int resource, int action, u32 drc_index)
+{
+	struct pseries_hp_errorlog *hp_errlog;
+
+	hp_errlog = kmalloc(sizeof(struct pseries_hp_errorlog), GFP_KERNEL);
+	if (!hp_errlog)
+		return -ENOMEM;
+
+	hp_errlog->resource = resource;
+	hp_errlog->action = action;
+	hp_errlog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+	hp_errlog->_drc_u.drc_index = cpu_to_be32(drc_index);
+
+	list_add_tail(&hp_errlog->list, &dlpar_delayed_list);
+
+	return 0;
+}
+
+static int dlpar_pmt(struct pseries_hp_errorlog *work)
+{
+	struct list_head *pos, *q;
+
+	list_for_each_safe(pos, q, &dlpar_delayed_list) {
+		struct pseries_hp_errorlog *tmp;
+
+		tmp = list_entry(pos, struct pseries_hp_errorlog, list);
+		handle_dlpar_errorlog(tmp);
+
+		list_del(pos);
+		kfree(tmp);
+	}
+
+	return 0;
+}
+
+int dlpar_queued_actions_run(void)
+{
+	if (!list_empty(&dlpar_delayed_list)) {
+		struct pseries_hp_errorlog hp_errlog;
+
+		hp_errlog.resource = PSERIES_HP_ELOG_RESOURCE_PMT;
+		hp_errlog.action = 0;
+		hp_errlog.id_type = 0;
+
+		queue_hotplug_event(&hp_errlog, 0, 0);
+	}
+	return 0;
+}
+
 static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog)
 {
 	char *arg;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index f6364d9..d0d1cae 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -378,6 +378,10 @@ static ssize_t migration_store(struct class *class,
 		return rc;
 
 	post_mobility_fixup();
+
+	/* Apply any necessary changes identified during fixup */
+	dlpar_queued_actions_run();
+
 	return count;
 }
 
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 60db2ee..72ca996 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -61,6 +61,8 @@ extern struct device_node *dlpar_configure_connector(__be32,
 
 void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
 			 struct completion *hotplug_done, int *rc);
+int dlpar_queue_action(int resource, int action, u32 drc_index);
+int dlpar_queued_actions_run(void);
 #ifdef CONFIG_MEMORY_HOTPLUG
 int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
 #else

^ permalink raw reply related

* [PATCH v08 3/5] hotplug/cpu: Provide CPU readd operation
From: Michael Bringmann @ 2018-07-29 13:19 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon

In-Reply-To: <9573dc30-01ad-2650-c247-28df08875cef@linux.vnet.ibm.com>

powerpc/dlpar: Provide hotplug CPU 'readd by index' operation to
support LPAR Post Migration state updates.  When such changes are
invoked by the PowerPC 'mobility' code, they will be queued up so
that modifications to CPU properties will take place after the new
property value is written to the device-tree.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in patch:
  -- Add CPU validity check to pseries_smp_notifier
  -- Improve check on 'ibm,associativity' property
  -- Add check for cpu type to new update property entry
  -- Cleanup reference to outdated queuing function.
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c |   58 ++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 7ede3b0..1906ee57 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -305,6 +305,36 @@ static int pseries_add_processor(struct device_node *np)
 	return err;
 }
 
+static int pseries_update_processor(struct of_reconfig_data *pr)
+{
+	int old_entries, new_entries, rc = 0;
+	__be32 *old_assoc, *new_assoc;
+
+	/* We only handle changes due to 'ibm,associativity' property
+	 */
+	old_assoc = pr->old_prop->value;
+	old_entries = be32_to_cpu(*old_assoc++);
+
+	new_assoc = pr->prop->value;
+	new_entries = be32_to_cpu(*new_assoc++);
+
+	if (old_entries == new_entries) {
+		int sz = old_entries * sizeof(int);
+
+		if (memcmp(old_assoc, new_assoc, sz))
+			rc = dlpar_queue_action(
+					PSERIES_HP_ELOG_RESOURCE_CPU,
+					PSERIES_HP_ELOG_ACTION_READD,
+					pr->dn->phandle);
+	} else {
+		rc = dlpar_queue_action(PSERIES_HP_ELOG_RESOURCE_CPU,
+					PSERIES_HP_ELOG_ACTION_READD,
+					pr->dn->phandle);
+	}
+
+	return rc;
+}
+
 /*
  * Update the present map for a cpu node which is going away, and set
  * the hard id in the paca(s) to -1 to be consistent with boot time
@@ -648,6 +678,26 @@ static int dlpar_cpu_remove_by_index(u32 drc_index, bool release_drc)
 	return rc;
 }
 
+static int dlpar_cpu_readd_by_index(u32 drc_index)
+{
+	int rc = 0;
+
+	pr_info("Attempting to re-add CPU, drc index %x\n", drc_index);
+
+	rc = dlpar_cpu_remove_by_index(drc_index, false);
+	if (!rc)
+		rc = dlpar_cpu_add(drc_index, false);
+
+	if (rc)
+		pr_info("Failed to update cpu at drc_index %lx\n",
+				(unsigned long int)drc_index);
+	else
+		pr_info("CPU at drc_index %lx was updated\n",
+				(unsigned long int)drc_index);
+
+	return rc;
+}
+
 static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
 {
 	struct device_node *dn;
@@ -838,6 +888,9 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 		else
 			rc = -EINVAL;
 		break;
+	case PSERIES_HP_ELOG_ACTION_READD:
+		rc = dlpar_cpu_readd_by_index(drc_index);
+		break;
 	default:
 		pr_err("Invalid action (%d) specified\n", hp_elog->action);
 		rc = -EINVAL;
@@ -901,6 +954,11 @@ static int pseries_smp_notifier(struct notifier_block *nb,
 	case OF_RECONFIG_DETACH_NODE:
 		pseries_remove_processor(rd->dn);
 		break;
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		if (!strcmp(rd->dn->type, "cpu") &&
+		    !strcmp(rd->prop->name, "ibm,associativity"))
+			pseries_update_processor(rd);
+		break;
 	}
 	return notifier_from_errno(err);
 }

^ permalink raw reply related

* [PATCH v08 4/5] mobility/numa: Ensure numa update does not overlap
From: Michael Bringmann @ 2018-07-29 13:19 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <9573dc30-01ad-2650-c247-28df08875cef@linux.vnet.ibm.com>

mobility/numa: Ensure that numa_update_cpu_topology() can not be
entered multiple times concurrently.  It may be accessed through
many different paths through the code.  Without some protection
against multiple entry, the code may acquire and update associativity
information from the PHYP in multiple threads, and apply changes
separately in each thread.  Applying the changes concurrently may
perform considerable work multiple times, but more importantly,
may acquire locks in other modules concurrently and end up blocking
each of the competing threads.  This patch allows only the first
entrant to the code to execute the operation and to recognize any
CPU topology changes.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in patch:
  -- Added information to description.
---
 arch/powerpc/mm/numa.c |    9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index a789d57..b22e27a 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1079,6 +1079,7 @@ struct topology_update_data {
 static int topology_timer_secs = 1;
 static int topology_inited;
 static int topology_update_needed;
+static struct mutex topology_update_lock;
 
 /*
  * Change polling interval for associativity changes.
@@ -1320,6 +1321,11 @@ int numa_update_cpu_topology(bool cpus_locked)
 	if (!updates)
 		return 0;
 
+	if (!mutex_trylock(&topology_update_lock)) {
+		kfree(updates);
+		return 0;
+	}
+
 	cpumask_clear(&updated_cpus);
 
 	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
@@ -1424,6 +1430,7 @@ int numa_update_cpu_topology(bool cpus_locked)
 out:
 	kfree(updates);
 	topology_update_needed = 0;
+	mutex_unlock(&topology_update_lock);
 	return changed;
 }
 
@@ -1598,6 +1605,8 @@ static ssize_t topology_write(struct file *file, const char __user *buf,
 
 static int topology_update_init(void)
 {
+	mutex_init(&topology_update_lock);
+
 	/* Do not poll for changes if disabled at boot */
 	if (topology_updates_enabled)
 		start_topology_update();

^ permalink raw reply related

* [PATCH v08 5/5] hotplug/pmt: Update topology after PMT
From: Michael Bringmann @ 2018-07-29 13:19 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <9573dc30-01ad-2650-c247-28df08875cef@linux.vnet.ibm.com>

hotplug/pmt: Invoke rebuild_sched_domains before applying any CPU
'readd' modifications.  The call leads to arch_update_cpu_topology()
which will recognize and report any changes to the CPU topology,
ensuring they are applied and that the relevant system data structures are
updated.  It may also initialize nodes that were unused in the
topology of the source migration system.  This will avoid some
locking synchronization issues that were observed during development
of this patch set.  Also, the addition of the explicit call ensures
that such changes are made even when the automatic topology update
worker, triggered by a timer, has been disabled.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in patch:
  -- Add information to description.
  -- Remove long ssleep calls
  -- Rearrange code to perform rebuild before any other PMT mods
---
 arch/powerpc/platforms/pseries/dlpar.c |   12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 13ac1cc..2de0f0d 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -16,6 +16,7 @@
 #include <linux/notifier.h>
 #include <linux/spinlock.h>
 #include <linux/cpu.h>
+#include <linux/cpuset.h>
 #include <linux/slab.h>
 #include <linux/of.h>
 
@@ -437,6 +438,17 @@ static int dlpar_pmt(struct pseries_hp_errorlog *work)
 {
 	struct list_head *pos, *q;
 
+	/* Rebuild the domains and init any memoryless nodes
+	 * first to avoid later sync issues with CPU readd.
+	 */
+	rebuild_sched_domains();
+	msleep(100);
+		/* Ensure that the worker for rebuild_sched_domains
+		 * has the opportunity to actually begin work as we
+		 * don't want it delayed by the CPU readd hotplug
+		 * locking.
+		 */
+
 	list_for_each_safe(pos, q, &dlpar_delayed_list) {
 		struct pseries_hp_errorlog *tmp;
 

^ permalink raw reply related

* Re: [PATCH] powerpc/mm: Don't report PUDs as memory leaks when using kmemleak
From: Paul Menzel @ 2018-07-29 13:35 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: aneesh.kumar
In-Reply-To: <20180719143316.23486-1-mpe@ellerman.id.au>

Dear Michael,


Am 19.07.2018 um 16:33 schrieb Michael Ellerman:
> Paul Menzel reported that kmemleak was producing reports such as:
> 
>    unreferenced object 0xc0000000f8b80000 (size 16384):
>      comm "init", pid 1, jiffies 4294937416 (age 312.240s)
>      hex dump (first 32 bytes):
>        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>      backtrace:
>        [<00000000d997deb7>] __pud_alloc+0x80/0x190
>        [<0000000087f2e8a3>] move_page_tables+0xbac/0xdc0
>        [<00000000091e51c2>] shift_arg_pages+0xc0/0x210
>        [<00000000ab88670c>] setup_arg_pages+0x22c/0x2a0
>        [<0000000060871529>] load_elf_binary+0x41c/0x1648
>        [<00000000ecd9d2d4>] search_binary_handler.part.11+0xbc/0x280
>        [<0000000034e0cdd7>] __do_execve_file.isra.13+0x73c/0x940
>        [<000000005f953a6e>] sys_execve+0x58/0x70
>        [<000000009700a858>] system_call+0x5c/0x70
> 
> Indicating that a PUD was being leaked.
> 
> However what's really happening is that kmemleak is not able to
> recognise the references from the PGD to the PUD, because they are not
> fully qualified pointers.
> 
> We can confirm that in xmon, eg:
> 
> Find the task struct for pid 1 "init":
>    0:mon> P
>         task_struct     ->thread.ksp    PID   PPID S  P CMD
>    c0000001fe7c0000 c0000001fe803960      1      0 S 13 systemd
> 
> Dump virtual address 0 to find the PGD:
>    0:mon> dv 0 c0000001fe7c0000
>    pgd  @ 0xc0000000f8b01000
> 
> Dump the memory of the PGD:
>    0:mon> d c0000000f8b01000
>    c0000000f8b01000 00000000f8b90000 0000000000000000  |................|
>    c0000000f8b01010 0000000000000000 0000000000000000  |................|
>    c0000000f8b01020 0000000000000000 0000000000000000  |................|
>    c0000000f8b01030 0000000000000000 00000000f8b80000  |................|
>                                      ^^^^^^^^^^^^^^^^
> 
> There we can see the reference to our supposedly leaked PUD. But
> because it's missing the leading 0xc, kmemleak won't recognise it.
> 
> We can confirm it's still in use by translating an address that is
> mapped via it:
>    0:mon> dv 7fff94000000 c0000001fe7c0000
>    pgd  @ 0xc0000000f8b01000
>    pgdp @ 0xc0000000f8b01038 = 0x00000000f8b80000 <--
>    pudp @ 0xc0000000f8b81ff8 = 0x00000000037c4000
>    pmdp @ 0xc0000000037c5ca0 = 0x00000000fbd89000
>    ptep @ 0xc0000000fbd89000 = 0xc0800001d5ce0386
>    Maps physical address = 0x00000001d5ce0000
>    Flags = Accessed Dirty Read Write
> 
> The fix is fairly simple. We need to tell kmemleak to ignore PUD
> allocations and never report them as leaks. We can also tell it not to
> scan the PGD, because it will never find pointers in there. However it
> will still notice if we allocate a PGD and then leak it.
> 
> Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
>   arch/powerpc/include/asm/book3s/64/pgalloc.h | 23 +++++++++++++++++++++--
>   1 file changed, 21 insertions(+), 2 deletions(-)

[…]

Tested-by: Paul Menzel <pmenzel@molgen.mpg.de> on IBM S822LC


Kind regards,

Paul

^ permalink raw reply

* Re: [PATCH] powerpc/mobility: Fix node detach/rename problem
From: kbuild test robot @ 2018-07-29 16:31 UTC (permalink / raw)
  To: Michael Bringmann
  Cc: kbuild-all, linuxppc-dev, Nathan Fontenot, Michael Bringmann,
	Thomas Falcon, Tyrel Datwyler, John Allen
In-Reply-To: <c2fba52a-baac-25fb-c26b-c84b25c3178c@linux.vnet.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 2459 bytes --]

Hi Michael,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.18-rc6 next-20180727]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Michael-Bringmann/powerpc-mobility-Fix-node-detach-rename-problem/20180729-213517
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-defconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.2.0 make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

   arch/powerpc/platforms/pseries/dlpar.c: In function 'dlpar_detach_node':
>> arch/powerpc/platforms/pseries/dlpar.c:276:9: error: passing argument 1 of 'memset' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers]
     memset(dn->name, 0, strlen(dn->name));
            ^~
   In file included from include/linux/string.h:20:0,
                    from arch/powerpc/include/asm/paca.h:19,
                    from arch/powerpc/include/asm/current.h:16,
                    from include/linux/mutex.h:14,
                    from include/linux/notifier.h:14,
                    from arch/powerpc/platforms/pseries/dlpar.c:16:
   arch/powerpc/include/asm/string.h:23:15: note: expected 'void *' but argument is of type 'const char *'
    extern void * memset(void *,int,__kernel_size_t);
                  ^~~~~~
   cc1: all warnings being treated as errors

vim +276 arch/powerpc/platforms/pseries/dlpar.c

   259	
   260	int dlpar_detach_node(struct device_node *dn)
   261	{
   262		struct device_node *child;
   263		int rc;
   264	
   265		child = of_get_next_child(dn, NULL);
   266		while (child) {
   267			dlpar_detach_node(child);
   268			child = of_get_next_child(dn, child);
   269		}
   270	
   271		rc = of_detach_node(dn);
   272		if (rc)
   273			return rc;
   274	
   275		dn->phandle = 0;
 > 276		memset(dn->name, 0, strlen(dn->name));
   277	
   278		return 0;
   279	}
   280	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 23683 bytes --]

^ permalink raw reply

* Re: [PATCH v2 1/1] powerpc/pseries: fix EEH recovery of some IOV devices
From: Sam Bobroff @ 2018-07-30  1:58 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, linux-pci, bhelgaas, bryantly
In-Reply-To: <87in5ymp03.fsf@concordia.ellerman.id.au>

[-- Attachment #1: Type: text/plain, Size: 2101 bytes --]

On Mon, Jul 02, 2018 at 10:59:24AM +1000, Michael Ellerman wrote:
> Sam Bobroff <sbobroff@linux.ibm.com> writes:
> 
> > EEH recovery currently fails on pSeries for some IOV capable PCI
> > devices, if CONFIG_PCI_IOV is on and the hypervisor doesn't provide
> > certain device tree properties for the device. (Found on an IOV
> > capable device using the ipr driver.)
> >
> > Recovery fails in pci_enable_resources() at the check on r->parent,
> > because r->flags is set and r->parent is not.  This state is due to
> > sriov_init() setting the start, end and flags members of the IOV BARs
> > but the parent not being set later in
> > pseries_pci_fixup_iov_resources(), because the
> > "ibm,open-sriov-vf-bar-info" property is missing.
> >
> > Correct this by zeroing the resource flags for IOV BARs when they
> > can't be configured.
> >
> > Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
> > ---
> > Hi,
> >
> > This is a fix to allow EEH recovery to succeed in a specific situation,
> > which I've tried to explain in the commit message.
> >
> > As with the RFC version, the IOV BARs are disabled by setting the resource
> > flags to 0 but the other fields are now left as-is because that is what is done
> > elsewhere (see sriov_init() and __pci_read_base()).
> >
> > I've also examined the concern raised by Bjorn Helgaas, that VFs could be
> > enabled later after the BARs are disabled, and it already seems safe: enabling
> > VFs (on pseries) depends on another device tree property,
> > "ibm,number-of-configurable-vfs" as well as support for the RTAS function
> > "ibm_map_pes". Since these are all part of the hypervisor's support for IOV it
> > seems unlikely that we would ever see some of them but not all. (None are
> > currently provided by QEMU/KVM.) (Additionally, the ipr driver on which the EEH
> > recovery failure was discovered doesn't even seem to have SR-IOV support so it
> > certainly can't enable VFs.)
> 
> Can you fold/reword the above into the change log, it seems like useful
> detail.
> 
> cheers

Sure, sounds good.


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* [PATCH v3 1/1] powerpc/pseries: fix EEH recovery of some IOV devices
From: Sam Bobroff @ 2018-07-30  1:59 UTC (permalink / raw)
  To: linuxppc-dev, linux-pci; +Cc: mpe, bhelgaas, bryantly

EEH recovery currently fails on pSeries for some IOV capable PCI
devices, if CONFIG_PCI_IOV is on and the hypervisor doesn't provide
certain device tree properties for the device. (Found on an IOV
capable device using the ipr driver.)

Recovery fails in pci_enable_resources() at the check on r->parent,
because r->flags is set and r->parent is not.  This state is due to
sriov_init() setting the start, end and flags members of the IOV BARs
but the parent not being set later in
pseries_pci_fixup_iov_resources(), because the
"ibm,open-sriov-vf-bar-info" property is missing.

Correct this by zeroing the resource flags for IOV BARs when they
can't be configured (this is the same method used by sriov_init() and
__pci_read_base()).

VFs cleared this way can't be enabled later, because that requires
another device tree property, "ibm,number-of-configurable-vfs" as well
as support for the RTAS function "ibm_map_pes". These are all part of
hypervisor support for IOV and it seems unlikely that a hypervisor
would ever partially, but not fully, support it. (None are currently
provided by QEMU/KVM.)

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
Hi,

This is a fix to allow EEH recovery to succeed in a specific situation,
which I've tried to explain in the commit message.

As with the RFC version, the IOV BARs are disabled by setting the resource
flags to 0 but the other fields are now left as-is because that is what is done
elsewhere (see sriov_init() and __pci_read_base()).

I've also examined the concern raised by Bjorn Helgaas, that VFs could be
enabled later after the BARs are disabled, and it already seems safe: enabling
VFs (on pseries) depends on another device tree property,
"ibm,number-of-configurable-vfs" as well as support for the RTAS function
"ibm_map_pes". Since these are all part of the hypervisor's support for IOV it
seems unlikely that we would ever see some of them but not all. (None are
currently provided by QEMU/KVM.) (Additionally, the ipr driver on which the EEH
recovery failure was discovered doesn't even seem to have SR-IOV support so it
certainly can't enable VFs.)

Cheers,
Sam.

Patch set v3:
Patch 1/1: powerpc/pseries: fix EEH recovery of some IOV devices
* Moved some useful information from the cover letter to the commit log.

Patch set v2:
Patch 1/1: powerpc/pseries: fix EEH recovery of some IOV devices
* Moved the BAR disabling code to a function.
* Also check in pseries_pci_fixup_resources().

Patch set v1:
Patch 1/1: powerpc/pseries: fix EEH recovery of IOV devices

 arch/powerpc/platforms/pseries/setup.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index b55ad4286dc7..0a9e4243ae1d 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -645,6 +645,15 @@ void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
 	}
 }
 
+static void pseries_disable_sriov_resources(struct pci_dev *pdev)
+{
+	int i;
+
+	pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n");
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+		pdev->resource[i + PCI_IOV_RESOURCES].flags = 0;
+}
+
 static void pseries_pci_fixup_resources(struct pci_dev *pdev)
 {
 	const int *indexes;
@@ -652,10 +661,10 @@ static void pseries_pci_fixup_resources(struct pci_dev *pdev)
 
 	/*Firmware must support open sriov otherwise dont configure*/
 	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
-	if (!indexes)
-		return;
-	/* Assign the addresses from device tree*/
-	of_pci_set_vf_bar_size(pdev, indexes);
+	if (indexes)
+		of_pci_set_vf_bar_size(pdev, indexes);
+	else
+		pseries_disable_sriov_resources(pdev);
 }
 
 static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
@@ -667,10 +676,10 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
 		return;
 	/*Firmware must support open sriov otherwise dont configure*/
 	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
-	if (!indexes)
-		return;
-	/* Assign the addresses from device tree*/
-	of_pci_parse_iov_addrs(pdev, indexes);
+	if (indexes)
+		of_pci_parse_iov_addrs(pdev, indexes);
+	else
+		pseries_disable_sriov_resources(pdev);
 }
 
 static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
-- 
2.16.1.74.g9b0b1f47b

^ permalink raw reply related

* Re: [PATCH 1/2] KVM: PPC: Book3S HV: Allow creating max number of VCPUs on POWER9
From: Sam Bobroff @ 2018-07-30  3:37 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc, linuxppc-dev, David Gibson
In-Reply-To: <20180726054353.GB1672@fergus>

[-- Attachment #1: Type: text/plain, Size: 2645 bytes --]

On Thu, Jul 26, 2018 at 03:43:54PM +1000, Paul Mackerras wrote:
> Commit 1e175d2 ("KVM: PPC: Book3S HV: Pack VCORE IDs to access full
> VCPU ID space", 2018-07-25) allowed use of VCPU IDs up to
> KVM_MAX_VCPU_ID on POWER9 in all guest SMT modes and guest emulated
> hardware SMT modes.  However, with the current definition of
> KVM_MAX_VCPU_ID, a guest SMT mode of 1 and an emulated SMT mode of 8,
> it is only possible to create KVM_MAX_VCPUS / 2 VCPUS, because
> threads_per_subcore is 4 on POWER9 CPUs.  (Using an emulated SMT mode
> of 8 is useful when migrating VMs to or from POWER8 hosts.)
> 
> This increases KVM_MAX_VCPU_ID to 8 * KVM_MAX_VCPUS when HV KVM is
> configured in, so that a full complement of KVM_MAX_VCPUS VCPUs can
> be created on POWER9 in all guest SMT modes and emulated hardware
> SMT modes.
> 
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> ---
> This and the next patch apply on my kvm-ppc-next branch, which
> includes Sam Bobroff's patch "KVM: PPC: Book3S HV: Pack VCORE IDs to
> access full VCPU ID space".

Thanks!

I've tested kvm-ppc-next with all three above patches, and I'm able to
access the entire VCPU ID space (tested using CONFIG_NR_CPUS 1024 in the
host, rather than the default 2048, as that's the limit imposed by
QEMU). So:

Tested-by: Sam Bobroff <sbobroff@linux.ibm.com>

> 
>  arch/powerpc/include/asm/kvm_host.h | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 5b9e660..906bcbdf 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -42,7 +42,14 @@
>  #define KVM_USER_MEM_SLOTS	512
>  
>  #include <asm/cputhreads.h>
> -#define KVM_MAX_VCPU_ID                (threads_per_subcore * KVM_MAX_VCORES)
> +
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +#include <asm/kvm_book3s_asm.h>		/* for MAX_SMT_THREADS */
> +#define KVM_MAX_VCPU_ID		(MAX_SMT_THREADS * KVM_MAX_VCORES)
> +
> +#else
> +#define KVM_MAX_VCPU_ID		KVM_MAX_VCPUS
> +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
>  
>  #define __KVM_HAVE_ARCH_INTC_INITIALIZED
>  
> -- 
> 2.7.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [RFC 2/4] virtio: Override device's DMA OPS with virtio_direct_dma_ops selectively
From: Anshuman Khandual @ 2018-07-30  4:15 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: virtualization, linux-kernel, linuxppc-dev, aik, robh, joe,
	elfring, david, jasowang, benh, mpe, hch, linuxram, haren, paulus,
	srikar
In-Reply-To: <20180729001344-mutt-send-email-mst@kernel.org>

On 07/29/2018 02:46 AM, Michael S. Tsirkin wrote:
> On Sat, Jul 28, 2018 at 02:26:24PM +0530, Anshuman Khandual wrote:
>> On 07/20/2018 09:29 AM, Anshuman Khandual wrote:
>>> Now that virtio core always needs all virtio devices to have DMA OPS, we
>>> need to make sure that the structure it points to is the right one. In the
>>> absence of VIRTIO_F_IOMMU_PLATFORM flag QEMU expects GPA from guest kernel.
>>> In such case, virtio device must use default virtio_direct_dma_ops DMA OPS
>>> structure which transforms scatter gather buffer addresses as GPA. This
>>> DMA OPS override must happen as early as possible during virtio device
>>> initialization sequence before virtio core starts using given device's DMA
>>> OPS callbacks for I/O transactions. This change detects device's IOMMU flag
>>> and does the override in case the flag is cleared.
>>>
>>> Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
>>> ---
>>>  drivers/virtio/virtio.c | 5 +++++
>>>  1 file changed, 5 insertions(+)
>>>
>>> diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
>>> index 7907ad3..6b13987 100644
>>> --- a/drivers/virtio/virtio.c
>>> +++ b/drivers/virtio/virtio.c
>>> @@ -166,6 +166,8 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status)
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_add_status);
>>>
>>> +const struct dma_map_ops virtio_direct_dma_ops;
>>> +
>>>  int virtio_finalize_features(struct virtio_device *dev)
>>>  {
>>>  	int ret = dev->config->finalize_features(dev);
>>> @@ -174,6 +176,9 @@ int virtio_finalize_features(struct virtio_device *dev)
>>>  	if (ret)
>>>  		return ret;
>>
>>
>> The previous patch removed the code block for XEN guests which forced
>> the use of DMA API all the time irrespective of VIRTIO_F_IOMMU_PLATFORM
>> flag on the device. Here is what I have removed with patch 2/4 which
>> breaks the existing semantics on XEN guests.
>>
>> -static bool vring_use_dma_api(struct virtio_device *vdev)
>> -{
>> -	if (!virtio_has_iommu_quirk(vdev))
>> -		return true;
>> -
>> -	/* Otherwise, we are left to guess. */
>> -	/*
>> -	 * In theory, it's possible to have a buggy QEMU-supposed
>> -	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
>> -	 * such a configuration, virtio has never worked and will
>> -	 * not work without an even larger kludge.  Instead, enable
>> -	 * the DMA API if we're a Xen guest, which at least allows
>> -	 * all of the sensible Xen configurations to work correctly.
>> -	 */
>> -	if (xen_domain())
>> -		return true;
>> -
>> -	return false;
>> -}
>>
>> XEN guests would not like override with virtio_direct_dma_ops in any
>> case irrespective of the flag VIRTIO_F_IOMMU_PLATFORM. So the existing
>> semantics can be preserved with something like this. It just assumes
>> that dev->dma_ops is non-NULL and a valid one set by the architecture.
>> If required we can add those tests here before skipping the override.
>>
>> diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
>> index 7907ad3..6b13987 100644
>> --- a/drivers/virtio/virtio.c
>> +++ b/drivers/virtio/virtio.c
>> @@ -166,6 +166,8 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status)
>>  }
>>  EXPORT_SYMBOL_GPL(virtio_add_status);
>>
>> +const struct dma_map_ops virtio_direct_dma_ops;
>> +
>>  int virtio_finalize_features(struct virtio_device *dev)
>>  {
>>  	int ret = dev->config->finalize_features(dev);
>> @@ -174,6 +176,9 @@ int virtio_finalize_features(struct virtio_device *dev)
>>  	if (ret)
>>  		return ret;
>> +
>> +	if (xen_domain())
>> +		goto skip_override;
>> +
>> +	if (virtio_has_iommu_quirk(dev))
>> +		set_dma_ops(dev->dev.parent, &virtio_direct_dma_ops);
>> +
>> + skip_override:
>> +
> 
> I prefer normal if scoping as opposed to goto spaghetti pls.
> Better yet move vring_use_dma_api here and use it.
> Less of a chance something will break.

Sure, will move vring_use_dma_api() function in here.

^ permalink raw reply

* Re: [PATCH] powerpc/mobility: Fix node detach/rename problem
From: Michael Ellerman @ 2018-07-30  6:31 UTC (permalink / raw)
  To: Michael Bringmann, linuxppc-dev
  Cc: Nathan Fontenot, Michael Bringmann, Thomas Falcon, Tyrel Datwyler,
	John Allen
In-Reply-To: <c2fba52a-baac-25fb-c26b-c84b25c3178c@linux.vnet.ibm.com>

Michael Bringmann <mwb@linux.vnet.ibm.com> writes:

> During LPAR migration, the content of the device tree/sysfs may
> be updated including deletion and replacement of nodes in the
> tree.  When nodes are added to the internal node structures, they
> are appended in FIFO order to a list of nodes maintained by the
> OF code APIs.

That hasn't been true for several years. The data structure is an n-ary
tree. What kernel version are you working on?

> When nodes are removed from the device tree, they
> are marked OF_DETACHED, but not actually deleted from the system
> to allow for pointers cached elsewhere in the kernel.  The order
> and content of the entries in the list of nodes is not altered,
> though.

Something is going wrong if this is actually happening.

When the node is detached it should be *detached* from the tree of all
nodes, so it should not be discoverable other than by having an existing
pointer to it.

That's what __of_detach_node() does:

	parent = np->parent;
	if (WARN_ON(!parent))
		return;

	if (parent->child == np)
		parent->child = np->sibling;
	else {
		struct device_node *prevsib;
		for (prevsib = np->parent->child;
		     prevsib->sibling != np;
		     prevsib = prevsib->sibling)
			;
		prevsib->sibling = np->sibling;
	}

ie. the node must already have a non-NULL parent, and then it is spliced out
of its parent's child list.


Please give us more info so we can work out what's actually happening.

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/mm: Don't report PUDs as memory leaks when using kmemleak
From: Michael Ellerman @ 2018-07-30  6:43 UTC (permalink / raw)
  To: Paul Menzel, linuxppc-dev; +Cc: aneesh.kumar
In-Reply-To: <e653db46-a829-4673-4378-4c0afef03cde@molgen.mpg.de>

Paul Menzel <pmenzel@molgen.mpg.de> writes:
> Am 19.07.2018 um 16:33 schrieb Michael Ellerman:
...
>>
>> The fix is fairly simple. We need to tell kmemleak to ignore PUD
>> allocations and never report them as leaks. We can also tell it not to
>> scan the PGD, because it will never find pointers in there. However it
>> will still notice if we allocate a PGD and then leak it.
>>
>> Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
>> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
>> ---
>>   arch/powerpc/include/asm/book3s/64/pgalloc.h | 23 +++++++++++++++++++++--
>>   1 file changed, 21 insertions(+), 2 deletions(-)
>
> […]
>
> Tested-by: Paul Menzel <pmenzel@molgen.mpg.de> on IBM S822LC

Thanks.

cheers

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox