LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 3/3] powerpc/mm/nv/hash: Reduce contention on hpte lock
From: Aneesh Kumar K.V @ 2018-06-29  8:36 UTC (permalink / raw)
  To: npiggin, benh, paulus, mpe
  Cc: linuxppc-dev, Aneesh Kumar K.V, Aneesh Kumar K . V
In-Reply-To: <20180629083631.28017-1-aneesh.kumar@linux.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We already do this in some places. This patch makes sure we always search for
the hpte without holding the lock, and redo the compare with the lock held once
a match is found.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/mm/hash_native_64.c | 49 +++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 68e6eaf41bb9..ffbd5ed4e8de 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -568,9 +568,19 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
 
 	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
-	native_lock_hpte(hptep);
 	hpte_v = hpte_get_old_v(hptep);
 
+	if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+		native_lock_hpte(hptep);
+		/* recheck with locks held */
+		hpte_v = hpte_get_old_v(hptep);
+
+		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
+			/* Invalidate the hpte. NOTE: this also unlocks it */
+			hptep->v = 0;
+		else
+			native_unlock_hpte(hptep);
+	}
 	/*
 	 * We need to invalidate the TLB always because hpte_remove doesn't do
 	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -578,13 +588,6 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	 * (hpte_remove) because we assume the old translation is still
 	 * technically "valid".
 	 */
-	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
-		native_unlock_hpte(hptep);
-	else
-		/* Invalidate the hpte. NOTE: this also unlocks it */
-		hptep->v = 0;
-
-	/* Invalidate the TLB */
 	tlbie(vpn, bpsize, apsize, ssize, local);
 
 	local_irq_restore(flags);
@@ -626,15 +629,23 @@ static void native_hugepage_invalidate(unsigned long vsid,
 
 		hptep = htab_address + slot;
 		want_v = hpte_encode_avpn(vpn, psize, ssize);
-		native_lock_hpte(hptep);
 		hpte_v = hpte_get_old_v(hptep);
 
 		/* Even if we miss, we need to invalidate the TLB */
-		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
-			native_unlock_hpte(hptep);
-		else
-			/* Invalidate the hpte. NOTE: this also unlocks it */
-			hptep->v = 0;
+		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+			/* recheck with locks held */
+			native_lock_hpte(hptep);
+			hpte_v = hpte_get_old_v(hptep);
+
+			if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+				/*
+				 * Invalidate the hpte. NOTE: this also unlocks it
+				 */
+
+				hptep->v = 0;
+			} else
+				native_unlock_hpte(hptep);
+		}
 		/*
 		 * We need to do tlb invalidate for all the address, tlbie
 		 * instruction compares entry_VA in tlb with the VA specified
@@ -802,13 +813,19 @@ static void native_flush_hash_range(unsigned long number, int local)
 			slot += hidx & _PTEIDX_GROUP_IX;
 			hptep = htab_address + slot;
 			want_v = hpte_encode_avpn(vpn, psize, ssize);
+			hpte_v = hpte_get_old_v(hptep);
+
+			if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
+				continue;
+			/* lock and try again */
 			native_lock_hpte(hptep);
 			hpte_v = hpte_get_old_v(hptep);
-			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
-			    !(hpte_v & HPTE_V_VALID))
+
+			if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
 				native_unlock_hpte(hptep);
 			else
 				hptep->v = 0;
+
 		} pte_iterate_hashed_end();
 	}
 
-- 
2.17.1

^ permalink raw reply related

* [PATCH] powerpc/mm/hash: Improve error reporting on HCALL failures
From: Aneesh Kumar K.V @ 2018-06-29  8:39 UTC (permalink / raw)
  To: npiggin, benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This patch adds error reporting to the H_ENTER and H_READ hcalls. A failure of
either of these hcalls is mostly fatal, and it would be good to log the failure
reason.

We also switch printk to pr_*

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/platforms/pseries/lpar.c | 56 ++++++++++++++-------------
 1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 5a392e40f3d2..a3b2a1eb8bfd 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -21,6 +21,7 @@
 
 /* Enables debugging of low-level hash table routines - careful! */
 #undef DEBUG
+#define pr_fmt(fmt) "lpar: " fmt
 
 #include <linux/kernel.h>
 #include <linux/dma-mapping.h>
@@ -165,8 +166,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 
 	lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
 	if (unlikely(lpar_rc == H_PTEG_FULL)) {
-		if (!(vflags & HPTE_V_BOLTED))
-			pr_devel(" full\n");
+		pr_devel("Hash table group is full\n");
 		return -1;
 	}
 
@@ -176,14 +176,17 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	 * or we will loop forever, so return -2 in this case.
 	 */
 	if (unlikely(lpar_rc != H_SUCCESS)) {
-		if (!(vflags & HPTE_V_BOLTED))
-			pr_devel(" lpar err %ld\n", lpar_rc);
+		pr_err("Failed hash pte insert with error %ld\n", lpar_rc);
 		return -2;
 	}
+	/*
+	 * Avoid all the boot debug prints.
+	 */
 	if (!(vflags & HPTE_V_BOLTED))
 		pr_devel(" -> slot: %lu\n", slot & 7);
 
-	/* Because of iSeries, we have to pass down the secondary
+	/*
+	 * Because of iSeries, we have to pass down the secondary
 	 * bucket bit here as well
 	 */
 	return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3);
@@ -240,8 +243,11 @@ static void manual_hpte_clear_all(void)
          */
 	for (i = 0; i < hpte_count; i += 4) {
 		lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes);
-		if (lpar_rc != H_SUCCESS)
+		if (lpar_rc != H_SUCCESS) {
+			pr_info("Failed to read hash page table at %ld err %ld\n",
+				i, lpar_rc);
 			continue;
+		}
 		for (j = 0; j < 4; j++){
 			if ((ptes[j].pteh & HPTE_V_VRMA_MASK) ==
 				HPTE_V_VRMA_MASK)
@@ -340,8 +346,11 @@ static long __pSeries_lpar_hpte_find(unsigned long want_v, unsigned long hpte_gr
 	for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {
 
 		lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
-		if (lpar_rc != H_SUCCESS)
+		if (lpar_rc != H_SUCCESS) {
+			pr_info("Failed to read hash page table at %ld err %ld\n",
+				hpte_group, lpar_rc);
 			continue;
+		}
 
 		for (j = 0; j < 4; j++) {
 			if (HPTE_V_COMPARE(ptes[j].pteh, want_v) &&
@@ -612,8 +621,8 @@ static int __init disable_bulk_remove(char *str)
 {
 	if (strcmp(str, "off") == 0 &&
 	    firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
-			printk(KERN_INFO "Disabling BULK_REMOVE firmware feature");
-			powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE;
+		pr_info("Disabling BULK_REMOVE firmware feature");
+		powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE;
 	}
 	return 1;
 }
@@ -659,8 +668,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
 	if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
 		return -ENODEV;
 
-	printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n",
-	       shift);
+	pr_info("Attempting to resize HPT to shift %lu\n", shift);
 
 	t0 = ktime_get();
 
@@ -672,8 +680,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
 			/* prepare with shift==0 cancels an in-progress resize */
 			rc = plpar_resize_hpt_prepare(0, 0);
 			if (rc != H_SUCCESS)
-				printk(KERN_WARNING
-				       "lpar: Unexpected error %d cancelling timed out HPT resize\n",
+				pr_warn("Unexpected error %d cancelling timed out HPT resize\n",
 				       rc);
 			return -ETIMEDOUT;
 		}
@@ -691,9 +698,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
 	case H_RESOURCE:
 		return -EPERM;
 	default:
-		printk(KERN_WARNING
-		       "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n",
-		       rc);
+		pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
 		return -EIO;
 	}
 
@@ -706,22 +711,19 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
 	if (rc != 0) {
 		switch (state.commit_rc) {
 		case H_PTEG_FULL:
-			printk(KERN_WARNING
-			       "lpar: Hash collision while resizing HPT\n");
+			pr_warn("Hash collision while resizing HPT\n");
 			return -ENOSPC;
 
 		default:
-			printk(KERN_WARNING
-			       "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
-			       state.commit_rc);
+			pr_warn("Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
+				state.commit_rc);
 			return -EIO;
 		};
 	}
 
-	printk(KERN_INFO
-	       "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
-	       shift, (long long) ktime_ms_delta(t1, t0),
-	       (long long) ktime_ms_delta(t2, t1));
+	pr_info("HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
+		shift, (long long) ktime_ms_delta(t1, t0),
+		(long long) ktime_ms_delta(t2, t1));
 
 	return 0;
 }
@@ -785,13 +787,13 @@ static int __init cmo_free_hint(char *str)
 	parm = strstrip(str);
 
 	if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) {
-		printk(KERN_INFO "cmo_free_hint: CMO free page hinting is not active.\n");
+		pr_info("%s: CMO free page hinting is not active.\n", __func__);
 		cmo_free_hint_flag = 0;
 		return 1;
 	}
 
 	cmo_free_hint_flag = 1;
-	printk(KERN_INFO "cmo_free_hint: CMO free page hinting is active.\n");
+	pr_info("%s: CMO free page hinting is active.\n", __func__);
 
 	if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0)
 		return 1;
-- 
2.17.1

^ permalink raw reply related

* [PATCH] powerpc/mpic: Cleanup irq vector accounting
From: Bharat Bhushan @ 2018-06-29 10:24 UTC (permalink / raw)
  To: benh, paulus, mpe, robh, geoff, tyreld, linuxppc-dev,
	linux-kernel
  Cc: Bharat Bhushan

The available vector space calculation accounts for ipis and timer
interrupts, but the spurious vector was not accounted for. Also, later on,
mpic_setup_error_int() skips one more vector, seemingly because it
assumes one spurious vector.

Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
---
 arch/powerpc/sysdev/fsl_mpic_err.c | 2 +-
 arch/powerpc/sysdev/mpic.c         | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c
index 488ec45..2a98837 100644
--- a/arch/powerpc/sysdev/fsl_mpic_err.c
+++ b/arch/powerpc/sysdev/fsl_mpic_err.c
@@ -76,7 +76,7 @@ int mpic_setup_error_int(struct mpic *mpic, int intvec)
 	mpic->flags |= MPIC_FSL_HAS_EIMR;
 	/* allocate interrupt vectors for error interrupts */
 	for (i = MPIC_MAX_ERR - 1; i >= 0; i--)
-		mpic->err_int_vecs[i] = --intvec;
+		mpic->err_int_vecs[i] = intvec--;
 
 	return 0;
 }
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 1d4e0ef6..e098d1e 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1380,12 +1380,12 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 		 * global vector number space, as in case of ipis
 		 * and timer interrupts.
 		 *
-		 * Available vector space = intvec_top - 12, where 12
+		 * Available vector space = intvec_top - 13, where 13
 		 * is the number of vectors which have been consumed by
-		 * ipis and timer interrupts.
+		 * ipis, timer interrupts and spurious.
 		 */
 		if (fsl_version >= 0x401) {
-			ret = mpic_setup_error_int(mpic, intvec_top - 12);
+			ret = mpic_setup_error_int(mpic, intvec_top - 13);
 			if (ret)
 				return NULL;
 		}
-- 
1.9.3

^ permalink raw reply related

* Re: [PATCH v2 0/3] powerpc/fsl: Speculation barrier for NXP PowerPC Book3E
From: Diana Madalina Craciun @ 2018-06-29 14:34 UTC (permalink / raw)
  To: linuxppc-dev@lists.ozlabs.org
  Cc: mpe@ellerman.id.au, oss@buserror.net, Leo Li, Bharat Bhushan
In-Reply-To: <1528721608-15443-1-git-send-email-diana.craciun@nxp.com>

Hi,

Should I rebase the series on top of the latest kernel?

Diana

On 6/11/2018 3:53 PM, Diana Craciun wrote:
> Implement barrier_nospec for NXP PowerPC Book3E processors.
>
> Diana Craciun (3):
>   Disable the speculation barrier from the command line
>   Add barrier_nospec implementation for NXP PowerPC Book3E
>   Implement cpu_show_spectre_v1/v2 for NXP PowerPC Book3E
>
>  arch/powerpc/Kconfig               |  2 +-
>  arch/powerpc/include/asm/barrier.h | 10 +++++++++
>  arch/powerpc/include/asm/setup.h   |  2 +-
>  arch/powerpc/kernel/Makefile       |  2 +-
>  arch/powerpc/kernel/module.c       |  5 +++--
>  arch/powerpc/kernel/security.c     | 42 +++++++++++++++++++++++++++++++++++++-
>  arch/powerpc/kernel/setup_32.c     |  5 +++++
>  arch/powerpc/kernel/setup_64.c     |  6 ++++++
>  arch/powerpc/kernel/vmlinux.lds.S  |  4 +++-
>  arch/powerpc/lib/feature-fixups.c  | 35 ++++++++++++++++++++++++++++++-
>  10 files changed, 105 insertions(+), 8 deletions(-)
>
> --
> History:
>
> v1 --> v2
> - added implementation for cpu_show_spectre_x functions
> - the mitigation is no longer enabled through device tree options
> 2.5.5
>
>


^ permalink raw reply

* [PATCH] ALSA: snd-aoa: add of_node_put() in error path
From: Nicholas Mc Guire @ 2018-06-29 17:07 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Jaroslav Kysela, Takashi Iwai, linuxppc-dev, alsa-devel,
	linux-kernel, Nicholas Mc Guire

Both of_find_node_by_name() and of_get_next_child() return a
node pointer with its refcount incremented, thus it must be explicitly
decremented here after the last usage. As we are assured to have a
refcounted np, either from the initial
of_find_node_by_name(NULL, name); or from the of_get_next_child(gpio, np)
in the while loop, if we reach the error code path below, an
of_node_put(np) is needed.

Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
Fixes: commit f3d9478b2ce4 ("[ALSA] snd-aoa: add snd-aoa")
---

Problem located by an experimental coccinelle script

Patch was compiletested with: ppc64_defconfig (implies CONFIG_SND_AOA=m)

Patch is against 4.18-rc2 (localversion-next is next-20180629)

 sound/aoa/core/gpio-feature.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sound/aoa/core/gpio-feature.c b/sound/aoa/core/gpio-feature.c
index 7196008..6555742 100644
--- a/sound/aoa/core/gpio-feature.c
+++ b/sound/aoa/core/gpio-feature.c
@@ -88,8 +88,10 @@ static struct device_node *get_gpio(char *name,
 	}
 
 	reg = of_get_property(np, "reg", NULL);
-	if (!reg)
+	if (!reg) {
+		of_node_put(np);
 		return NULL;
+	}
 
 	*gpioptr = *reg;
 
-- 
2.1.4

^ permalink raw reply related

* Re: [PATCH] ALSA: snd-aoa: add of_node_put() in error path
From: Takashi Iwai @ 2018-06-29 18:37 UTC (permalink / raw)
  To: Nicholas Mc Guire
  Cc: Johannes Berg, alsa-devel, linuxppc-dev, Jaroslav Kysela,
	linux-kernel
In-Reply-To: <1530292062-31567-1-git-send-email-hofrat@osadl.org>

On Fri, 29 Jun 2018 19:07:42 +0200,
Nicholas Mc Guire wrote:
> 
>  Both calls to of_find_node_by_name() and of_get_next_child() return a
> node pointer with refcount incremented thus it must be explicidly
> decremented here after the last usage. As we are assured to have a
> refcounted  np  either from the initial
> of_find_node_by_name(NULL, name); or from the of_get_next_child(gpio, np)
> in the while loop if we reached the error code path below, an
> x of_node_put(np) is needed.
> 
> Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
> Fixes: commit f3d9478b2ce4 ("[ALSA] snd-aoa: add snd-aoa")
> ---
> 
> Problem located by an experimental coccinelle script
> 
> Patch was compiletested with: ppc64_defconfig (implies CONFIG_SND_AOA=m)
> 
> Patch is against 4.18-rc2 (localversion-next is next-20180629)

Applied, thanks.


Takashi

^ permalink raw reply

* [PATCH] powerpc/msi: Remove VLA usage
From: Kees Cook @ 2018-06-29 18:52 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Paul Mackerras, Michael Ellerman, Andrew Morton, Randy Dunlap,
	Tyrel Datwyler, Rob Herring, Ingo Molnar, linuxppc-dev,
	linux-kernel

In the quest to remove all stack VLA usage from the kernel[1], this
switches from an unchanging variable to a constant expression to eliminate
the VLA generation.

[1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/powerpc/sysdev/msi_bitmap.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index 6243a7e537d0..e64a411d1a00 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -225,22 +225,23 @@ static void __init test_of_node(void)
 	struct device_node of_node;
 	struct property prop;
 	struct msi_bitmap bmp;
-	int size = 256;
-	DECLARE_BITMAP(expected, size);
+#define SIZE_EXPECTED 256
+	DECLARE_BITMAP(expected, SIZE_EXPECTED);
 
 	/* There should really be a struct device_node allocator */
 	memset(&of_node, 0, sizeof(of_node));
 	of_node_init(&of_node);
 	of_node.full_name = node_name;
 
-	WARN_ON(msi_bitmap_alloc(&bmp, size, &of_node));
+	WARN_ON(msi_bitmap_alloc(&bmp, SIZE_EXPECTED, &of_node));
 
 	/* No msi-available-ranges, so expect > 0 */
 	WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0);
 
 	/* Should all still be free */
-	WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
-	bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
+	WARN_ON(bitmap_find_free_region(bmp.bitmap, SIZE_EXPECTED,
+					get_count_order(SIZE_EXPECTED)));
+	bitmap_release_region(bmp.bitmap, 0, get_count_order(SIZE_EXPECTED));
 
 	/* Now create a fake msi-available-ranges property */
 
@@ -256,8 +257,8 @@ static void __init test_of_node(void)
 	WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp));
 
 	/* Check we got the expected result */
-	WARN_ON(bitmap_parselist(expected_str, expected, size));
-	WARN_ON(!bitmap_equal(expected, bmp.bitmap, size));
+	WARN_ON(bitmap_parselist(expected_str, expected, SIZE_EXPECTED));
+	WARN_ON(!bitmap_equal(expected, bmp.bitmap, SIZE_EXPECTED));
 
 	msi_bitmap_free(&bmp);
 	kfree(bmp.bitmap);
-- 
2.17.1


-- 
Kees Cook
Pixel Security

^ permalink raw reply related

* [PATCH] powerpc: mpc5200: Remove VLA usage
From: Kees Cook @ 2018-06-29 18:53 UTC (permalink / raw)
  To: Anatolij Gustschin
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	linuxppc-dev, linux-kernel

In the quest to remove all stack VLA usage from the kernel[1], this
switches to using a stack size large enough for the saved routine and
adds a sanity check.

[1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/powerpc/platforms/52xx/mpc52xx_pm.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pm.c b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
index 31d3515672f3..b23da85fa73c 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pm.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
@@ -117,7 +117,10 @@ int mpc52xx_pm_enter(suspend_state_t state)
 	u32 intr_main_mask;
 	void __iomem * irq_0x500 = (void __iomem *)CONFIG_KERNEL_START + 0x500;
 	unsigned long irq_0x500_stop = (unsigned long)irq_0x500 + mpc52xx_ds_cached_size;
-	char saved_0x500[mpc52xx_ds_cached_size];
+	char saved_0x500[256];
+
+	if (WARN_ON(mpc52xx_ds_cached_size > sizeof(saved_0x500)))
+		return -ENOMEM;
 
 	/* disable all interrupts in PIC */
 	intr_main_mask = in_be32(&intr->main_mask);
-- 
2.17.1


-- 
Kees Cook
Pixel Security

^ permalink raw reply related

* [Update] Regression in 4.18 - 32-bit PowerPC crashes on boot - bisected to commit 1d40a5ea01d5
From: Larry Finger @ 2018-06-29 20:42 UTC (permalink / raw)
  To: Matthew Wilcox, Kirill A. Shutemov, Vlastimil Babka,
	Christoph Lameter, Dave Hansen, Jérôme Glisse,
	Lai Jiangshan, Martin Schwidefsky, Pekka Enberg, Randy Dunlap,
	Andrey Ryabinin, Andrew Morton, Linus Torvalds,
	Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, LKML

My PowerBook G4 Aluminum crashes on boot with 4.18-rcX kernels with a kernel BUG 
at include/linux/page-flags.h:700! The problem was bisected to commit 
1d40a5ea01d5 ("mm: mark pages in use for page tables"). It is not possible to 
capture the bug with anything other than a camera. The first few lines of the 
traceback are as follows:

free_pgd_range+0x19c/0x30c (unreliable)
free_pgtables_0xa0/0xb0
exit_pmap+0xf4/0x16c
mmput+0x64/0xf0
do_exit+0x33c/0x89c
oops_end+0x13c/0x144
_exception_pkey+0x58/0x128
ret_from_except_full+0x0/0x4
--- interrupt: 700 at free_pgd_range+0x19c/0x30c
     LR = free_pgd_range+0x19c/0x30c
free_pgtables+0xa/0xb
exit_mnap+0xf4/0x16c
mmput+0x64/0xf0
flush_old_exec+0x490/0x550

I have more information regarding this BUG. Line 700 of page-flags.h is the 
macro PAGE_TYPE_OPS(Table, table). For further debugging, I manually expanded 
the macro, and found that the bug line is VM_BUG_ON_PAGE(!PageTable(page), page) 
in routine __ClearPageTable(), which is called from pgtable_page_dtor() in 
include/linux/mm.h. I also added a printk call to PageTable() that logs 
page->page_type. The routine was called twice. The first had page_type of
0xfffffbff, which would have been expected for a page table. The second call had
0xffffffff, which led to the BUG.

Larry

^ permalink raw reply

* Re: [Update] Regression in 4.18 - 32-bit PowerPC crashes on boot - bisected to commit 1d40a5ea01d5
From: Linus Torvalds @ 2018-06-29 21:01 UTC (permalink / raw)
  To: Larry Finger
  Cc: Matthew Wilcox, Kirill A. Shutemov, Vlastimil Babka,
	Christoph Lameter, Dave Hansen, Jerome Glisse, Lai Jiangshan,
	Martin Schwidefsky, Pekka Enberg, Randy Dunlap, Andrey Ryabinin,
	Andrew Morton, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, ppc-dev, Linux Kernel Mailing List
In-Reply-To: <99169786-61dd-b19c-ac81-84bcd0a67de4@lwfinger.net>

On Fri, Jun 29, 2018 at 1:42 PM Larry Finger <Larry.Finger@lwfinger.net> wrote:
>
> I have more information regarding this BUG. Line 700 of page-flags.h is the
> macro PAGE_TYPE_OPS(Table, table). For further debugging, I manually expanded
> the macro, and found that the bug line is VM_BUG_ON_PAGE(!PageTable(page), page)
> in routine __ClearPageTable(), which is called from pgtable_page_dtor() in
> include/linux/mm.h. I also added a printk call to PageTable() that logs
> page->page_type. The routine was called twice. The first had page_type of
> 0xfffffbff, which would have been expected for a . The second call had
> 0xffffffff, which led to the BUG.

So it looks to me like the tear-down of the page tables first found a
page that is indeed a page table, and cleared the page table bit
(well, it set it - the bits are reversed).

Then it took an exception (that "interrupt: 700") and that causes
do_exit() again, and it tries to free the same page table - and now
it's no longer marked as a page table, because it already went through
the __ClearPageTable() dance once.

So on the second path through, it catches that "the bit already said
it wasn't a page table" and does the BUG.

But the real question is what the problem was the *first* time around.
I assume that has scrolled off the screen? This part:

  _exception_pkey+0x58/0x128
  ret_from_except_full+0x0/0x4
  --- interrupt: 700 at free_pgd_range+0x19c/0x30c
       LR = free_pgd_range+0x19c/0x30c
  free_pgtables+0xa/0xb
  exit_mnap+0xf4/0x16c
  mmput+0x64/0xf0

Does reverting that commit 1d40a5ea01d5 make everything work for you?
Because if so, judging by the deafening silence on this so far, I
think that's what we should do.

That said, can some ppc person who knows the 32-bit ppc code and maybe
knows what that "interrupt: 700" means talk about that oddity in the
trace, please?

                    Linus

^ permalink raw reply

* Re: [PATCH] powerpc: mpc5200: Remove VLA usage
From: Arnd Bergmann @ 2018-06-29 21:02 UTC (permalink / raw)
  To: Kees Cook
  Cc: Anatolij Gustschin, Paul Mackerras, linuxppc-dev,
	Linux Kernel Mailing List
In-Reply-To: <20180629185339.GA37582@beast>

On Fri, Jun 29, 2018 at 8:53 PM, Kees Cook <keescook@chromium.org> wrote:
> In the quest to remove all stack VLA usage from the kernel[1], this
> switches to using a stack size large enough for the saved routine and
> adds a sanity check.
>
> [1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com
>
> Signed-off-by: Kees Cook <keescook@chromium.org>

This seems particularly nice: not only does it avoid the dynamic stack
allocation, it also makes sure the new 0x500 handler doesn't overflow into
the 0x600 exception handler.

It would help to explain how you arrived at that '256 byte' number in
the changelog though.

Reviewed-by: Arnd Bergmann <arnd@arndb.de>

> ---
>  arch/powerpc/platforms/52xx/mpc52xx_pm.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pm.c b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
> index 31d3515672f3..b23da85fa73c 100644
> --- a/arch/powerpc/platforms/52xx/mpc52xx_pm.c
> +++ b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
> @@ -117,7 +117,10 @@ int mpc52xx_pm_enter(suspend_state_t state)
>         u32 intr_main_mask;
>         void __iomem * irq_0x500 = (void __iomem *)CONFIG_KERNEL_START + 0x500;
>         unsigned long irq_0x500_stop = (unsigned long)irq_0x500 + mpc52xx_ds_cached_size;
> -       char saved_0x500[mpc52xx_ds_cached_size];
> +       char saved_0x500[256];
> +
> +       if (WARN_ON(mpc52xx_ds_cached_size > sizeof(saved_0x500)))
> +               return -ENOMEM;
>
>         /* disable all interrupts in PIC */
>         intr_main_mask = in_be32(&intr->main_mask);

^ permalink raw reply

* Re: [PATCH] powerpc: mpc5200: Remove VLA usage
From: Kees Cook @ 2018-06-29 21:44 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Anatolij Gustschin, Paul Mackerras, linuxppc-dev,
	Linux Kernel Mailing List
In-Reply-To: <CAK8P3a3zrXii=bxBm1zHWMHWgYLCQ0rpo460zmKyfsf_-nxxhQ@mail.gmail.com>

On Fri, Jun 29, 2018 at 2:02 PM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Fri, Jun 29, 2018 at 8:53 PM, Kees Cook <keescook@chromium.org> wrote:
>> In the quest to remove all stack VLA usage from the kernel[1], this
>> switches to using a stack size large enough for the saved routine and
>> adds a sanity check.
>>
>> [1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com
>>
>> Signed-off-by: Kees Cook <keescook@chromium.org>
>
> This seems particularly nice, not only avoids it the dynamic stack
> allocation, it
> also makes sure the new 0x500 handler doesn't overflow into the 0x600
> exception handler.
>
> It would help to explain how you arrived at that '256 byte' number in
> the changelog though.

Honestly, I just counted instructions, multiplied by 8 and rounded up
to the next nearest power of 2, and the result felt right for giving
some level of flexibility for code growth before tripping the WARN. :P

I'm happy to adjust, of course. :)

-Kees

> Reviewed-by: Arnd Bergmann <arnd@arndb.de>

Thanks!

-Kees

>
>> ---
>>  arch/powerpc/platforms/52xx/mpc52xx_pm.c | 5 ++++-
>>  1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pm.c b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
>> index 31d3515672f3..b23da85fa73c 100644
>> --- a/arch/powerpc/platforms/52xx/mpc52xx_pm.c
>> +++ b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
>> @@ -117,7 +117,10 @@ int mpc52xx_pm_enter(suspend_state_t state)
>>         u32 intr_main_mask;
>>         void __iomem * irq_0x500 = (void __iomem *)CONFIG_KERNEL_START + 0x500;
>>         unsigned long irq_0x500_stop = (unsigned long)irq_0x500 + mpc52xx_ds_cached_size;
>> -       char saved_0x500[mpc52xx_ds_cached_size];
>> +       char saved_0x500[256];
>> +
>> +       if (WARN_ON(mpc52xx_ds_cached_size > sizeof(saved_0x500)))
>> +               return -ENOMEM;
>>
>>         /* disable all interrupts in PIC */
>>         intr_main_mask = in_be32(&intr->main_mask);



-- 
Kees Cook
Pixel Security

^ permalink raw reply

* Re: [Update] Regression in 4.18 - 32-bit PowerPC crashes on boot - bisected to commit 1d40a5ea01d5
From: Kirill A. Shutemov @ 2018-06-29 21:46 UTC (permalink / raw)
  To: Linus Torvalds, Aneesh Kumar K.V
  Cc: Larry Finger, Matthew Wilcox, Kirill A. Shutemov, Vlastimil Babka,
	Christoph Lameter, Dave Hansen, Jerome Glisse, Lai Jiangshan,
	Martin Schwidefsky, Pekka Enberg, Randy Dunlap, Andrey Ryabinin,
	Andrew Morton, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, ppc-dev, Linux Kernel Mailing List
In-Reply-To: <CA+55aFzZ7PND2Xvz9wB1jaCmp0rBMTSmJtKiFwSeOWy9iLSd8Q@mail.gmail.com>

On Fri, Jun 29, 2018 at 02:01:46PM -0700, Linus Torvalds wrote:
> On Fri, Jun 29, 2018 at 1:42 PM Larry Finger <Larry.Finger@lwfinger.net> wrote:
> >
> > I have more information regarding this BUG. Line 700 of page-flags.h is the
> > macro PAGE_TYPE_OPS(Table, table). For further debugging, I manually expanded
> > the macro, and found that the bug line is VM_BUG_ON_PAGE(!PageTable(page), page)
> > in routine __ClearPageTable(), which is called from pgtable_page_dtor() in
> > include/linux/mm.h. I also added a printk call to PageTable() that logs
> > page->page_type. The routine was called twice. The first had page_type of
> > 0xfffffbff, which would have been expected for a . The second call had
> > 0xffffffff, which led to the BUG.
> 
> So it looks to me like the tear-down of the page tables first found a
> page that is indeed a page table, and cleared the page table bit
> (well, it set it - the bits are reversed).
> 
> Then it took an exception (that "interrupt: 700") and that causes
> do_exit() again, and it tries to free the same page table - and now
> it's no longer marked as a page table, because it already went through
> the __ClearPageTable() dance once.
> 
> So on the second path through, it catches that "the bit already said
> it wasn't a page table" and does the BUG.
> 
> But the real question is what the problem was the *first* time around.

+Aneesh.

Looks like pgtable_page_dtor() gets called in __pte_free_tlb() path twice.
Once in __pte_free_tlb() itself and the second time in pgtable_free().

Would this help?

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 6a6673907e45..e7a2f0e6b695 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -137,7 +137,6 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb,
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
 {
-	pgtable_page_dtor(table);
 	pgtable_free_tlb(tlb, page_address(table), 0);
 }
 #endif /* _ASM_POWERPC_BOOK3S_32_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 1707781d2f20..30a13b80fd58 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -139,7 +139,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
 {
 	tlb_flush_pgtable(tlb, address);
-	pgtable_page_dtor(table);
 	pgtable_free_tlb(tlb, page_address(table), 0);
 }
 #endif /* _ASM_POWERPC_PGALLOC_32_H */
-- 
 Kirill A. Shutemov

^ permalink raw reply related

* [PATCH v05 0/9] powerpc/hotplug: Update affinity for migrated CPUs
From: Michael Bringmann @ 2018-06-29 22:03 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon

The migration of LPARs across Power systems affects many attributes
including that of the associativity of CPUs.  The patches in this
set execute when a system is coming up fresh upon a migration target.
They are intended to:

* Recognize changes to the associativity of CPUs recorded in internal
  data structures when compared to the latest copies in the device tree.
* Generate calls to other code layers to reset the data structures
  related to associativity of the CPUs.
* Re-register the 'changed' entities into the target system.
  Re-registration of CPUs mostly entails acting as if they have been
  newly hot-added into the target system.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>

Michael Bringmann (9):
  hotplug/cpu: Conditionally acquire/release DRC index
  hotplug/cpu: Add operation queuing function
  hotplug/cpu: Provide CPU readd operation
  mobility/numa: Ensure numa update does not overlap
  numa: Disable/enable arch_update_cpu_topology
  pmt/numa: Disable arch_update_cpu_topology during CPU readd
  powerpc/rtas: Allow disabling rtas_event_scan
  hotplug/rtas: No rtas_event_scan during PMT update
  hotplug/pmt: Update topology after PMT
---
Changes in patch:
  -- Restructure and rearrange content of patches to co-locate
     similar or related modifications
  -- Rename pseries_update_drconf_cpu to pseries_update_processor
  -- Simplify code to update CPU nodes during mobility checks.
     Remove functions to generate extra HP_ELOG messages in favor
     of direct function calls to dlpar_cpu_readd_by_index.
  -- Revise code order in dlpar_cpu_readd_by_index() to present
     more appropriate error codes from underlying layers of the
     implementation.
  -- Add hotplug device lock around all property updates
  -- Add call to rebuild_sched_domains in case of changes
  -- Various code cleanups and compaction
  -- Rebase to 4.18-rc1 kernel
  -- Change operation to run CPU readd after end of migration store.
  -- Improve descriptive text

^ permalink raw reply

* [PATCH v05 1/9] hotplug/cpu: Conditionally acquire/release DRC index
From: Michael Bringmann @ 2018-06-29 22:04 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <e73c6076-34d9-d3fb-d1f0-ccef14dae093@linux.vnet.ibm.com>

powerpc/cpu: Modify dlpar_cpu_add and dlpar_cpu_remove to allow the
skipping of DRC index acquire or release operations during the CPU
add or remove operations.  This is intended to support subsequent
changes to provide a 'CPU readd' operation.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in patch:
  -- Move new validity check added to pseries_smp_notifier
     to another patch
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c |   68 +++++++++++++++-----------
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6ef77ca..3632db2 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -432,7 +432,7 @@ static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
 	return found;
 }
 
-static ssize_t dlpar_cpu_add(u32 drc_index)
+static ssize_t dlpar_cpu_add(u32 drc_index, bool acquire_drc)
 {
 	struct device_node *dn, *parent;
 	int rc, saved_rc;
@@ -457,19 +457,22 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 		return -EINVAL;
 	}
 
-	rc = dlpar_acquire_drc(drc_index);
-	if (rc) {
-		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
-			rc, drc_index);
-		of_node_put(parent);
-		return -EINVAL;
+	if (acquire_drc) {
+		rc = dlpar_acquire_drc(drc_index);
+		if (rc) {
+			pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
+				rc, drc_index);
+			of_node_put(parent);
+			return -EINVAL;
+		}
 	}
 
 	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
 	if (!dn) {
 		pr_warn("Failed call to configure-connector, drc index: %x\n",
 			drc_index);
-		dlpar_release_drc(drc_index);
+		if (acquire_drc)
+			dlpar_release_drc(drc_index);
 		of_node_put(parent);
 		return -EINVAL;
 	}
@@ -484,8 +487,9 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 		pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
 			dn->name, rc, drc_index);
 
-		rc = dlpar_release_drc(drc_index);
-		if (!rc)
+		if (acquire_drc)
+			rc = dlpar_release_drc(drc_index);
+		if (!rc || acquire_drc)
 			dlpar_free_cc_nodes(dn);
 
 		return saved_rc;
@@ -498,7 +502,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 			dn->name, rc, drc_index);
 
 		rc = dlpar_detach_node(dn);
-		if (!rc)
+		if (!rc && acquire_drc)
 			dlpar_release_drc(drc_index);
 
 		return saved_rc;
@@ -566,7 +570,8 @@ static int dlpar_offline_cpu(struct device_node *dn)
 
 }
 
-static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
+static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index,
+				bool release_drc)
 {
 	int rc;
 
@@ -579,12 +584,14 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
 		return -EINVAL;
 	}
 
-	rc = dlpar_release_drc(drc_index);
-	if (rc) {
-		pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
-			drc_index, dn->name, rc);
-		dlpar_online_cpu(dn);
-		return rc;
+	if (release_drc) {
+		rc = dlpar_release_drc(drc_index);
+		if (rc) {
+			pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
+				drc_index, dn->name, rc);
+			dlpar_online_cpu(dn);
+			return rc;
+		}
 	}
 
 	rc = dlpar_detach_node(dn);
@@ -593,7 +600,10 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
 
 		pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);
 
-		rc = dlpar_acquire_drc(drc_index);
+		if (release_drc)
+			rc = dlpar_acquire_drc(drc_index);
+		else
+			rc = 0;
 		if (!rc)
 			dlpar_online_cpu(dn);
 
@@ -622,7 +632,7 @@ static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
 	return dn;
 }
 
-static int dlpar_cpu_remove_by_index(u32 drc_index)
+static int dlpar_cpu_remove_by_index(u32 drc_index, bool release_drc)
 {
 	struct device_node *dn;
 	int rc;
@@ -634,7 +644,7 @@ static int dlpar_cpu_remove_by_index(u32 drc_index)
 		return -ENODEV;
 	}
 
-	rc = dlpar_cpu_remove(dn, drc_index);
+	rc = dlpar_cpu_remove(dn, drc_index, release_drc);
 	of_node_put(dn);
 	return rc;
 }
@@ -699,7 +709,7 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
 	}
 
 	for (i = 0; i < cpus_to_remove; i++) {
-		rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
+		rc = dlpar_cpu_remove_by_index(cpu_drcs[i], true);
 		if (rc)
 			break;
 
@@ -710,7 +720,7 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
 		pr_warn("CPU hot-remove failed, adding back removed CPUs\n");
 
 		for (i = 0; i < cpus_removed; i++)
-			dlpar_cpu_add(cpu_drcs[i]);
+			dlpar_cpu_add(cpu_drcs[i], true);
 
 		rc = -EINVAL;
 	} else {
@@ -780,7 +790,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
 	}
 
 	for (i = 0; i < cpus_to_add; i++) {
-		rc = dlpar_cpu_add(cpu_drcs[i]);
+		rc = dlpar_cpu_add(cpu_drcs[i], true);
 		if (rc)
 			break;
 
@@ -791,7 +801,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
 		pr_warn("CPU hot-add failed, removing any added CPUs\n");
 
 		for (i = 0; i < cpus_added; i++)
-			dlpar_cpu_remove_by_index(cpu_drcs[i]);
+			dlpar_cpu_remove_by_index(cpu_drcs[i], true);
 
 		rc = -EINVAL;
 	} else {
@@ -817,7 +827,7 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
 			rc = dlpar_cpu_remove_by_count(count);
 		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
-			rc = dlpar_cpu_remove_by_index(drc_index);
+			rc = dlpar_cpu_remove_by_index(drc_index, true);
 		else
 			rc = -EINVAL;
 		break;
@@ -825,7 +835,7 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
 			rc = dlpar_cpu_add_by_count(count);
 		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
-			rc = dlpar_cpu_add(drc_index);
+			rc = dlpar_cpu_add(drc_index, true);
 		else
 			rc = -EINVAL;
 		break;
@@ -850,7 +860,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 	if (rc)
 		return -EINVAL;
 
-	rc = dlpar_cpu_add(drc_index);
+	rc = dlpar_cpu_add(drc_index, true);
 
 	return rc ? rc : count;
 }
@@ -871,7 +881,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count)
 		return -EINVAL;
 	}
 
-	rc = dlpar_cpu_remove(dn, drc_index);
+	rc = dlpar_cpu_remove(dn, drc_index, true);
 	of_node_put(dn);
 
 	return rc ? rc : count;

^ permalink raw reply related

* [PATCH v05 2/9] hotplug/cpu: Add operation queuing function
From: Michael Bringmann @ 2018-06-29 22:04 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <e73c6076-34d9-d3fb-d1f0-ccef14dae093@linux.vnet.ibm.com>

migration/dlpar: This patch adds function dlpar_queue_action()
which will queue up information about a CPU/Memory 'readd'
operation according to resource type, action code, and DRC index.
At a subsequent point, the list of operations can be run/played
in series.  Examples of such operations include 'readd' of CPU
and Memory blocks identified as having changed their associativity
during an LPAR migration event.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in patch:
  -- Correct drc_index before adding to pseries_hp_errorlog struct
  -- Correct text of notice
  -- Revise queuing model to save up all of the DLPAR actions for
     later execution.
  -- Restore list init statement missing from patch
  -- Move call to apply queued operations into 'mobility.c'
  -- Compress some code
  -- Rename some of queueing function APIs
  -- Revise implementation to push execution of queued operations
     to a workqueue task.
---
 arch/powerpc/include/asm/rtas.h           |    2 +
 arch/powerpc/platforms/pseries/dlpar.c    |   61 +++++++++++++++++++++++++++++
 arch/powerpc/platforms/pseries/mobility.c |    4 ++
 arch/powerpc/platforms/pseries/pseries.h  |    2 +
 4 files changed, 69 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 71e393c..4f601c7 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -310,12 +310,14 @@ struct pseries_hp_errorlog {
 		struct { __be32 count, index; } ic;
 		char	drc_name[1];
 	} _drc_u;
+	struct list_head list;
 };
 
 #define PSERIES_HP_ELOG_RESOURCE_CPU	1
 #define PSERIES_HP_ELOG_RESOURCE_MEM	2
 #define PSERIES_HP_ELOG_RESOURCE_SLOT	3
 #define PSERIES_HP_ELOG_RESOURCE_PHB	4
+#define PSERIES_HP_ELOG_RESOURCE_PMT	5
 
 #define PSERIES_HP_ELOG_ACTION_ADD	1
 #define PSERIES_HP_ELOG_ACTION_REMOVE	2
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index a0b20c0..7264b8e 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -25,6 +25,7 @@
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <linux/uaccess.h>
+#include <linux/delay.h>
 #include <asm/rtas.h>
 
 static struct workqueue_struct *pseries_hp_wq;
@@ -329,6 +330,8 @@ int dlpar_release_drc(u32 drc_index)
 	return 0;
 }
 
+static int dlpar_pmt(struct pseries_hp_errorlog *work);
+
 static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 {
 	int rc;
@@ -357,6 +360,9 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 	case PSERIES_HP_ELOG_RESOURCE_CPU:
 		rc = dlpar_cpu(hp_elog);
 		break;
+	case PSERIES_HP_ELOG_RESOURCE_PMT:
+		rc = dlpar_pmt(hp_elog);
+		break;
 	default:
 		pr_warn_ratelimited("Invalid resource (%d) specified\n",
 				    hp_elog->resource);
@@ -407,6 +413,61 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
 	}
 }
 
+LIST_HEAD(dlpar_delayed_list);
+
+int dlpar_queue_action(int resource, int action, u32 drc_index)
+{
+	struct pseries_hp_errorlog *hp_errlog;
+
+	hp_errlog = kmalloc(sizeof(struct pseries_hp_errorlog), GFP_KERNEL);
+	if (!hp_errlog)
+		return -ENOMEM;
+
+	hp_errlog->resource = resource;
+	hp_errlog->action = action;
+	hp_errlog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+	hp_errlog->_drc_u.drc_index = cpu_to_be32(drc_index);
+
+	list_add_tail(&hp_errlog->list, &dlpar_delayed_list);
+
+	return 0;
+}
+
+static int dlpar_pmt(struct pseries_hp_errorlog *work)
+{
+	struct list_head *pos, *q;
+
+	ssleep(15);
+
+	list_for_each_safe(pos, q, &dlpar_delayed_list) {
+		struct pseries_hp_errorlog *tmp;
+
+		tmp = list_entry(pos, struct pseries_hp_errorlog, list);
+		handle_dlpar_errorlog(tmp);
+
+		list_del(pos);
+		kfree(tmp);
+
+		ssleep(10);
+	}
+
+	return 0;
+}
+
+int dlpar_queued_actions_run(void)
+{
+	if (!list_empty(&dlpar_delayed_list)) {
+		struct pseries_hp_errorlog hp_errlog;
+
+		hp_errlog.resource = PSERIES_HP_ELOG_RESOURCE_PMT;
+		hp_errlog.action = 0;
+		hp_errlog.id_type = 0;
+
+		queue_hotplug_event(&hp_errlog, 0, 0);
+	}
+	return 0;
+}
+
 static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog)
 {
 	char *arg;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index f6364d9..9ecc256 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -378,6 +378,10 @@ static ssize_t migration_store(struct class *class,
 		return rc;
 
 	post_mobility_fixup();
+
+	/* Apply any necessary changes identified during fixup */
+	dlpar_schedule_delayed_queue();
+
 	return count;
 }
 
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 60db2ee..72ca996 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -61,6 +61,8 @@ extern struct device_node *dlpar_configure_connector(__be32,
 
 void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
 			 struct completion *hotplug_done, int *rc);
+int dlpar_queue_action(int resource, int action, u32 drc_index);
+int dlpar_queued_actions_run(void);
 #ifdef CONFIG_MEMORY_HOTPLUG
 int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
 #else

^ permalink raw reply related

* [PATCH v05 3/9] hotplug/cpu: Provide CPU readd operation
From: Michael Bringmann @ 2018-06-29 22:04 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <e73c6076-34d9-d3fb-d1f0-ccef14dae093@linux.vnet.ibm.com>

powerpc/dlpar: Provide hotplug CPU 'readd by index' operation to
support LPAR Post Migration state updates.  When such changes are
invoked by the PowerPC 'mobility' code, they will be queued up so
that modifications to CPU properties will take place after the new
property value is written to the device-tree.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in patch:
  -- Add CPU validity check to pseries_smp_notifier
  -- Improve check on 'ibm,associativity' property
  -- Add check for cpu type to new update property entry
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c |   58 ++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 3632db2..8f28160 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -305,6 +305,36 @@ static int pseries_add_processor(struct device_node *np)
 	return err;
 }
 
+static int pseries_update_processor(struct of_reconfig_data *pr)
+{
+	int old_entries, new_entries, rc = 0;
+	__be32 *old_assoc, *new_assoc;
+
+	/* We only handle changes due to 'ibm,associativity' property
+	 */
+	old_assoc = pr->old_prop->value;
+	old_entries = be32_to_cpu(*old_assoc++);
+
+	new_assoc = pr->prop->value;
+	new_entries = be32_to_cpu(*new_assoc++);
+
+	if (old_entries == new_entries) {
+		int sz = old_entries * sizeof(int);
+
+		if (memcmp(old_assoc, new_assoc, sz))
+			rc = dlpar_queue_action(
+					PSERIES_HP_ELOG_RESOURCE_CPU,
+					PSERIES_HP_ELOG_ACTION_READD,
+					pr->dn->phandle);
+	} else {
+		rc = dlpar_queue_action(PSERIES_HP_ELOG_RESOURCE_CPU,
+					PSERIES_HP_ELOG_ACTION_READD,
+					pr->dn->phandle);
+	}
+
+	return rc;
+}
+
 /*
  * Update the present map for a cpu node which is going away, and set
  * the hard id in the paca(s) to -1 to be consistent with boot time
@@ -649,6 +679,26 @@ static int dlpar_cpu_remove_by_index(u32 drc_index, bool release_drc)
 	return rc;
 }
 
+static int dlpar_cpu_readd_by_index(u32 drc_index)
+{
+	int rc = 0;
+
+	pr_info("Attempting to re-add CPU, drc index %x\n", drc_index);
+
+	rc = dlpar_cpu_remove_by_index(drc_index, false);
+	if (!rc)
+		rc = dlpar_cpu_add(drc_index, false);
+
+	if (rc)
+		pr_info("Failed to update cpu at drc_index %lx\n",
+				(unsigned long int)drc_index);
+	else
+		pr_info("CPU at drc_index %lx was updated\n",
+				(unsigned long int)drc_index);
+
+	return rc;
+}
+
 static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
 {
 	struct device_node *dn;
@@ -839,6 +889,9 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 		else
 			rc = -EINVAL;
 		break;
+	case PSERIES_HP_ELOG_ACTION_READD:
+		rc = dlpar_cpu_readd_by_index(drc_index);
+		break;
 	default:
 		pr_err("Invalid action (%d) specified\n", hp_elog->action);
 		rc = -EINVAL;
@@ -902,6 +955,11 @@ static int pseries_smp_notifier(struct notifier_block *nb,
 	case OF_RECONFIG_DETACH_NODE:
 		pseries_remove_processor(rd->dn);
 		break;
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		if (!strcmp(rd->dn->type, "cpu") &&
+		    !strcmp(rd->prop->name, "ibm,associativity"))
+			pseries_update_processor(rd);
+		break;
 	}
 	return notifier_from_errno(err);
 }

^ permalink raw reply related

* [PATCH v05 4/9] mobility/numa: Ensure numa update does not overlap
From: Michael Bringmann @ 2018-06-29 22:04 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <e73c6076-34d9-d3fb-d1f0-ccef14dae093@linux.vnet.ibm.com>

mobility/numa: Ensure that numa_update_cpu_topology() can not be
entered multiple times concurrently.  It may be accessed through
many different paths / concurrent work functions, and the lock
ordering may be difficult to ensure otherwise.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
 arch/powerpc/mm/numa.c |    9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index a789d57..b22e27a 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1079,6 +1079,7 @@ struct topology_update_data {
 static int topology_timer_secs = 1;
 static int topology_inited;
 static int topology_update_needed;
+static struct mutex topology_update_lock;
 
 /*
  * Change polling interval for associativity changes.
@@ -1320,6 +1321,11 @@ int numa_update_cpu_topology(bool cpus_locked)
 	if (!updates)
 		return 0;
 
+	if (!mutex_trylock(&topology_update_lock)) {
+		kfree(updates);
+		return 0;
+	}
+
 	cpumask_clear(&updated_cpus);
 
 	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
@@ -1424,6 +1430,7 @@ int numa_update_cpu_topology(bool cpus_locked)
 out:
 	kfree(updates);
 	topology_update_needed = 0;
+	mutex_unlock(&topology_update_lock);
 	return changed;
 }
 
@@ -1598,6 +1605,8 @@ static ssize_t topology_write(struct file *file, const char __user *buf,
 
 static int topology_update_init(void)
 {
+	mutex_init(&topology_update_lock);
+
 	/* Do not poll for changes if disabled at boot */
 	if (topology_updates_enabled)
 		start_topology_update();

^ permalink raw reply related

* [PATCH v05 5/9] numa: Disable/enable arch_update_cpu_topology
From: Michael Bringmann @ 2018-06-29 22:04 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <e73c6076-34d9-d3fb-d1f0-ccef14dae093@linux.vnet.ibm.com>

numa: Provide mechanism to disable/enable operation of
arch_update_cpu_topology/numa_update_cpu_topology.  This is
a simple tool to eliminate some avenues for thread deadlock
observed during system execution.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/topology.h |   10 ++++++++++
 arch/powerpc/mm/numa.c              |   14 ++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 16b0778..d9ceba6 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -43,6 +43,8 @@ static inline int pcibus_to_node(struct pci_bus *bus)
 extern int sysfs_add_device_to_node(struct device *dev, int nid);
 extern void sysfs_remove_device_from_node(struct device *dev, int nid);
 extern int numa_update_cpu_topology(bool cpus_locked);
+extern void arch_update_cpu_topology_suspend(void);
+extern void arch_update_cpu_topology_resume(void);
 
 static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
 {
@@ -82,6 +84,14 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
 	return 0;
 }
 
+static inline void arch_update_cpu_topology_suspend(void)
+{
+}
+
+static inline void arch_update_cpu_topology_resume(void)
+{
+}
+
 static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
 
 #endif /* CONFIG_NUMA */
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b22e27a..2352489 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1079,6 +1079,7 @@ struct topology_update_data {
 static int topology_timer_secs = 1;
 static int topology_inited;
 static int topology_update_needed;
+static int topology_update_enabled = 1;
 static struct mutex topology_update_lock;
 
 /*
@@ -1313,6 +1314,9 @@ int numa_update_cpu_topology(bool cpus_locked)
 		return 0;
 	}
 
+	if (!topology_update_enabled)
+		return 0;
+
 	weight = cpumask_weight(&cpu_associativity_changes_mask);
 	if (!weight)
 		return 0;
@@ -1439,6 +1443,16 @@ int arch_update_cpu_topology(void)
 	return numa_update_cpu_topology(true);
 }
 
+void arch_update_cpu_topology_suspend(void)
+{
+	topology_update_enabled = 0;
+}
+
+void arch_update_cpu_topology_resume(void)
+{
+	topology_update_enabled = 1;
+}
+
 static void topology_work_fn(struct work_struct *work)
 {
 	rebuild_sched_domains();

^ permalink raw reply related

* [PATCH v05 6/9] pmt/numa: Disable arch_update_cpu_topology during CPU readd
From: Michael Bringmann @ 2018-06-29 22:04 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <e73c6076-34d9-d3fb-d1f0-ccef14dae093@linux.vnet.ibm.com>

pmt/numa: Disable arch_update_cpu_topology during post migration
CPU readd updates when evaluating device-tree changes after LPM
to avoid thread deadlocks trying to update node assignments.
System timing between all of the threads and timers restarted in
a migrated system overlapped frequently allowing tasks to start
acquiring resources (get_online_cpus) needed by rebuild_sched_domains.
Defer the operation of that function until after the CPU readd has
completed.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c |    9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 8f28160..6267b53 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -26,6 +26,7 @@
 #include <linux/sched.h>	/* for idle_task_exit */
 #include <linux/sched/hotplug.h>
 #include <linux/cpu.h>
+#include <linux/cpuset.h>
 #include <linux/of.h>
 #include <linux/slab.h>
 #include <asm/prom.h>
@@ -685,9 +686,15 @@ static int dlpar_cpu_readd_by_index(u32 drc_index)
 
 	pr_info("Attempting to re-add CPU, drc index %x\n", drc_index);
 
+	arch_update_cpu_topology_suspend();
 	rc = dlpar_cpu_remove_by_index(drc_index, false);
-	if (!rc)
+	arch_update_cpu_topology_resume();
+
+	if (!rc) {
+		arch_update_cpu_topology_suspend();
 		rc = dlpar_cpu_add(drc_index, false);
+		arch_update_cpu_topology_resume();
+	}
 
 	if (rc)
 		pr_info("Failed to update cpu at drc_index %lx\n",

^ permalink raw reply related

* [PATCH v05 7/9] powerpc/rtas: Allow disabling rtas_event_scan
From: Michael Bringmann @ 2018-06-29 22:04 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <e73c6076-34d9-d3fb-d1f0-ccef14dae093@linux.vnet.ibm.com>

powerpc/rtas: Provide mechanism by which the rtas_event_scan can
be disabled/re-enabled by other portions of the powerpc code.
Among other things, this simplifies the usage of locking mechanisms
for shared kernel resources.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/rtas.h |    4 ++++
 arch/powerpc/kernel/rtasd.c     |   14 ++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 4f601c7..4ab605a 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -386,8 +386,12 @@ extern int early_init_dt_scan_rtas(unsigned long node,
 
 #ifdef CONFIG_PPC_RTAS_DAEMON
 extern void rtas_cancel_event_scan(void);
+extern void rtas_event_scan_disable(void);
+extern void rtas_event_scan_enable(void);
 #else
 static inline void rtas_cancel_event_scan(void) { }
+static inline void rtas_event_scan_disable(void) { }
+static inline void rtas_event_scan_enable(void) { }
 #endif
 
 /* Error types logged.  */
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 44d66c33d..af69e44 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -455,11 +455,25 @@ static void do_event_scan(void)
  */
 static unsigned long event_scan_delay = 1*HZ;
 static int first_pass = 1;
+static int res_enable = 1;
+
+void rtas_event_scan_disable(void)
+{
+	res_enable = 0;
+}
+
+void rtas_event_scan_enable(void)
+{
+	res_enable = 1;
+}
 
 static void rtas_event_scan(struct work_struct *w)
 {
 	unsigned int cpu;
 
+	if (!res_enable)
+		return;
+
 	do_event_scan();
 
 	get_online_cpus();

^ permalink raw reply related

* [PATCH v05 9/9] hotplug/pmt: Update topology after PMT
From: Michael Bringmann @ 2018-06-29 22:05 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <e73c6076-34d9-d3fb-d1f0-ccef14dae093@linux.vnet.ibm.com>

hotplug/pmt: Call rebuild_sched_domains after applying changes
to update CPU associativity i.e. 'readd' CPUs.  This is to
ensure that the deferred calls to arch_update_cpu_topology are
now reflected in the system data structures.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/dlpar.c |    4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 7264b8e..ea3c08a 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -16,6 +16,7 @@
 #include <linux/notifier.h>
 #include <linux/spinlock.h>
 #include <linux/cpu.h>
+#include <linux/cpuset.h>
 #include <linux/slab.h>
 #include <linux/of.h>
 
@@ -451,6 +452,9 @@ static int dlpar_pmt(struct pseries_hp_errorlog *work)
 		ssleep(10);
 	}
 
+	ssleep(5);
+	rebuild_sched_domains();
+
 	return 0;
 }
 

^ permalink raw reply related

* [PATCH v05 8/9] hotplug/rtas: No rtas_event_scan during PMT update
From: Michael Bringmann @ 2018-06-29 22:04 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <e73c6076-34d9-d3fb-d1f0-ccef14dae093@linux.vnet.ibm.com>

hotplug/rtas: Disable rtas_event_scan during device-tree property
updates after migration to reduce conflicts with changes propagated
to other parts of the kernel configuration, such as CPUs or memory.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c |    4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6267b53..f5c9e8f 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -686,14 +686,18 @@ static int dlpar_cpu_readd_by_index(u32 drc_index)
 
 	pr_info("Attempting to re-add CPU, drc index %x\n", drc_index);
 
+	rtas_event_scan_disable();
 	arch_update_cpu_topology_suspend();
 	rc = dlpar_cpu_remove_by_index(drc_index, false);
 	arch_update_cpu_topology_resume();
+	rtas_event_scan_enable();
 
 	if (!rc) {
+		rtas_event_scan_disable();
 		arch_update_cpu_topology_suspend();
 		rc = dlpar_cpu_add(drc_index, false);
 		arch_update_cpu_topology_resume();
+		rtas_event_scan_enable();
 	}
 
 	if (rc)

^ permalink raw reply related

* [PATCH v02 0/5] powerpc/migration: Affinity fix for memory
From: Michael Bringmann @ 2018-06-29 22:12 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon

The migration of LPARs across Power systems affects many attributes
including that of the associativity of memory blocks.  The patches
in this set execute when a system is coming up fresh upon a migration
target.  They are intended to:

* Recognize changes to the associativity of memory recorded in
  internal data structures when compared to the latest copies in
  the device tree (e.g. ibm,dynamic-memory, ibm,dynamic-memory-v2).
* Recognize changes to the associativity mapping (e.g.
  ibm,associativity-lookup-arrays), locate all assigned memory blocks
  corresponding to each changed row, and readd all such blocks.
* Generate calls to other code layers to reset the data structures
  related to associativity of memory.
* Re-register the 'changed' entities into the target system.
  Re-registration of memory blocks mostly entails acting as if they
  have been newly hot-added into the target system.

This code builds upon features introduced in a previous patch set
that updates CPUs for affinity changes that may occur during LPM.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>

Michael Bringmann (5):
  powerpc/drmem: Export 'dynamic-memory' loader
  powerpc/drmem: Add internal_flags feature
  migration/memory: Add hotplug flags READD_MULTIPLE
  migration/memory: Evaluate LMB assoc changes
  migration/memory: Support 'ibm,dynamic-memory-v2'
---
Changes in v02:
  -- Change operation to tag changed LMBs in DRMEM array instead
     of queuing a potentially huge number of structures.
  -- Added another hotplug queue event for CPU/memory operations
  -- Added internal_flags feature to DRMEM
  -- Improve the patch description language for the patch set.

^ permalink raw reply

* [PATCH v02 1/5] powerpc/drmem: Export 'dynamic-memory' loader
From: Michael Bringmann @ 2018-06-29 22:13 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <adc67a97-b800-b533-7993-516fc254b6a2@linux.vnet.ibm.com>

powerpc/drmem: Export many of the functions of DRMEM to parse
"ibm,dynamic-memory" and "ibm,dynamic-memory-v2" during hotplug
operations and for Post Migration events.

Also modify the DRMEM initialization code to allow it to,

* Be called after system initialization
* Provide a separate user copy of the LMB array that it produces
* Free the user copy upon request

In addition, a couple of changes were made to make the creation
of additional copies of the LMB array more useful including,

* Add new iterator to work through a pair of drmem_info arrays.
* Modify DRMEM code to replace usages of dt_root_addr_cells, and
  dt_mem_next_cell, as these are only available at first boot.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/drmem.h |   15 ++++++++
 arch/powerpc/mm/drmem.c          |   75 ++++++++++++++++++++++++++++----------
 2 files changed, 70 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index ce242b9..b0e70fd 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -35,6 +35,18 @@ struct drmem_lmb_info {
 		&drmem_info->lmbs[0],				\
 		&drmem_info->lmbs[drmem_info->n_lmbs - 1])
 
+#define for_each_dinfo_lmb(dinfo, lmb)				\
+	for_each_drmem_lmb_in_range((lmb),			\
+		&dinfo->lmbs[0],				\
+		&dinfo->lmbs[dinfo->n_lmbs - 1])
+
+#define for_each_pair_dinfo_lmb(dinfo1, lmb1, dinfo2, lmb2)	\
+	for ((lmb1) = (&dinfo1->lmbs[0]),			\
+	     (lmb2) = (&dinfo2->lmbs[0]);			\
+	     ((lmb1) <= (&dinfo1->lmbs[dinfo1->n_lmbs - 1])) &&	\
+	     ((lmb2) <= (&dinfo2->lmbs[dinfo2->n_lmbs - 1]));	\
+	     (lmb1)++, (lmb2)++)
+
 /*
  * The of_drconf_cell_v1 struct defines the layout of the LMB data
  * specified in the ibm,dynamic-memory device tree property.
@@ -94,6 +106,9 @@ void __init walk_drmem_lmbs(struct device_node *dn,
 			void (*func)(struct drmem_lmb *, const __be32 **));
 int drmem_update_dt(void);
 
+struct drmem_lmb_info *drmem_lmbs_init(struct property *prop);
+void drmem_lmbs_free(struct drmem_lmb_info *dinfo);
+
 #ifdef CONFIG_PPC_PSERIES
 void __init walk_drmem_lmbs_early(unsigned long node,
 			void (*func)(struct drmem_lmb *, const __be32 **));
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 3f18036..13d2abb 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -20,6 +20,7 @@
 
 static struct drmem_lmb_info __drmem_info;
 struct drmem_lmb_info *drmem_info = &__drmem_info;
+static int n_root_addr_cells;
 
 u64 drmem_lmb_memory_max(void)
 {
@@ -193,12 +194,13 @@ int drmem_update_dt(void)
 	return rc;
 }
 
-static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
+static void read_drconf_v1_cell(struct drmem_lmb *lmb,
 				       const __be32 **prop)
 {
 	const __be32 *p = *prop;
 
-	lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+	lmb->base_addr = of_read_number(p, n_root_addr_cells);
+	p += n_root_addr_cells;
 	lmb->drc_index = of_read_number(p++, 1);
 
 	p++; /* skip reserved field */
@@ -209,7 +211,7 @@ static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
 	*prop = p;
 }
 
-static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
+static void __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
 			void (*func)(struct drmem_lmb *, const __be32 **))
 {
 	struct drmem_lmb lmb;
@@ -225,13 +227,14 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
 	}
 }
 
-static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
 				       const __be32 **prop)
 {
 	const __be32 *p = *prop;
 
 	dr_cell->seq_lmbs = of_read_number(p++, 1);
-	dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+	dr_cell->base_addr = of_read_number(p, n_root_addr_cells);
+	p += n_root_addr_cells;
 	dr_cell->drc_index = of_read_number(p++, 1);
 	dr_cell->aa_index = of_read_number(p++, 1);
 	dr_cell->flags = of_read_number(p++, 1);
@@ -239,7 +242,7 @@ static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
 	*prop = p;
 }
 
-static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
+static void __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
 			void (*func)(struct drmem_lmb *, const __be32 **))
 {
 	struct of_drconf_cell_v2 dr_cell;
@@ -275,6 +278,9 @@ void __init walk_drmem_lmbs_early(unsigned long node,
 	const __be32 *prop, *usm;
 	int len;
 
+	if (n_root_addr_cells == 0)
+		n_root_addr_cells = dt_root_addr_cells;
+
 	prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
 	if (!prop || len < dt_root_size_cells * sizeof(__be32))
 		return;
@@ -353,24 +359,26 @@ void __init walk_drmem_lmbs(struct device_node *dn,
 	}
 }
 
-static void __init init_drmem_v1_lmbs(const __be32 *prop)
+static void init_drmem_v1_lmbs(const __be32 *prop,
+				struct drmem_lmb_info *dinfo)
 {
 	struct drmem_lmb *lmb;
 
-	drmem_info->n_lmbs = of_read_number(prop++, 1);
-	if (drmem_info->n_lmbs == 0)
+	dinfo->n_lmbs = of_read_number(prop++, 1);
+	if (dinfo->n_lmbs == 0)
 		return;
 
-	drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+	dinfo->lmbs = kcalloc(dinfo->n_lmbs, sizeof(*lmb),
 				   GFP_KERNEL);
-	if (!drmem_info->lmbs)
+	if (!dinfo->lmbs)
 		return;
 
-	for_each_drmem_lmb(lmb)
+	for_each_dinfo_lmb(dinfo, lmb)
 		read_drconf_v1_cell(lmb, &prop);
 }
 
-static void __init init_drmem_v2_lmbs(const __be32 *prop)
+static void init_drmem_v2_lmbs(const __be32 *prop,
+				struct drmem_lmb_info *dinfo)
 {
 	struct drmem_lmb *lmb;
 	struct of_drconf_cell_v2 dr_cell;
@@ -386,12 +394,12 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
 	p = prop;
 	for (i = 0; i < lmb_sets; i++) {
 		read_drconf_v2_cell(&dr_cell, &p);
-		drmem_info->n_lmbs += dr_cell.seq_lmbs;
+		dinfo->n_lmbs += dr_cell.seq_lmbs;
 	}
 
-	drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+	dinfo->lmbs = kcalloc(dinfo->n_lmbs, sizeof(*lmb),
 				   GFP_KERNEL);
-	if (!drmem_info->lmbs)
+	if (!dinfo->lmbs)
 		return;
 
 	/* second pass, read in the LMB information */
@@ -402,10 +410,10 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
 		read_drconf_v2_cell(&dr_cell, &p);
 
 		for (j = 0; j < dr_cell.seq_lmbs; j++) {
-			lmb = &drmem_info->lmbs[lmb_index++];
+			lmb = &dinfo->lmbs[lmb_index++];
 
 			lmb->base_addr = dr_cell.base_addr;
-			dr_cell.base_addr += drmem_info->lmb_size;
+			dr_cell.base_addr += dinfo->lmb_size;
 
 			lmb->drc_index = dr_cell.drc_index;
 			dr_cell.drc_index++;
@@ -416,11 +424,38 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
 	}
 }
 
+void drmem_lmbs_free(struct drmem_lmb_info *dinfo)
+{
+	if (dinfo) {
+		kfree(dinfo->lmbs);
+		kfree(dinfo);
+	}
+}
+
+struct drmem_lmb_info *drmem_lmbs_init(struct property *prop)
+{
+	struct drmem_lmb_info *dinfo;
+
+	dinfo = kzalloc(sizeof(*dinfo), GFP_KERNEL);
+	if (!dinfo)
+		return NULL;
+
+	if (!strcmp("ibm,dynamic-memory", prop->name))
+		init_drmem_v1_lmbs(prop->value, dinfo);
+	else if (!strcmp("ibm,dynamic-memory-v2", prop->name))
+		init_drmem_v2_lmbs(prop->value, dinfo);
+
+	return dinfo;
+}
+
 static int __init drmem_init(void)
 {
 	struct device_node *dn;
 	const __be32 *prop;
 
+	if (n_root_addr_cells == 0)
+		n_root_addr_cells = dt_root_addr_cells;
+
 	dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
 	if (!dn) {
 		pr_info("No dynamic reconfiguration memory found\n");
@@ -434,11 +469,11 @@ static int __init drmem_init(void)
 
 	prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
 	if (prop) {
-		init_drmem_v1_lmbs(prop);
+		init_drmem_v1_lmbs(prop, drmem_info);
 	} else {
 		prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
 		if (prop)
-			init_drmem_v2_lmbs(prop);
+			init_drmem_v2_lmbs(prop, drmem_info);
 	}
 
 	of_node_put(dn);

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox