LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 2/5] powerpc/64s: Add new security feature flags for count cache flush
From: Michael Ellerman @ 2018-07-23 15:07 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20180723150756.11108-1-mpe@ellerman.id.au>

Add security feature flags to indicate the need for software to flush
the count cache on context switch, and for the presence of a hardware
assisted count cache flush.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/security_features.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index 44989b22383c..a0d47bc18a5c 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -59,6 +59,9 @@ static inline bool security_ftr_enabled(unsigned long feature)
 // Indirect branch prediction cache disabled
 #define SEC_FTR_COUNT_CACHE_DISABLED	0x0000000000000020ull
 
+// bcctr 2,0,0 triggers a hardware assisted count cache flush
+#define SEC_FTR_BCCTR_FLUSH_ASSIST	0x0000000000000800ull
+
 
 // Features indicating need for Spectre/Meltdown mitigations
 
@@ -74,6 +77,9 @@ static inline bool security_ftr_enabled(unsigned long feature)
 // Firmware configuration indicates user favours security over performance
 #define SEC_FTR_FAVOUR_SECURITY		0x0000000000000200ull
 
+// Software required to flush count cache on context switch
+#define SEC_FTR_FLUSH_COUNT_CACHE	0x0000000000000400ull
+
 
 // Features enabled by default
 #define SEC_FTR_DEFAULT \
-- 
2.14.1

^ permalink raw reply related

* [PATCH 3/5] powerpc/64s: Add support for software count cache flush
From: Michael Ellerman @ 2018-07-23 15:07 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20180723150756.11108-1-mpe@ellerman.id.au>

Some CPU revisions support a mode where the count cache needs to be
flushed by software on context switch. Additionally some revisions may
have a hardware accelerated flush, in which case the software flush
sequence can be shortened.

If we detect the appropriate flag from firmware we patch a branch
into _switch() which takes us to a count cache flush sequence.

That sequence in turn may be patched to return early if we detect that
the CPU supports accelerating the flush sequence in hardware.

Add debugfs support for reporting the state of the flush, as well as
runtime disabling it.

And modify the spectre_v2 sysfs file to report the state of the
software flush.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/asm-prototypes.h    |  6 ++
 arch/powerpc/include/asm/security_features.h |  1 +
 arch/powerpc/kernel/entry_64.S               | 54 ++++++++++++++++
 arch/powerpc/kernel/security.c               | 96 ++++++++++++++++++++++++++--
 4 files changed, 152 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 769567b66c0c..70fdc5b9b9fb 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -143,4 +143,10 @@ struct kvm_vcpu;
 void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
 void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
 
+/* Patch sites */
+extern s32 patch__call_flush_count_cache;
+extern s32 patch__flush_count_cache_return;
+
+extern long flush_count_cache;
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index a0d47bc18a5c..759597bf0fd8 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -22,6 +22,7 @@ enum stf_barrier_type {
 
 void setup_stf_barrier(void);
 void do_stf_barrier_fixups(enum stf_barrier_type types);
+void setup_count_cache_flush(void);
 
 static inline void security_ftr_set(unsigned long feature)
 {
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 0357f87a013c..017cf70f01d7 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -25,6 +25,7 @@
 #include <asm/page.h>
 #include <asm/mmu.h>
 #include <asm/thread_info.h>
+#include <asm/code-patching-asm.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/cputable.h>
@@ -504,6 +505,57 @@ _GLOBAL(ret_from_kernel_thread)
 	li	r3,0
 	b	.Lsyscall_exit
 
+#ifdef CONFIG_PPC_BOOK3S_64
+
+#define FLUSH_COUNT_CACHE	\
+1:	nop;			\
+	patch_site 1b, patch__call_flush_count_cache
+
+
+#define BCCTR_FLUSH	.long 0x4c400420
+
+.macro nops number
+	.rept \number
+	nop
+	.endr
+.endm
+
+.balign 32
+.global flush_count_cache
+flush_count_cache:
+	/* Save LR into r9 */
+	mflr	r9
+
+	.rept 64
+	bl	.+4
+	.endr
+	b	1f
+	nops	6
+
+	.balign 32
+	/* Restore LR */
+1:	mtlr	r9
+	li	r9,0x7fff
+	mtctr	r9
+
+	BCCTR_FLUSH
+
+2:	nop
+	patch_site 2b patch__flush_count_cache_return
+
+	nops	3
+
+	.rept 278
+	.balign 32
+	BCCTR_FLUSH
+	nops	7
+	.endr
+
+	blr
+#else
+#define FLUSH_COUNT_CACHE
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 /*
  * This routine switches between two different tasks.  The process
  * state of one is saved on its kernel stack.  Then the state
@@ -535,6 +587,8 @@ _GLOBAL(_switch)
 	std	r23,_CCR(r1)
 	std	r1,KSP(r3)	/* Set old stack pointer */
 
+	FLUSH_COUNT_CACHE
+
 	/*
 	 * On SMP kernels, care must be taken because a task may be
 	 * scheduled off CPUx and on to CPUy. Memory ordering must be
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 4cb8f1f7b593..fa9366b53eb7 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -8,6 +8,8 @@
 #include <linux/device.h>
 #include <linux/seq_buf.h>
 
+#include <asm/asm-prototypes.h>
+#include <asm/code-patching.h>
 #include <asm/debugfs.h>
 #include <asm/security_features.h>
 #include <asm/setup.h>
@@ -15,6 +17,13 @@
 
 unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
 
+enum count_cache_flush_type {
+	COUNT_CACHE_FLUSH_NONE	= 0x1,
+	COUNT_CACHE_FLUSH_SW	= 0x2,
+	COUNT_CACHE_FLUSH_HW	= 0x4,
+};
+static enum count_cache_flush_type count_cache_flush_type;
+
 bool barrier_nospec_enabled;
 
 static void enable_barrier_nospec(bool enable)
@@ -147,17 +156,29 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
 	bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
 	ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
 
-	if (bcs || ccd) {
+	if (bcs || ccd || count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
+		bool comma = false;
 		seq_buf_printf(&s, "Mitigation: ");
 
-		if (bcs)
+		if (bcs) {
 			seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
+			comma = true;
+		}
+
+		if (ccd) {
+			if (comma)
+				seq_buf_printf(&s, ", ");
+			seq_buf_printf(&s, "Indirect branch cache disabled");
+			comma = true;
+		}
 
-		if (bcs && ccd)
+		if (comma)
 			seq_buf_printf(&s, ", ");
 
-		if (ccd)
-			seq_buf_printf(&s, "Indirect branch cache disabled");
+		seq_buf_printf(&s, "Software count cache flush");
+
+		if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW)
+			seq_buf_printf(&s, "(hardware accelerated)");
 	} else
 		seq_buf_printf(&s, "Vulnerable");
 
@@ -313,3 +334,68 @@ static __init int stf_barrier_debugfs_init(void)
 }
 device_initcall(stf_barrier_debugfs_init);
 #endif /* CONFIG_DEBUG_FS */
+
+static void toggle_count_cache_flush(bool enable)
+{
+	if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+		patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP);
+		count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
+		pr_info("count-cache-flush: software flush disabled.\n");
+		return;
+	}
+
+	patch_branch_site(&patch__call_flush_count_cache,
+			  (u64)&flush_count_cache, BRANCH_SET_LINK);
+
+	if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
+		count_cache_flush_type = COUNT_CACHE_FLUSH_SW;
+		pr_info("count-cache-flush: full software flush sequence enabled.\n");
+		return;
+	}
+
+	patch_instruction_site(&patch__flush_count_cache_return, PPC_INST_BLR);
+	count_cache_flush_type = COUNT_CACHE_FLUSH_HW;
+	pr_info("count-cache-flush: hardware assisted flush sequence enabled\n");
+}
+
+void setup_count_cache_flush(void)
+{
+	toggle_count_cache_flush(true);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int count_cache_flush_set(void *data, u64 val)
+{
+	bool enable;
+
+	if (val == 1)
+		enable = true;
+	else if (val == 0)
+		enable = false;
+	else
+		return -EINVAL;
+
+	toggle_count_cache_flush(enable);
+
+	return 0;
+}
+
+static int count_cache_flush_get(void *data, u64 *val)
+{
+	if (count_cache_flush_type == COUNT_CACHE_FLUSH_NONE)
+		*val = 0;
+	else
+		*val = 1;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, count_cache_flush_set, "%llu\n");
+
+static __init int count_cache_flush_debugfs_init(void)
+{
+	debugfs_create_file("count_cache_flush", 0600, powerpc_debugfs_root, NULL, &fops_count_cache_flush);
+	return 0;
+}
+device_initcall(count_cache_flush_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
-- 
2.14.1

^ permalink raw reply related

* [PATCH 4/5] powerpc/pseries: Query hypervisor for count cache flush settings
From: Michael Ellerman @ 2018-07-23 15:07 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20180723150756.11108-1-mpe@ellerman.id.au>

Use the existing hypercall to determine the appropriate settings for
the count cache flush, and then call the generic powerpc code to set
it up based on the security feature flags.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/hvcall.h      | 2 ++
 arch/powerpc/platforms/pseries/setup.c | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 662c8347d699..a0b17f9f1ea4 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -342,10 +342,12 @@
 #define H_CPU_CHAR_BRANCH_HINTS_HONORED	(1ull << 58) // IBM bit 5
 #define H_CPU_CHAR_THREAD_RECONFIG_CTRL	(1ull << 57) // IBM bit 6
 #define H_CPU_CHAR_COUNT_CACHE_DISABLED	(1ull << 56) // IBM bit 7
+#define H_CPU_CHAR_BCCTR_FLUSH_ASSIST	(1ull << 54) // IBM bit 9
 
 #define H_CPU_BEHAV_FAVOUR_SECURITY	(1ull << 63) // IBM bit 0
 #define H_CPU_BEHAV_L1D_FLUSH_PR	(1ull << 62) // IBM bit 1
 #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ull << 61) // IBM bit 2
+#define H_CPU_BEHAV_FLUSH_COUNT_CACHE	(1ull << 58) // IBM bit 5
 
 /* Flag values used in H_REGISTER_PROC_TBL hcall */
 #define PROC_TABLE_OP_MASK	0x18
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 139f0af6c3d9..04805a79cbda 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -484,6 +484,12 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
 	if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
 		security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
 
+	if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
+		security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
+
+	if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
+		security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
+
 	/*
 	 * The features below are enabled by default, so we instead look to see
 	 * if firmware has *disabled* them, and clear them if so.
@@ -535,6 +541,7 @@ void pseries_setup_rfi_flush(void)
 
 	setup_rfi_flush(types, enable);
 	setup_barrier_nospec();
+	setup_count_cache_flush();
 }
 
 #ifdef CONFIG_PCI_IOV
-- 
2.14.1

^ permalink raw reply related

* [PATCH 5/5] powerpc/powernv: Query firmware for count cache flush settings
From: Michael Ellerman @ 2018-07-23 15:07 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20180723150756.11108-1-mpe@ellerman.id.au>

Look for fw-features properties to determine the appropriate settings
for the count cache flush, and then call the generic powerpc code to
set it up based on the security feature flags.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/setup.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index f96df0a25d05..0988d050becd 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -78,6 +78,12 @@ static void init_fw_feat_flags(struct device_node *np)
 	if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
 		security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
 
+	if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np))
+		security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
+
+	if (fw_feature_is("enabled", "needs-count-cache-flush-on-context-switch", np))
+		security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
+
 	/*
 	 * The features below are enabled by default, so we instead look to see
 	 * if firmware has *disabled* them, and clear them if so.
@@ -125,6 +131,7 @@ static void pnv_setup_rfi_flush(void)
 
 	setup_rfi_flush(type, enable);
 	setup_barrier_nospec();
+	setup_count_cache_flush();
 }
 
 static void __init pnv_setup_arch(void)
-- 
2.14.1

^ permalink raw reply related

* Re: Mark ams driver as orphaned in MAINTAINERS
From: Michael Ellerman @ 2018-07-23 15:11 UTC (permalink / raw)
  To: Michael Hanselmann, linuxppc-dev, linux-kernel; +Cc: Michael Hanselmann
In-Reply-To: <8fe08e4d44c56ee0b70517fc8609ab5a116bc407.1517265227.git.linux-kernel@hansmi.ch>

On Mon, 2018-01-29 at 22:40:09 UTC, Michael Hanselmann wrote:
> I no longer have any hardware with the Apple motion sensor and thus
> relinquish maintainership of the driver.
> 
> Signed-off-by: Michael Hanselmann <linux-kernel@hansmi.ch>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/d69ccc00c497a4d81fca2dca9bda9f

cheers

^ permalink raw reply

* Re: [2/2] powernv/npu: Add a debugfs setting to change ATSD threshold
From: Michael Ellerman @ 2018-07-23 15:11 UTC (permalink / raw)
  To: Alistair Popple, linuxppc-dev; +Cc: Alistair Popple, mhairgrove, arbab
In-Reply-To: <20180417091129.23069-2-alistair@popple.id.au>

On Tue, 2018-04-17 at 09:11:29 UTC, Alistair Popple wrote:
> The threshold at which it becomes more efficient to coalesce a range of
> ATSDs into a single per-PID ATSD is currently not well understood due to a
> lack of real-world work loads. This patch adds a debugfs parameter allowing
> the threshold to be altered at runtime in order to aid future development
> and refinement of the value.
> 
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> Acked-by: Balbir Singh <bsingharora@gmail.com>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/99c3ce33a00bc40cb218af770ef00c

cheers

^ permalink raw reply

* Re: powerpc/mpic: Cleanup irq vector accounting
From: Michael Ellerman @ 2018-07-23 15:11 UTC (permalink / raw)
  To: Bharat Bhushan, benh, paulus, robh, geoff, tyreld, linuxppc-dev,
	linux-kernel
  Cc: Bharat Bhushan
In-Reply-To: <1530267872-31244-1-git-send-email-Bharat.Bhushan@nxp.com>

On Fri, 2018-06-29 at 10:24:32 UTC, Bharat Bhushan wrote:
> The available vector space accounts for IPIs and timer interrupts,
> but the spurious vector was not accounted for. Also,
> mpic_setup_error_int() later skips one more vector, seemingly
> because it assumes one spurious vector.
> 
> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/fca7bf946e31688edce446fdaa571c

cheers

^ permalink raw reply

* Re: cxl: Fix wrong comparison in cxl_adapter_context_get()
From: Michael Ellerman @ 2018-07-23 15:11 UTC (permalink / raw)
  To: Vaibhav Jain, linuxppc-dev, Frederic Barrat, Andrew Donnellan,
	Dan Carpenter
  Cc: Philippe Bergheaud, Vaibhav Jain, Alastair D'Silva,
	Christophe Lombard, stable
In-Reply-To: <20180704152833.2288-1-vaibhav@linux.ibm.com>

On Wed, 2018-07-04 at 15:28:33 UTC, Vaibhav Jain wrote:
> Function atomic_inc_unless_negative() returns a bool to indicate
> success/failure. However cxl_adapter_context_get() wrongly compares
> the return value against '>=0' which will always be true. The patch
> fixes this comparison to '==0', thereby also fixing this compile time
> warning:
> 
> 	drivers/misc/cxl/main.c:290 cxl_adapter_context_get()
> 	warn: 'atomic_inc_unless_negative(&adapter->contexts_num)' is unsigned
> 
> Cc: stable@vger.kernel.org
> Fixes: 70b565bbdb91 ("cxl: Prevent adapter reset if an active context exists")
> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
> Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/ef6cb5f1a048fdf91ccee6d63d2bfa

cheers

^ permalink raw reply

* Re: powerpc/8xx: fix handling of early NULL pointer dereference
From: Michael Ellerman @ 2018-07-23 15:11 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <4302f36bd615c179c92f2007a0f3a1215ba96c55.1531439662.git.christophe.leroy@c-s.fr>

On Fri, 2018-07-13 at 13:10:47 UTC, Christophe Leroy wrote:
> NULL pointers are pointers to user memory space. So user pagetable
> has to be set in order to avoid random behaviour in case of NULL
> pointer dereference, otherwise we may encounter random memory
> access hence Machine Check Exception from TLB Miss handlers.
> 
> Set user pagetable as early as possible in order to properly
> catch early kernel NULL pointer dereference.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/8c8c10b90d88bfaad41ea34df17bf6

cheers

^ permalink raw reply

* Re: [1/3] selftests/powerpc: Remove Power9 copy_unaligned test
From: Michael Ellerman @ 2018-07-23 15:11 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: mikey, chris
In-Reply-To: <20180711071017.5128-1-mpe@ellerman.id.au>

On Wed, 2018-07-11 at 07:10:15 UTC, Michael Ellerman wrote:
> This is a test of the ISA 3.0 "copy" instruction. That instruction has
> an L field, which if set to 1 specifies that "the instruction
> identifies the beginning of a move group" (pp 858). That's also
> referred to as "copy first" vs "copy".
> 
> In ISA 3.0B the copy instruction does not have an L field, and the
> corresponding bit in the instruction must be set to 1.
> 
> This test is generating a "copy" instruction, not a "copy first", and
> so on Power9 (which implements 3.0B), this results in an illegal
> instruction.
> 
> So just drop the test entirely. We still have copy_first_unaligned to
> test the "copy first" behaviour.
> 
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> Acked-by: Michael Neuling <mikey@neuling.org>

Series applied to powerpc next.

https://git.kernel.org/powerpc/c/83039f22ba2f6aff935a2acbb6bf67

cheers

^ permalink raw reply

* Re: chrp/nvram.c: add MODULE_LICENSE()
From: Michael Ellerman @ 2018-07-23 15:11 UTC (permalink / raw)
  To: Randy Dunlap, linuxppc-dev; +Cc: Paul Mackerras
In-Reply-To: <22eb2f31-3b79-3702-071c-d0fe06a50150@infradead.org>

On Sat, 2018-07-14 at 04:27:48 UTC, Randy Dunlap wrote:
> From: Randy Dunlap <rdunlap@infradead.org>
> 
> Add MODULE_LICENSE() to the chrp nvram.c driver to fix the build
> warning message:
> 
> WARNING: modpost: missing MODULE_LICENSE() in arch/powerpc/platforms/chrp/nvram.o
> 
> Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: linuxppc-dev@lists.ozlabs.org

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/a8bf9e504a6a2bb058b4f020d4ccc5

cheers

^ permalink raw reply

* Re: powerpc/hugetlbpage: Remove unhelpful HUGEPD_*_SHIFT macros
From: Michael Ellerman @ 2018-07-23 15:11 UTC (permalink / raw)
  To: David Gibson, aneesh.kumar, paulus
  Cc: linuxppc-dev, linux-kernel, David Gibson
In-Reply-To: <20180717042430.15395-1-david@gibson.dropbear.id.au>

On Tue, 2018-07-17 at 04:24:30 UTC, David Gibson wrote:
> The HUGEPD_*_SHIFT macros are always defined to be PGDIR_SHIFT and
> PUD_SHIFT, and have to have those values to work properly.  They once used
> to have different values, but that was really only because they were used
> to mean different things in different contexts.
> 
> 6fa50483 "powerpc/mm/hugetlb: initialize the pagetable cache correctly for
> hugetlb" removed that double meaning, but left the now useless constants.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/fdf743c5c51d5b45db4dada3910954

cheers

^ permalink raw reply

* Re: [v2] powerpc/prom_init: remove linux,stdout-package property
From: Michael Ellerman @ 2018-07-23 15:11 UTC (permalink / raw)
  To: Murilo Opsfelder Araujo, linux-kernel
  Cc: Kees Cook, Mathieu Malaterre, Aneesh Kumar K . V,
	Murilo Opsfelder Araujo, Nicholas Piggin, Alexey Kardashevskiy,
	Michael Bringmann, Paul Mackerras, Cédric Le Goater,
	Bharata B Rao, Nathan Fontenot, linuxppc-dev
In-Reply-To: <20180718161544.12134-1-muriloo@linux.ibm.com>

On Wed, 2018-07-18 at 16:15:44 UTC, Murilo Opsfelder Araujo wrote:
> This property was added in 2004 and the only use of it, which was already inside
> `#if 0`, was removed a month later.
> 
> Signed-off-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/ec9336396a98f61290f45e8bb94245

cheers

^ permalink raw reply

* Re: powerpc/ps3: Set driver coherent_dma_mask
From: Michael Ellerman @ 2018-07-23 15:11 UTC (permalink / raw)
  To: Geoff Levand, Alan Stern, Takashi Iwai, Jaroslav Kysela
  Cc: linux-usb, linuxppc-dev@lists.ozlabs.org
In-Reply-To: <068ebcfa-7cd0-bd06-42e7-577a4624f0b0@infradead.org>

On Wed, 2018-07-18 at 22:08:33 UTC, Geoff Levand wrote:
> Set the coherent_dma_mask for the PS3 ehci, ohci, and snd devices.
> 
> Silences WARN_ON_ONCE messages emitted by the dma_alloc_attrs() routine.
> 
> Reported-by: Fredrik Noring <noring@nocrew.org>
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/48e91846865cd61bfdb701eb737de2

cheers

^ permalink raw reply

* Re: [PATCH v2 2/2] powerpc/pseries: Wait for completion of hotplug events during PRRN handling
From: John Allen @ 2018-07-23 15:22 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, nfont
In-Reply-To: <87k1pmhxx7.fsf@concordia.ellerman.id.au>

On Mon, Jul 23, 2018 at 11:41:24PM +1000, Michael Ellerman wrote:
>John Allen <jallen@linux.ibm.com> writes:
>
>> While handling PRRN events, the time to handle the actual hotplug events
>> dwarfs the time it takes to perform the device tree updates and queue the
>> hotplug events. In the case that PRRN events are being queued continuously,
>> hotplug events have been observed to be queued faster than the kernel can
>> actually handle them. This patch avoids the problem by waiting for a
>> hotplug request to complete before queueing more hotplug events.
>
>So do we need the hotplug work queue at all? Can we just call
>handle_dlpar_errorlog() directly?
>
>Or are we using the work queue to serialise things? And if so would a
>mutex be better?

Right, the workqueue is meant to serialize all hotplug events and it 
gets used for more than just PRRN events. I believe the motivation for 
using the workqueue over a mutex is that KVM guests initiate hotplug 
events through the hotplug interrupt and can queue fairly large requests 
meaning that in this scenario, waiting for a lock would block interrupts
for a while. Using the workqueue allows us to serialize hotplug events 
from different sources in the same way without worrying about the 
context in which the event is generated.

>
>It looks like prrn_update_node() is called via at least, prrn_work_fn()
>and post_mobility_fixup().
>
>The latter is called from migration_store(), which seems like it would
>be harmless. But also from pseries_suspend_enable_irqs() which I'm less
>clear on.

Yeah, that doesn't seem to make sense based on the function name. Odd 
that prrn_update_node is being called from anywhere outside of handling 
PRRN events. Perhaps if other code paths are using the function, it 
needs a more generic name.

-John

>
>cheers
>
>> diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
>> index 8a8033a249c7..49930848fa78 100644
>> --- a/arch/powerpc/platforms/pseries/mobility.c
>> +++ b/arch/powerpc/platforms/pseries/mobility.c
>> @@ -242,6 +242,7 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
>>  static void prrn_update_node(__be32 phandle)
>>  {
>>  	struct pseries_hp_errorlog *hp_elog;
>> +	struct completion hotplug_done;
>>  	struct device_node *dn;
>>
>>  	/*
>> @@ -263,7 +264,9 @@ static void prrn_update_node(__be32 phandle)
>>  	hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
>>  	hp_elog->_drc_u.drc_index = phandle;
>>
>> -	queue_hotplug_event(hp_elog, NULL, NULL);
>> +	init_completion(&hotplug_done);
>> +	queue_hotplug_event(hp_elog, &hotplug_done, NULL);
>> +	wait_for_completion(&hotplug_done);
>>
>>  	kfree(hp_elog);
>>  }
>> --
>> 2.17.1
>

^ permalink raw reply

* Re: [PATCH v07 2/9] hotplug/cpu: Add operation queuing function
From: John Allen @ 2018-07-23 15:54 UTC (permalink / raw)
  To: Michael Bringmann
  Cc: linuxppc-dev, Nathan Fontenot, Thomas Falcon, Tyrel Datwyler,
	John Allen
In-Reply-To: <a2c023e9-6997-d1a3-0110-fa8c788e64ee@linux.vnet.ibm.com>

On Fri, Jul 13, 2018 at 03:18:01PM -0500, Michael Bringmann wrote:
>migration/dlpar: This patch adds function dlpar_queue_action()
>which will queue up information about a CPU/Memory 'readd'
>operation according to resource type, action code, and DRC index.
>At a subsequent point, the list of operations can be run/played
>in series.  Examples of such operations include 'readd' of CPU
>and Memory blocks identified as having changed their associativity
>during an LPAR migration event.
>
>Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
>---
>Changes in patch:
>  -- Correct drc_index before adding to pseries_hp_errorlog struct
>  -- Correct text of notice
>  -- Revise queuing model to save up all of the DLPAR actions for
>     later execution.
>  -- Restore list init statement missing from patch
>  -- Move call to apply queued operations into 'mobility.c'
>  -- Compress some code
>  -- Rename some of queueing function APIs
>  -- Revise implementation to push execution of queued operations
>     to a workqueue task.
>  -- Cleanup reference to outdated queuing operation.
>---
> arch/powerpc/include/asm/rtas.h           |    2 +
> arch/powerpc/platforms/pseries/dlpar.c    |   61 +++++++++++++++++++++++++++++
> arch/powerpc/platforms/pseries/mobility.c |    4 ++
> arch/powerpc/platforms/pseries/pseries.h  |    2 +
> 4 files changed, 69 insertions(+)
>
>diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
>index 71e393c..4f601c7 100644
>--- a/arch/powerpc/include/asm/rtas.h
>+++ b/arch/powerpc/include/asm/rtas.h
>@@ -310,12 +310,14 @@ struct pseries_hp_errorlog {
> 		struct { __be32 count, index; } ic;
> 		char	drc_name[1];
> 	} _drc_u;
>+	struct list_head list;
> };
>
> #define PSERIES_HP_ELOG_RESOURCE_CPU	1
> #define PSERIES_HP_ELOG_RESOURCE_MEM	2
> #define PSERIES_HP_ELOG_RESOURCE_SLOT	3
> #define PSERIES_HP_ELOG_RESOURCE_PHB	4
>+#define PSERIES_HP_ELOG_RESOURCE_PMT	5
>
> #define PSERIES_HP_ELOG_ACTION_ADD	1
> #define PSERIES_HP_ELOG_ACTION_REMOVE	2
>diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
>index a0b20c0..7264b8e 100644
>--- a/arch/powerpc/platforms/pseries/dlpar.c
>+++ b/arch/powerpc/platforms/pseries/dlpar.c
>@@ -25,6 +25,7 @@
> #include <asm/prom.h>
> #include <asm/machdep.h>
> #include <linux/uaccess.h>
>+#include <linux/delay.h>
> #include <asm/rtas.h>
>
> static struct workqueue_struct *pseries_hp_wq;
>@@ -329,6 +330,8 @@ int dlpar_release_drc(u32 drc_index)
> 	return 0;
> }
>
>+static int dlpar_pmt(struct pseries_hp_errorlog *work);
>+
> static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
> {
> 	int rc;
>@@ -357,6 +360,9 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
> 	case PSERIES_HP_ELOG_RESOURCE_CPU:
> 		rc = dlpar_cpu(hp_elog);
> 		break;
>+	case PSERIES_HP_ELOG_RESOURCE_PMT:
>+		rc = dlpar_pmt(hp_elog);
>+		break;
> 	default:
> 		pr_warn_ratelimited("Invalid resource (%d) specified\n",
> 				    hp_elog->resource);
>@@ -407,6 +413,61 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
> 	}
> }
>
>+LIST_HEAD(dlpar_delayed_list);
>+
>+int dlpar_queue_action(int resource, int action, u32 drc_index)
>+{
>+	struct pseries_hp_errorlog *hp_errlog;
>+
>+	hp_errlog = kmalloc(sizeof(struct pseries_hp_errorlog), GFP_KERNEL);
>+	if (!hp_errlog)
>+		return -ENOMEM;
>+
>+	hp_errlog->resource = resource;
>+	hp_errlog->action = action;
>+	hp_errlog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
>+	hp_errlog->_drc_u.drc_index = cpu_to_be32(drc_index);
>+
>+	list_add_tail(&hp_errlog->list, &dlpar_delayed_list);
>+
>+	return 0;
>+}
>+
>+static int dlpar_pmt(struct pseries_hp_errorlog *work)
>+{
>+	struct list_head *pos, *q;
>+
>+	ssleep(15);

Why do we need to sleep for so long here?

-John

>+
>+	list_for_each_safe(pos, q, &dlpar_delayed_list) {
>+		struct pseries_hp_errorlog *tmp;
>+
>+		tmp = list_entry(pos, struct pseries_hp_errorlog, list);
>+		handle_dlpar_errorlog(tmp);
>+
>+		list_del(pos);
>+		kfree(tmp);
>+
>+		ssleep(10);
>+	}
>+
>+	return 0;
>+}
>+
>+int dlpar_queued_actions_run(void)
>+{
>+	if (!list_empty(&dlpar_delayed_list)) {
>+		struct pseries_hp_errorlog hp_errlog;
>+
>+		hp_errlog.resource = PSERIES_HP_ELOG_RESOURCE_PMT;
>+		hp_errlog.action = 0;
>+		hp_errlog.id_type = 0;
>+
>+		queue_hotplug_event(&hp_errlog, 0, 0);
>+	}
>+	return 0;
>+}
>+
> static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog)
> {
> 	char *arg;
>diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
>index f6364d9..d0d1cae 100644
>--- a/arch/powerpc/platforms/pseries/mobility.c
>+++ b/arch/powerpc/platforms/pseries/mobility.c
>@@ -378,6 +378,10 @@ static ssize_t migration_store(struct class *class,
> 		return rc;
>
> 	post_mobility_fixup();
>+
>+	/* Apply any necessary changes identified during fixup */
>+	dlpar_queued_actions_run();
>+
> 	return count;
> }
>
>diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
>index 60db2ee..72ca996 100644
>--- a/arch/powerpc/platforms/pseries/pseries.h
>+++ b/arch/powerpc/platforms/pseries/pseries.h
>@@ -61,6 +61,8 @@ extern struct device_node *dlpar_configure_connector(__be32,
>
> void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
> 			 struct completion *hotplug_done, int *rc);
>+int dlpar_queue_action(int resource, int action, u32 drc_index);
>+int dlpar_queued_actions_run(void);
> #ifdef CONFIG_MEMORY_HOTPLUG
> int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
> #else
>

^ permalink raw reply

* [PATCH] net: ethernet: fs-enet: Use generic CRC32 implementation
From: Krzysztof Kozlowski @ 2018-07-23 16:20 UTC (permalink / raw)
  To: Pantelis Antoniou, David S. Miller, linuxppc-dev, netdev,
	linux-kernel
  Cc: Eric Biggers, Krzysztof Kozlowski

Use generic kernel CRC32 implementation because it:
1. Should be faster (uses lookup tables),
2. Removes duplicated CRC generation code,
3. Uses well-proven algorithm instead of coding it one more time.

Suggested-by: Eric Biggers <ebiggers3@gmail.com>
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>

---

Not tested on hardware.
---
 drivers/net/ethernet/freescale/fs_enet/mac-fec.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
index 1fc27c97e3b2..99fe2c210d0f 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
@@ -18,6 +18,7 @@
 #include <linux/string.h>
 #include <linux/ptrace.h>
 #include <linux/errno.h>
+#include <linux/crc32.h>
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
@@ -176,21 +177,10 @@ static void set_multicast_start(struct net_device *dev)
 static void set_multicast_one(struct net_device *dev, const u8 *mac)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
-	int temp, hash_index, i, j;
+	int temp, hash_index;
 	u32 crc, csrVal;
-	u8 byte, msb;
-
-	crc = 0xffffffff;
-	for (i = 0; i < 6; i++) {
-		byte = mac[i];
-		for (j = 0; j < 8; j++) {
-			msb = crc >> 31;
-			crc <<= 1;
-			if (msb ^ (byte & 0x1))
-				crc ^= FEC_CRC_POLY;
-			byte >>= 1;
-		}
-	}
+
+	crc = ether_crc(6, mac);
 
 	temp = (crc & 0x3f) >> 1;
 	hash_index = ((temp & 0x01) << 4) |
-- 
2.14.1

^ permalink raw reply related

* Re: [PATCH v4 00/11] hugetlb: Factorize hugetlb architecture primitives
From: Alex Ghiti @ 2018-07-23 17:41 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linux, catalin.marinas, will.deacon, tony.luck, fenghua.yu, ralf,
	paul.burton, jhogan, jejb, deller, benh, paulus, ysato, dalias,
	davem, tglx, mingo, hpa, x86, arnd, linux-arm-kernel,
	linux-kernel, linux-ia64, linux-mips, linux-parisc, linuxppc-dev,
	linux-sh, sparclinux, linux-arch, Naoya Horiguchi, Mike Kravetz,
	Michal Hocko
In-Reply-To: <87d0vehx16.fsf@concordia.ellerman.id.au>

Ok will do and report when done.

Thanks for your feedback,

Alex

On 07/23/2018 02:00 PM, Michael Ellerman wrote:
> Alex Ghiti <alex@ghiti.fr> writes:
>
>> Does anyone have any suggestion about those patches ?
> Cross compiling it for some non-x86 arches would be a good start :)
>
> There are cross compilers available here:
>
>    https://mirrors.edge.kernel.org/pub/tools/crosstool/
>
>
> cheers
>
>> On 07/09/2018 02:16 PM, Michal Hocko wrote:
>>> [CC hugetlb guys - http://lkml.kernel.org/r/20180705110716.3919-1-alex@ghiti.fr]
>>>
>>> On Thu 05-07-18 11:07:05, Alexandre Ghiti wrote:
>>>> In order to reduce copy/paste of functions across architectures and then
>>>> make riscv hugetlb port (and future ports) simpler and smaller, this
>>>> patchset intends to factorize the numerous hugetlb primitives that are
>>>> defined across all the architectures.
>>>>
>>>> Except for prepare_hugepage_range, this patchset moves the versions that
>>>> are just pass-through to standard pte primitives into
>>>> asm-generic/hugetlb.h by using the same #ifdef semantic that can be
>>>> found in asm-generic/pgtable.h, i.e. __HAVE_ARCH_***.
>>>>
>>>> s390 architecture has not been tackled in this series since it does not
>>>> use asm-generic/hugetlb.h at all.
>>>> powerpc could be factorized a bit more (cf huge_ptep_set_wrprotect).
>>>>
>>>> This patchset has been compiled on x86 only.
>>>>
>>>> Changelog:
>>>>
>>>> v4:
>>>>     Fix powerpc build error due to misplacing of #include
>>>>     <asm-generic/hugetlb.h> outside of #ifdef CONFIG_HUGETLB_PAGE, as
>>>>     pointed by Christophe Leroy.
>>>>
>>>> v1, v2, v3:
>>>>     Same version, just problems with email provider and misuse of
>>>>     --batch-size option of git send-email
>>>>
>>>> Alexandre Ghiti (11):
>>>>     hugetlb: Harmonize hugetlb.h arch specific defines with pgtable.h
>>>>     hugetlb: Introduce generic version of hugetlb_free_pgd_range
>>>>     hugetlb: Introduce generic version of set_huge_pte_at
>>>>     hugetlb: Introduce generic version of huge_ptep_get_and_clear
>>>>     hugetlb: Introduce generic version of huge_ptep_clear_flush
>>>>     hugetlb: Introduce generic version of huge_pte_none
>>>>     hugetlb: Introduce generic version of huge_pte_wrprotect
>>>>     hugetlb: Introduce generic version of prepare_hugepage_range
>>>>     hugetlb: Introduce generic version of huge_ptep_set_wrprotect
>>>>     hugetlb: Introduce generic version of huge_ptep_set_access_flags
>>>>     hugetlb: Introduce generic version of huge_ptep_get
>>>>
>>>>    arch/arm/include/asm/hugetlb-3level.h        | 32 +---------
>>>>    arch/arm/include/asm/hugetlb.h               | 33 +----------
>>>>    arch/arm64/include/asm/hugetlb.h             | 39 +++---------
>>>>    arch/ia64/include/asm/hugetlb.h              | 47 ++-------------
>>>>    arch/mips/include/asm/hugetlb.h              | 40 +++----------
>>>>    arch/parisc/include/asm/hugetlb.h            | 33 +++--------
>>>>    arch/powerpc/include/asm/book3s/32/pgtable.h |  2 +
>>>>    arch/powerpc/include/asm/book3s/64/pgtable.h |  1 +
>>>>    arch/powerpc/include/asm/hugetlb.h           | 43 ++------------
>>>>    arch/powerpc/include/asm/nohash/32/pgtable.h |  2 +
>>>>    arch/powerpc/include/asm/nohash/64/pgtable.h |  1 +
>>>>    arch/sh/include/asm/hugetlb.h                | 54 ++---------------
>>>>    arch/sparc/include/asm/hugetlb.h             | 40 +++----------
>>>>    arch/x86/include/asm/hugetlb.h               | 72 +----------------------
>>>>    include/asm-generic/hugetlb.h                | 88 +++++++++++++++++++++++++++-
>>>>    15 files changed, 143 insertions(+), 384 deletions(-)
>>>>
>>>> -- 
>>>> 2.16.2

^ permalink raw reply

* Re: [PATCH v07 1/9] hotplug/cpu: Conditionally acquire/release DRC index
From: Nathan Fontenot @ 2018-07-23 17:42 UTC (permalink / raw)
  To: Michael Bringmann, linuxppc-dev; +Cc: John Allen, Tyrel Datwyler, Thomas Falcon
In-Reply-To: <a7e202c1-177f-db1b-912f-8773a551feec@linux.vnet.ibm.com>

On 07/13/2018 03:17 PM, Michael Bringmann wrote:
> powerpc/cpu: Modify dlpar_cpu_add and dlpar_cpu_remove to allow the
> skipping of DRC index acquire or release operations during the CPU
> add or remove operations.  This is intended to support subsequent
> changes to provide a 'CPU readd' operation.
> 
> Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
> ---
> Changes in patch:
>    -- Move new validity check added to pseries_smp_notifier
>       to another patch
>    -- Revise one of checks for 'acquire_drc' in dlpar_cpu_add.
>    -- Revise one of checks for 'release_drc' in dlpar_cpu_remove.
> ---
>   arch/powerpc/platforms/pseries/hotplug-cpu.c |   71 +++++++++++++++-----------
>   1 file changed, 40 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index 6ef77ca..7ede3b0 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -432,7 +432,7 @@ static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
>   	return found;
>   }
> 
> -static ssize_t dlpar_cpu_add(u32 drc_index)
> +static ssize_t dlpar_cpu_add(u32 drc_index, bool acquire_drc)
>   {
>   	struct device_node *dn, *parent;
>   	int rc, saved_rc;
> @@ -457,19 +457,22 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
>   		return -EINVAL;
>   	}
> 
> -	rc = dlpar_acquire_drc(drc_index);
> -	if (rc) {
> -		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
> -			rc, drc_index);
> -		of_node_put(parent);
> -		return -EINVAL;
> +	if (acquire_drc) {
> +		rc = dlpar_acquire_drc(drc_index);
> +		if (rc) {
> +			pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
> +				rc, drc_index);
> +			of_node_put(parent);
> +			return -EINVAL;
> +		}
>   	}
> 
>   	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
>   	if (!dn) {
>   		pr_warn("Failed call to configure-connector, drc index: %x\n",
>   			drc_index);
> -		dlpar_release_drc(drc_index);
> +		if (acquire_drc)
> +			dlpar_release_drc(drc_index);
>   		of_node_put(parent);
>   		return -EINVAL;
>   	}
> @@ -484,9 +487,11 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
>   		pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
>   			dn->name, rc, drc_index);
> 
> -		rc = dlpar_release_drc(drc_index);
> -		if (!rc)
> -			dlpar_free_cc_nodes(dn);
> +		if (acquire_drc) {
> +			rc = dlpar_release_drc(drc_index);
> +			if (!rc)
> +				dlpar_free_cc_nodes(dn);
> +		}
> 
>   		return saved_rc;
>   	}
> @@ -498,7 +503,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
>   			dn->name, rc, drc_index);
> 
>   		rc = dlpar_detach_node(dn);
> -		if (!rc)
> +		if (!rc && acquire_drc)
>   			dlpar_release_drc(drc_index);
> 
>   		return saved_rc;
> @@ -566,7 +571,8 @@ static int dlpar_offline_cpu(struct device_node *dn)
> 
>   }
> 
> -static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
> +static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index,
> +				bool release_drc)
>   {
>   	int rc;
> 
> @@ -579,12 +585,14 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
>   		return -EINVAL;
>   	}
> 
> -	rc = dlpar_release_drc(drc_index);
> -	if (rc) {
> -		pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
> -			drc_index, dn->name, rc);
> -		dlpar_online_cpu(dn);
> -		return rc;
> +	if (release_drc) {
> +		rc = dlpar_release_drc(drc_index);
> +		if (rc) {
> +			pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
> +				drc_index, dn->name, rc);
> +			dlpar_online_cpu(dn);
> +			return rc;
> +		}
>   	}
> 
>   	rc = dlpar_detach_node(dn);
> @@ -593,8 +601,9 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
> 
>   		pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);
> 
> -		rc = dlpar_acquire_drc(drc_index);
> -		if (!rc)
> +		if (release_drc)
> +			rc = dlpar_acquire_drc(drc_index);
> +		if (!release_drc || !rc)
>   			dlpar_online_cpu(dn);

This is likely wrong. At this point you're in an if (rc) block, so rc is
already non-zero. If release_drc is false this checks an invalid rc state.

-Nathan

> 
>   		return saved_rc;
> @@ -622,7 +631,7 @@ static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
>   	return dn;
>   }
> 
> -static int dlpar_cpu_remove_by_index(u32 drc_index)
> +static int dlpar_cpu_remove_by_index(u32 drc_index, bool release_drc)
>   {
>   	struct device_node *dn;
>   	int rc;
> @@ -634,7 +643,7 @@ static int dlpar_cpu_remove_by_index(u32 drc_index)
>   		return -ENODEV;
>   	}
> 
> -	rc = dlpar_cpu_remove(dn, drc_index);
> +	rc = dlpar_cpu_remove(dn, drc_index, release_drc);
>   	of_node_put(dn);
>   	return rc;
>   }
> @@ -699,7 +708,7 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
>   	}
> 
>   	for (i = 0; i < cpus_to_remove; i++) {
> -		rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
> +		rc = dlpar_cpu_remove_by_index(cpu_drcs[i], true);
>   		if (rc)
>   			break;
> 
> @@ -710,7 +719,7 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
>   		pr_warn("CPU hot-remove failed, adding back removed CPUs\n");
> 
>   		for (i = 0; i < cpus_removed; i++)
> -			dlpar_cpu_add(cpu_drcs[i]);
> +			dlpar_cpu_add(cpu_drcs[i], true);
> 
>   		rc = -EINVAL;
>   	} else {
> @@ -780,7 +789,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
>   	}
> 
>   	for (i = 0; i < cpus_to_add; i++) {
> -		rc = dlpar_cpu_add(cpu_drcs[i]);
> +		rc = dlpar_cpu_add(cpu_drcs[i], true);
>   		if (rc)
>   			break;
> 
> @@ -791,7 +800,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
>   		pr_warn("CPU hot-add failed, removing any added CPUs\n");
> 
>   		for (i = 0; i < cpus_added; i++)
> -			dlpar_cpu_remove_by_index(cpu_drcs[i]);
> +			dlpar_cpu_remove_by_index(cpu_drcs[i], true);
> 
>   		rc = -EINVAL;
>   	} else {
> @@ -817,7 +826,7 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
>   		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
>   			rc = dlpar_cpu_remove_by_count(count);
>   		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
> -			rc = dlpar_cpu_remove_by_index(drc_index);
> +			rc = dlpar_cpu_remove_by_index(drc_index, true);
>   		else
>   			rc = -EINVAL;
>   		break;
> @@ -825,7 +834,7 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
>   		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
>   			rc = dlpar_cpu_add_by_count(count);
>   		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
> -			rc = dlpar_cpu_add(drc_index);
> +			rc = dlpar_cpu_add(drc_index, true);
>   		else
>   			rc = -EINVAL;
>   		break;
> @@ -850,7 +859,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
>   	if (rc)
>   		return -EINVAL;
> 
> -	rc = dlpar_cpu_add(drc_index);
> +	rc = dlpar_cpu_add(drc_index, true);
> 
>   	return rc ? rc : count;
>   }
> @@ -871,7 +880,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count)
>   		return -EINVAL;
>   	}
> 
> -	rc = dlpar_cpu_remove(dn, drc_index);
> +	rc = dlpar_cpu_remove(dn, drc_index, true);
>   	of_node_put(dn);
> 
>   	return rc ? rc : count;
> 

^ permalink raw reply

* Re: [PATCH v07 2/9] hotplug/cpu: Add operation queuing function
From: Nathan Fontenot @ 2018-07-23 17:51 UTC (permalink / raw)
  To: Michael Bringmann, linuxppc-dev; +Cc: John Allen, Tyrel Datwyler, Thomas Falcon
In-Reply-To: <a2c023e9-6997-d1a3-0110-fa8c788e64ee@linux.vnet.ibm.com>

On 07/13/2018 03:18 PM, Michael Bringmann wrote:
> migration/dlpar: This patch adds function dlpar_queue_action()
> which will queue up information about a CPU/Memory 'readd'
> operation according to resource type, action code, and DRC index.
> At a subsequent point, the list of operations can be run/played
> in series.  Examples of such operations include 'readd' of CPU
> and Memory blocks identified as having changed their associativity
> during an LPAR migration event.
> 
> Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
> ---
> Changes in patch:
>    -- Correct drc_index before adding to pseries_hp_errorlog struct
>    -- Correct text of notice
>    -- Revise queuing model to save up all of the DLPAR actions for
>       later execution.
>    -- Restore list init statement missing from patch
>    -- Move call to apply queued operations into 'mobility.c'
>    -- Compress some code
>    -- Rename some of queueing function APIs
>    -- Revise implementation to push execution of queued operations
>       to a workqueue task.
>    -- Cleanup reference to outdated queuing operation.
> ---
>   arch/powerpc/include/asm/rtas.h           |    2 +
>   arch/powerpc/platforms/pseries/dlpar.c    |   61 +++++++++++++++++++++++++++++
>   arch/powerpc/platforms/pseries/mobility.c |    4 ++
>   arch/powerpc/platforms/pseries/pseries.h  |    2 +
>   4 files changed, 69 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
> index 71e393c..4f601c7 100644
> --- a/arch/powerpc/include/asm/rtas.h
> +++ b/arch/powerpc/include/asm/rtas.h
> @@ -310,12 +310,14 @@ struct pseries_hp_errorlog {
>   		struct { __be32 count, index; } ic;
>   		char	drc_name[1];
>   	} _drc_u;
> +	struct list_head list;
>   };
> 
>   #define PSERIES_HP_ELOG_RESOURCE_CPU	1
>   #define PSERIES_HP_ELOG_RESOURCE_MEM	2
>   #define PSERIES_HP_ELOG_RESOURCE_SLOT	3
>   #define PSERIES_HP_ELOG_RESOURCE_PHB	4
> +#define PSERIES_HP_ELOG_RESOURCE_PMT	5
> 
>   #define PSERIES_HP_ELOG_ACTION_ADD	1
>   #define PSERIES_HP_ELOG_ACTION_REMOVE	2
> diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
> index a0b20c0..7264b8e 100644
> --- a/arch/powerpc/platforms/pseries/dlpar.c
> +++ b/arch/powerpc/platforms/pseries/dlpar.c
> @@ -25,6 +25,7 @@
>   #include <asm/prom.h>
>   #include <asm/machdep.h>
>   #include <linux/uaccess.h>
> +#include <linux/delay.h>
>   #include <asm/rtas.h>
> 
>   static struct workqueue_struct *pseries_hp_wq;
> @@ -329,6 +330,8 @@ int dlpar_release_drc(u32 drc_index)
>   	return 0;
>   }
> 
> +static int dlpar_pmt(struct pseries_hp_errorlog *work);
> +
>   static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
>   {
>   	int rc;
> @@ -357,6 +360,9 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
>   	case PSERIES_HP_ELOG_RESOURCE_CPU:
>   		rc = dlpar_cpu(hp_elog);
>   		break;
> +	case PSERIES_HP_ELOG_RESOURCE_PMT:
> +		rc = dlpar_pmt(hp_elog);
> +		break;
>   	default:
>   		pr_warn_ratelimited("Invalid resource (%d) specified\n",
>   				    hp_elog->resource);
> @@ -407,6 +413,61 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
>   	}
>   }
> 
> +LIST_HEAD(dlpar_delayed_list);
> +
> +int dlpar_queue_action(int resource, int action, u32 drc_index)
> +{
> +	struct pseries_hp_errorlog *hp_errlog;
> +
> +	hp_errlog = kmalloc(sizeof(struct pseries_hp_errorlog), GFP_KERNEL);
> +	if (!hp_errlog)
> +		return -ENOMEM;
> +
> +	hp_errlog->resource = resource;
> +	hp_errlog->action = action;
> +	hp_errlog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
> +	hp_errlog->_drc_u.drc_index = cpu_to_be32(drc_index);
> +
> +	list_add_tail(&hp_errlog->list, &dlpar_delayed_list);
> +
> +	return 0;
> +}
> +
> +static int dlpar_pmt(struct pseries_hp_errorlog *work)
> +{
> +	struct list_head *pos, *q;
> +
> +	ssleep(15);
> +
> +	list_for_each_safe(pos, q, &dlpar_delayed_list) {
> +		struct pseries_hp_errorlog *tmp;
> +
> +		tmp = list_entry(pos, struct pseries_hp_errorlog, list);
> +		handle_dlpar_errorlog(tmp);
> +
> +		list_del(pos);
> +		kfree(tmp);
> +
> +		ssleep(10);
> +	}
> +
> +	return 0;
> +}
> +
> +int dlpar_queued_actions_run(void)
> +{
> +	if (!list_empty(&dlpar_delayed_list)) {
> +		struct pseries_hp_errorlog hp_errlog;
> +
> +		hp_errlog.resource = PSERIES_HP_ELOG_RESOURCE_PMT;
> +		hp_errlog.action = 0;
> +		hp_errlog.id_type = 0;
> +
> +		queue_hotplug_event(&hp_errlog, 0, 0);
> +	}
> +	return 0;
> +}

I'm a bit confused by this. Is there a reason this needs to queue a
hotplug event instead of just walking the list as is done in dlpar_pmt?

-Nathan

> +
>   static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog)
>   {
>   	char *arg;
> diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
> index f6364d9..d0d1cae 100644
> --- a/arch/powerpc/platforms/pseries/mobility.c
> +++ b/arch/powerpc/platforms/pseries/mobility.c
> @@ -378,6 +378,10 @@ static ssize_t migration_store(struct class *class,
>   		return rc;
> 
>   	post_mobility_fixup();
> +
> +	/* Apply any necessary changes identified during fixup */
> +	dlpar_queued_actions_run();
> +
>   	return count;
>   }
> 
> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
> index 60db2ee..72ca996 100644
> --- a/arch/powerpc/platforms/pseries/pseries.h
> +++ b/arch/powerpc/platforms/pseries/pseries.h
> @@ -61,6 +61,8 @@ extern struct device_node *dlpar_configure_connector(__be32,
> 
>   void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
>   			 struct completion *hotplug_done, int *rc);
> +int dlpar_queue_action(int resource, int action, u32 drc_index);
> +int dlpar_queued_actions_run(void);
>   #ifdef CONFIG_MEMORY_HOTPLUG
>   int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
>   #else
> 

^ permalink raw reply

* Re: [PATCH net-next] wan/fsl_ucc_hdlc: use IS_ERR_VALUE() to check return value of qe_muram_alloc
From: David Miller @ 2018-07-23 18:07 UTC (permalink / raw)
  To: yuehaibing; +Cc: qiang.zhao, linux-kernel, netdev, linuxppc-dev
In-Reply-To: <20180723141233.19948-1-yuehaibing@huawei.com>

From: YueHaibing <yuehaibing@huawei.com>
Date: Mon, 23 Jul 2018 22:12:33 +0800

> qe_muram_alloc() returns an unsigned long integer, which should not be
> compared with zero. Check it using IS_ERR_VALUE() to fix this.
> 
> Fixes: c19b6d246a35 ("drivers/net: support hdlc function for QE-UCC")
> Signed-off-by: YueHaibing <yuehaibing@huawei.com>

Applied, thanks.

^ permalink raw reply

* [PATCH kernel for v4.14 and v4.17 stable] KVM: PPC: Check if IOMMU page is contained in the pinned physical page
From: Alexey Kardashevskiy @ 2018-07-24  5:32 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Alexey Kardashevskiy, Michael Ellerman, stable, gregkh, #, v4.12+

A VM which has:
 - a DMA capable device passed through to it (eg. network card);
 - running a malicious kernel that ignores H_PUT_TCE failure;
 - capability of using IOMMU pages bigger than physical pages
can create an IOMMU mapping that exposes (for example) 16MB of
the host physical memory to the device when only 64K was allocated to the VM.

The remaining 16MB - 64K will be some other content of host memory, possibly
including pages of the VM, but also pages of host kernel memory, host
programs or other VMs.

The attacking VM does not control the location of the page it can map,
and is only allowed to map as many pages as it has pages of RAM.

We already have a check in drivers/vfio/vfio_iommu_spapr_tce.c that
an IOMMU page is contained in the physical page so the PCI hardware won't
get access to unassigned host memory; however this check is missing in
the KVM fastpath (H_PUT_TCE accelerated code). We were lucky so far and
did not hit this yet as the very first time when the mapping happens
we do not have tbl::it_userspace allocated yet and fall back to
the userspace which in turn calls VFIO IOMMU driver, this fails and
the guest does not retry.

This stores the smallest preregistered page size in the preregistered
region descriptor and changes the mm_iommu_xxx API to check this against
the IOMMU page size.

This calculates maximum page size as a minimum of the natural region
alignment and compound page size. For the page shift this uses the shift
returned by find_linux_pte() which indicates how the page is mapped to
the current userspace - if the page is huge and this is not a zero, then
it is a leaf pte and the page is mapped within the range.

Fixes: 121f80ba68f1 ("KVM: PPC: VFIO: Add in-kernel acceleration for VFIO")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
(cherry picked from commit 76fa4975f3ed12d15762bc979ca44078598ed8ee)
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---

The original patch did not apply because of fad953ce which fixed
all vmalloc's to use array_size() so the backport is pretty trivial
and applies to v4.17 stable as well.


---
 arch/powerpc/include/asm/mmu_context.h |  4 ++--
 arch/powerpc/kvm/book3s_64_vio.c       |  2 +-
 arch/powerpc/kvm/book3s_64_vio_hv.c    |  6 ++++--
 arch/powerpc/mm/mmu_context_iommu.c    | 37 ++++++++++++++++++++++++++++++++--
 drivers/vfio/vfio_iommu_spapr_tce.c    |  2 +-
 5 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 44fdf47..6f67ff5 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -35,9 +35,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
 extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
 		unsigned long ua, unsigned long entries);
 extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
-		unsigned long ua, unsigned long *hpa);
+		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
-		unsigned long ua, unsigned long *hpa);
+		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
 #endif
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 4dffa61..e14cec6 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -433,7 +433,7 @@ long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
 		/* This only handles v2 IOMMU type, v1 is handled via ioctl() */
 		return H_TOO_HARD;
 
-	if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa)))
+	if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
 		return H_HARDWARE;
 
 	if (mm_iommu_mapped_inc(mem))
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index c32e9bfe..648cf6c 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -262,7 +262,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
 	if (!mem)
 		return H_TOO_HARD;
 
-	if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
+	if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift,
+			&hpa)))
 		return H_HARDWARE;
 
 	pua = (void *) vmalloc_to_phys(pua);
@@ -431,7 +432,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 
 		mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
 		if (mem)
-			prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0;
+			prereg = mm_iommu_ua_to_hpa_rm(mem, ua,
+					IOMMU_PAGE_SHIFT_4K, &tces) == 0;
 	}
 
 	if (!prereg) {
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index e0a2d8e..8160559 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -19,6 +19,7 @@
 #include <linux/hugetlb.h>
 #include <linux/swap.h>
 #include <asm/mmu_context.h>
+#include <asm/pte-walk.h>
 
 static DEFINE_MUTEX(mem_list_mutex);
 
@@ -27,6 +28,7 @@ struct mm_iommu_table_group_mem_t {
 	struct rcu_head rcu;
 	unsigned long used;
 	atomic64_t mapped;
+	unsigned int pageshift;
 	u64 ua;			/* userspace address */
 	u64 entries;		/* number of entries in hpas[] */
 	u64 *hpas;		/* vmalloc'ed */
@@ -126,6 +128,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 {
 	struct mm_iommu_table_group_mem_t *mem;
 	long i, j, ret = 0, locked_entries = 0;
+	unsigned int pageshift;
+	unsigned long flags;
 	struct page *page = NULL;
 
 	mutex_lock(&mem_list_mutex);
@@ -160,6 +164,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 		goto unlock_exit;
 	}
 
+	/*
+	 * For a starting point for a maximum page size calculation
+	 * we use @ua and @entries natural alignment to allow IOMMU pages
+	 * smaller than huge pages but still bigger than PAGE_SIZE.
+	 */
+	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
 	mem->hpas = vzalloc(entries * sizeof(mem->hpas[0]));
 	if (!mem->hpas) {
 		kfree(mem);
@@ -200,6 +210,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 			}
 		}
 populate:
+		pageshift = PAGE_SHIFT;
+		if (PageCompound(page)) {
+			pte_t *pte;
+			struct page *head = compound_head(page);
+			unsigned int compshift = compound_order(head);
+
+			local_irq_save(flags); /* disables as well */
+			pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift);
+			local_irq_restore(flags);
+
+			/* Double check it is still the same pinned page */
+			if (pte && pte_page(*pte) == head &&
+					pageshift == compshift)
+				pageshift = max_t(unsigned int, pageshift,
+						PAGE_SHIFT);
+		}
+		mem->pageshift = min(mem->pageshift, pageshift);
 		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
 	}
 
@@ -350,7 +377,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
 EXPORT_SYMBOL_GPL(mm_iommu_find);
 
 long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
-		unsigned long ua, unsigned long *hpa)
+		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
 {
 	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
 	u64 *va = &mem->hpas[entry];
@@ -358,6 +385,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 	if (entry >= mem->entries)
 		return -EFAULT;
 
+	if (pageshift > mem->pageshift)
+		return -EFAULT;
+
 	*hpa = *va | (ua & ~PAGE_MASK);
 
 	return 0;
@@ -365,7 +395,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
 
 long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
-		unsigned long ua, unsigned long *hpa)
+		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
 {
 	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
 	void *va = &mem->hpas[entry];
@@ -374,6 +404,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
 	if (entry >= mem->entries)
 		return -EFAULT;
 
+	if (pageshift > mem->pageshift)
+		return -EFAULT;
+
 	pa = (void *) vmalloc_to_phys(va);
 	if (!pa)
 		return -EFAULT;
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index b751dd6..b4c68f3 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -467,7 +467,7 @@ static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
 	if (!mem)
 		return -EINVAL;
 
-	ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
+	ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
 	if (ret)
 		return -EINVAL;
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH] selftests/powerpc: Add a test of wild bctr
From: Michael Ellerman @ 2018-07-24  5:53 UTC (permalink / raw)
  To: linuxppc-dev

This tests that a bctr (Branch to counter and link), ie. a function
call, to a wildly out-of-bounds address is handled correctly.

Some old kernel versions didn't handle it correctly, see eg:

  "powerpc/slb: Force a full SLB flush when we insert for a bad EA"
  https://lists.ozlabs.org/pipermail/linuxppc-dev/2017-April/157397.html

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/include/reg.h  |   1 +
 tools/testing/selftests/powerpc/mm/.gitignore  |   3 +-
 tools/testing/selftests/powerpc/mm/Makefile    |   2 +-
 tools/testing/selftests/powerpc/mm/wild_bctr.c | 153 +++++++++++++++++++++++++
 4 files changed, 157 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/mm/wild_bctr.c

diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 7f348c059bc2..52b4710469d2 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -17,6 +17,7 @@
 				    : "memory")
 
 #define mb()		asm volatile("sync" : : : "memory");
+#define barrier()	asm volatile("" : : : "memory");
 
 #define SPRN_MMCR2     769
 #define SPRN_MMCRA     770
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 7d7c42ed6de9..ba919308fe30 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -2,4 +2,5 @@ hugetlb_vs_thp_test
 subpage_prot
 tempfile
 prot_sao
-segv_errors
\ No newline at end of file
+segv_errors
+wild_bctr
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 8ebbe96d80a8..893ad41460af 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,7 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr
 TEST_GEN_FILES := tempfile
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c
new file mode 100644
index 000000000000..f8a2901dc44e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Michael Ellerman, IBM Corp.
+ *
+ * Test that an out-of-bounds branch to counter behaves as expected.
+ */
+
+#include <setjmp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+#define BAD_NIP	0x788c545a18000000ull
+
+static struct pt_regs signal_regs;
+static jmp_buf setjmp_env;
+
+static void save_regs(ucontext_t *ctxt)
+{
+	struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+	memcpy(&signal_regs, regs, sizeof(signal_regs));
+}
+
+static void segv_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+	save_regs(ctxt_v);
+	longjmp(setjmp_env, 1);
+}
+
+static void usr2_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+	save_regs(ctxt_v);
+}
+
+static int ok(void)
+{
+	printf("Everything is OK in here.\n");
+	return 0;
+}
+
+#define REG_POISON	0x5a5aUL
+#define POISONED_REG(n)	((REG_POISON << 48) | ((n) << 32) | (REG_POISON << 16) | (n))
+
+static inline void poison_regs(void)
+{
+	#define POISON_REG(n)	\
+	  "lis  " __stringify(n) "," __stringify(REG_POISON) ";" \
+	  "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";" \
+	  "sldi " __stringify(n) "," __stringify(n) ", 32 ;" \
+	  "oris " __stringify(n) "," __stringify(n) "," __stringify(REG_POISON) ";" \
+	  "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";"
+
+	asm (POISON_REG(15)
+	     POISON_REG(16)
+	     POISON_REG(17)
+	     POISON_REG(18)
+	     POISON_REG(19)
+	     POISON_REG(20)
+	     POISON_REG(21)
+	     POISON_REG(22)
+	     POISON_REG(23)
+	     POISON_REG(24)
+	     POISON_REG(25)
+	     POISON_REG(26)
+	     POISON_REG(27)
+	     POISON_REG(28)
+	     POISON_REG(29)
+	     : // outputs
+	     : // inputs
+	     : "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
+	       "26", "27", "28", "29"
+	);
+	#undef POISON_REG
+}
+
+static int check_regs(void)
+{
+	unsigned long i;
+
+	for (i = 15; i <= 29; i++)
+		FAIL_IF(signal_regs.gpr[i] != POISONED_REG(i));
+
+	printf("Regs OK\n");
+	return 0;
+}
+
+static void dump_regs(void)
+{
+	for (int i = 0; i < 32; i += 4) {
+		printf("r%02d 0x%016lx  r%02d 0x%016lx  " \
+		       "r%02d 0x%016lx  r%02d 0x%016lx\n",
+		       i, signal_regs.gpr[i],
+		       i+1, signal_regs.gpr[i+1],
+		       i+2, signal_regs.gpr[i+2],
+		       i+3, signal_regs.gpr[i+3]);
+	}
+}
+
+int test_wild_bctr(void)
+{
+	int (*func_ptr)(void);
+	struct sigaction segv = {
+		.sa_sigaction = segv_handler,
+		.sa_flags = SA_SIGINFO
+	};
+	struct sigaction usr2 = {
+		.sa_sigaction = usr2_handler,
+		.sa_flags = SA_SIGINFO
+	};
+
+	FAIL_IF(sigaction(SIGSEGV, &segv, NULL));
+	FAIL_IF(sigaction(SIGUSR2, &usr2, NULL));
+
+	bzero(&signal_regs, sizeof(signal_regs));
+
+	if (setjmp(setjmp_env) == 0) {
+		func_ptr = ok;
+		func_ptr();
+
+		kill(getpid(), SIGUSR2);
+		printf("Regs before:\n");
+		dump_regs();
+		bzero(&signal_regs, sizeof(signal_regs));
+
+		poison_regs();
+
+		func_ptr = (int (*)(void))BAD_NIP;
+		func_ptr();
+
+		FAIL_IF(1); /* we didn't segv? */
+	}
+
+	FAIL_IF(signal_regs.nip != BAD_NIP);
+
+	printf("All good - took SEGV as expected branching to 0x%llx\n", BAD_NIP);
+
+	dump_regs();
+	FAIL_IF(check_regs());
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_wild_bctr, "wild_bctr");
+}
-- 
2.14.1

^ permalink raw reply related

* [PATCH] powerpc/e200: Skip tlb1 entries used for kernel mapping
From: Bharat Bhushan @ 2018-07-24  5:59 UTC (permalink / raw)
  To: benh, paulus, mpe, linuxppc-dev, linux-kernel; +Cc: Bharat Bhushan

The E200 has only TLB1; it does not have TLB0.
So TLB1 entries are used for mapping both kernel and user space.
The TLB miss handler for E200 does not consider skipping the TLB1
entries used for kernel mapping. This patch ensures that we skip
TLB1 entries used for kernel mapping (tlbcam_index).

Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
---
 arch/powerpc/kernel/head_fsl_booke.S | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index bf4c602..951fb96 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -801,12 +801,28 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
 	/* Round robin TLB1 entries assignment */
 	mfspr	r12, SPRN_MAS0
 
+	/* Get first free tlbcam entry */
+	lis	r11, tlbcam_index@ha
+	lwz	r11, tlbcam_index@l(r11)
+
+	/* Extract MAS0(NV) */
+	andi.	r13, r12, 0xfff
+	cmpw	0, r13, r11
+	blt	0, 5f
+	b	6f
+5:
+	/* When NV is less than first free tlbcam entry, use first free
+	 * tlbcam entry for ESEL and set NV */
+	rlwimi	r12, r11, 16, 4, 15
+	addi	r11, r11, 1
+	rlwimi	r12, r11, 0, 20, 31
+	b	7f
+6:
 	/* Extract TLB1CFG(NENTRY) */
 	mfspr	r11, SPRN_TLB1CFG
 	andi.	r11, r11, 0xfff
 
-	/* Extract MAS0(NV) */
-	andi.	r13, r12, 0xfff
+	/* Set MAS0(NV) for next TLB miss exception */
 	addi	r13, r13, 1
 	cmpw	0, r13, r11
 	addi	r12, r12, 1
-- 
1.9.3

^ permalink raw reply related

* [PATCH v4 0/2] powerpc: Detection and scheduler optimization for POWER9 bigcore
From: Gautham R. Shenoy @ 2018-07-24  6:14 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Michael Neuling,
	Vaidyanathan Srinivasan, Akshay Adiga, Shilpasri G Bhat,
	Oliver O'Halloran, Nicholas Piggin, Murilo Opsfelder Araujo
  Cc: linuxppc-dev, linux-kernel, Gautham R. Shenoy

From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>

Hi,

This is the fourth iteration of the patchset to add support for
big-core on POWER9.

The previous versions can be found here:

v3: https://lkml.org/lkml/2018/7/6/255
v2: https://lkml.org/lkml/2018/7/3/401
v1: https://lkml.org/lkml/2018/5/11/245

Changes :
v3 --> v4:
   - Build fix for powerpc-g5 : Enable CPU_FTR_ASYM_SMT only on
     CONFIG_PPC_POWERNV and CONFIG_PPC_PSERIES.
   - Fixed a minor error in the ABI description.

v2 --> v3
    - Set sane values in the tg->property, tg->nr_groups inside
    parse_thread_groups before returning due to an error.
    - Define a helper function to determine whether a CPU device node
      is a big-core or not.
    - Updated the comments around the functions to describe the
      arguments passed to them.

v1 --> v2
    - Added comments explaining the "ibm,thread-groups" device tree property.
    - Uses cleaner device-tree parsing functions to parse the u32 arrays.
    - Adds a sysfs file listing the small-core siblings for every CPU.
    - Enables the scheduler optimization by setting the CPU_FTR_ASYM_SMT bit
      in the cur_cpu_spec->cpu_features on detecting the presence
      of interleaved big-core.
    - Handles the corner case where there is only a single thread-group
      or when there is a single thread in a thread-group.

Description:
~~~~~~~~~~~~~~~~~~~~
A pair of IBM POWER9 SMT4 cores can be fused together to form a
big-core with 8 SMT threads. This can be discovered via the
"ibm,thread-groups" CPU property in the device tree, which
indicates which groups of threads share the L1 cache, translation
cache and instruction data flow.  If there are multiple such groups of
threads, then the core is a big-core. Furthermore, the thread-ids of
such a big-core are obtained by interleaving the thread-ids of the
component SMT4 cores.

Eg: Threads in the pair of component SMT4 cores of an interleaved
big-core are numbered {0,2,4,6} and {1,3,5,7} respectively.

On such a big-core, when multiple tasks are scheduled to run on the
big-core, we get the best performance when the tasks are spread across
the pair of SMT4 cores.

The Linux scheduler supports a flag called "SD_ASYM_PACKING" which,
when set in the SMT sched-domain, biases the load-balancing of
tasks towards the smaller-numbered threads in the core. On a big-core
whose threads are interleavings of the threads of the small cores,
enabling SD_ASYM_PACKING in the SMT sched-domain automatically results
in spreading the tasks uniformly across the associated pair of SMT4
cores, thereby yielding better performance.

This patchset contains two patches which, on detecting the presence of
interleaved big-cores, will enable the CPU_FTR_ASYM_SMT bit in the
cur_cpu_spec->cpu_features.

Patch 1: adds support to detect the presence of
big-cores and reports the small-core siblings of each CPU X
via the sysfs file "/sys/devices/system/cpu/cpuX/big_core_siblings".

Patch 2: checks if the thread-ids of the component small-cores are
interleaved, in which case we enable the CPU_FTR_ASYM_SMT bit in
the cur_cpu_spec->cpu_features which results in the SD_ASYM_PACKING
flag being set at the SMT level sched-domain.

Results:
~~~~~~~~~~~~~~~~~
Experimental results for ebizzy with 2 threads, bound to a single big-core
show a marked improvement with this patchset over the 4.18-rc5 vanilla
kernel.

The results of 100 such runs for the 4.18-rc5 kernel and the 4.18-rc5 +
big-core-patches are as follows:

4.18-rc5 vanilla:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        records/s    :  # samples  : Histogram
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[0 - 1000000]        :      0      : #
[1000000 - 2000000]  :      7      : ##
[2000000 - 3000000]  :      17     : ####
[3000000 - 4000000]  :      18     : ####
[4000000 - 5000000]  :      3      : #
[5000000 - 6000000]  :      55     : ############

4.18-rc5 + big-core-patches:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        records/s    :  # samples  : Histogram
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[0 - 1000000]        :      0      : #
[1000000 - 2000000]  :      0      : #
[2000000 - 3000000]  :      8      : ##
[3000000 - 4000000]  :      0      : #
[4000000 - 5000000]  :      0      : #
[5000000 - 6000000]  :      92     : ###################

Gautham R. Shenoy (2):
  powerpc: Detect the presence of big-cores via "ibm,thread-groups"
  powerpc: Enable CPU_FTR_ASYM_SMT for interleaved big-cores

 Documentation/ABI/testing/sysfs-devices-system-cpu |   8 +
 arch/powerpc/include/asm/cputhreads.h              |  22 ++
 arch/powerpc/kernel/setup-common.c                 | 229 ++++++++++++++++++++-
 arch/powerpc/kernel/sysfs.c                        |  35 ++++
 4 files changed, 293 insertions(+), 1 deletion(-)

-- 
1.9.4

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox