LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3 10/16] powerpc: fix includes in asm/processor.h
From: Christophe Leroy @ 2018-07-05 16:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev
In-Reply-To: <cover.1530807556.git.christophe.leroy@c-s.fr>

Remove superfluous includes and add missing ones.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/hw_breakpoint.h | 1 +
 arch/powerpc/include/asm/processor.h     | 5 ++---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index 8e7b09703ca4..3637588d3f6d 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -55,6 +55,7 @@ struct arch_hw_breakpoint {
 struct perf_event;
 struct pmu;
 struct perf_sample_data;
+struct task_struct;
 
 #define HW_BREAKPOINT_ALIGN 0x7
 
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 5debe337ea9d..52fadded5c1e 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -39,10 +39,9 @@
 #endif /* CONFIG_PPC64 */
 
 #ifndef __ASSEMBLY__
-#include <linux/compiler.h>
-#include <linux/cache.h>
+#include <linux/types.h>
+#include <asm/thread_info.h>
 #include <asm/ptrace.h>
-#include <asm/types.h>
 #include <asm/hw_breakpoint.h>
 
 /* We do _not_ want to define new machine types at all, those must die
-- 
2.13.3

^ permalink raw reply related

* [PATCH v3 11/16] powerpc/nohash: fix hash related comments in pgtable.h
From: Christophe Leroy @ 2018-07-05 16:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev
In-Reply-To: <cover.1530807556.git.christophe.leroy@c-s.fr>

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/nohash/32/pgtable.h |  4 ----
 arch/powerpc/include/asm/nohash/64/pgtable.h | 18 ++++--------------
 2 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 79805e0dad27..a507a65b0866 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -223,10 +223,6 @@ static inline unsigned long long pte_update(pte_t *p,
 }
 #endif /* CONFIG_PTE_64BIT */
 
-/*
- * 2.6 calls this without flushing the TLB entry; this is wrong
- * for our hash-based implementation, we fix that up here.
- */
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index fe05b3e03cf1..7cd6809f4d33 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -3,7 +3,7 @@
 #define _ASM_POWERPC_NOHASH_64_PGTABLE_H
 /*
  * This file contains the functions and defines necessary to modify and use
- * the ppc64 hashed page table.
+ * the ppc64 non-hashed page table.
  */
 
 #include <asm/nohash/64/pgtable-4k.h>
@@ -38,7 +38,7 @@
 
 /*
  * The vmalloc space starts at the beginning of that region, and
- * occupies half of it on hash CPUs and a quarter of it on Book3E
+ * occupies a quarter of it on Book3E
  * (we keep a quarter for the virtual memmap)
  */
 #define VMALLOC_START	KERN_VIRT_START
@@ -78,7 +78,7 @@
 
 /*
  * Defines the address of the vmemap area, in its own region on
- * hash table CPUs and after the vmalloc space on Book3E
+ * after the vmalloc space on Book3E
  */
 #define VMEMMAP_BASE		VMALLOC_END
 #define VMEMMAP_END		KERN_IO_START
@@ -248,14 +248,6 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
 }
 
-/*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty. The generic routines only flush if the
- * entry was young or dirty which is not good enough.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 #define ptep_clear_flush_young(__vma, __address, __ptep)		\
 ({									\
@@ -279,9 +271,7 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 }
 
 
-/* Set the dirty and/or accessed bits atomically in a linux PTE, this
- * function doesn't need to flush the hash entry
- */
+/* Set the dirty and/or accessed bits atomically in a linux PTE */
 static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 					   pte_t *ptep, pte_t entry,
 					   unsigned long address,
-- 
2.13.3

^ permalink raw reply related

* [PATCH v3 12/16] powerpc/44x: remove page.h from mmu-44x.h
From: Christophe Leroy @ 2018-07-05 16:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev
In-Reply-To: <cover.1530807556.git.christophe.leroy@c-s.fr>

mmu-44x.h doesn't need asm/page.h if the PAGE_SHIFT tests are replaced by CONFIG_PPC_XX_PAGES tests.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/mmu-44x.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index 9bdbe1d1c9b9..295b3dbb2698 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -5,7 +5,6 @@
  * PPC440 support
  */
 
-#include <asm/page.h>
 #include <asm/asm-const.h>
 
 #define PPC44x_MMUCR_TID	0x000000ff
@@ -125,19 +124,19 @@ typedef struct {
 /* Size of the TLBs used for pinning in lowmem */
 #define PPC_PIN_SIZE	(1 << 28)	/* 256M */
 
-#if (PAGE_SHIFT == 12)
+#if defined(CONFIG_PPC_4K_PAGES)
 #define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
 #define PPC47x_TLBE_SIZE	PPC47x_TLB0_4K
 #define mmu_virtual_psize	MMU_PAGE_4K
-#elif (PAGE_SHIFT == 14)
+#elif defined(CONFIG_PPC_16K_PAGES)
 #define PPC44x_TLBE_SIZE	PPC44x_TLB_16K
 #define PPC47x_TLBE_SIZE	PPC47x_TLB0_16K
 #define mmu_virtual_psize	MMU_PAGE_16K
-#elif (PAGE_SHIFT == 16)
+#elif defined(CONFIG_PPC_64K_PAGES)
 #define PPC44x_TLBE_SIZE	PPC44x_TLB_64K
 #define PPC47x_TLBE_SIZE	PPC47x_TLB0_64K
 #define mmu_virtual_psize	MMU_PAGE_64K
-#elif (PAGE_SHIFT == 18)
+#elif defined(CONFIG_PPC_256K_PAGES)
 #define PPC44x_TLBE_SIZE	PPC44x_TLB_256K
 #define mmu_virtual_psize	MMU_PAGE_256K
 #else
-- 
2.13.3

^ permalink raw reply related

* [PATCH v3 13/16] powerpc: split reg.h in two parts
From: Christophe Leroy @ 2018-07-05 16:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev
In-Reply-To: <cover.1530807556.git.christophe.leroy@c-s.fr>

Move all macros involving feature-fixups into a new file, reg-ftr.h.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/exception-64s.h |  1 +
 arch/powerpc/include/asm/reg-ftr.h       | 71 ++++++++++++++++++++++++++++++++
 arch/powerpc/include/asm/reg.h           | 42 -------------------
 arch/powerpc/kernel/entry_64.S           |  1 +
 arch/powerpc/kernel/exceptions-64s.S     |  1 +
 arch/powerpc/kernel/head_64.S            |  1 +
 arch/powerpc/kernel/idle_book3s.S        |  1 +
 arch/powerpc/kernel/paca.c               |  1 +
 arch/powerpc/kernel/process.c            |  1 +
 arch/powerpc/kernel/tm.S                 |  1 +
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  |  1 +
 arch/powerpc/kvm/book3s_rmhandlers.S     |  1 +
 arch/powerpc/kvm/book3s_segment.S        |  1 +
 arch/powerpc/kvm/tm.S                    |  1 +
 14 files changed, 83 insertions(+), 42 deletions(-)
 create mode 100644 arch/powerpc/include/asm/reg-ftr.h

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 1f2efc1a9769..9d748eaeb9ec 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -36,6 +36,7 @@
  */
 #include <asm/head-64.h>
 #include <asm/feature-fixups.h>
+#include <asm/reg-ftr.h>
 
 /* PACA save area offsets (exgen, exmc, etc) */
 #define EX_R9		0
diff --git a/arch/powerpc/include/asm/reg-ftr.h b/arch/powerpc/include/asm/reg-ftr.h
new file mode 100644
index 000000000000..73a024af3a9a
--- /dev/null
+++ b/arch/powerpc/include/asm/reg-ftr.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Contains the definition of registers common to all PowerPC variants.
+ * If a register definition has been changed in a different PowerPC
+ * variant, we will case it in #ifndef XXX ... #endif, and have the
+ * number used in the Programming Environments Manual For 32-Bit
+ * Implementations of the PowerPC Architecture (a.k.a. Green Book) here.
+ */
+
+#ifndef _ASM_POWERPC_REG_FTR_H
+#define _ASM_POWERPC_REG_FTR_H
+#ifdef __KERNEL__
+
+#include <linux/stringify.h>
+#include <asm/cputable.h>
+#include <asm/feature-fixups.h>
+#include <asm/reg.h>
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+#define GET_PACA(rX)					\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_PACA;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_HPACA;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66)
+
+#define SET_PACA(rX)					\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mtspr	SPRN_SPRG_PACA,rX;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mtspr	SPRN_SPRG_HPACA,rX;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66)
+
+#define GET_SCRATCH0(rX)				\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_SCRATCH0;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_HSCRATCH0;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66)
+
+#define SET_SCRATCH0(rX)				\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mtspr	SPRN_SPRG_SCRATCH0,rX;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mtspr	SPRN_SPRG_HSCRATCH0,rX;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66)
+
+#else /* CONFIG_PPC_BOOK3S_64 */
+#define GET_SCRATCH0(rX)	mfspr	rX,SPRN_SPRG_SCRATCH0
+#define SET_SCRATCH0(rX)	mtspr	SPRN_SPRG_SCRATCH0,rX
+
+#endif
+
+#ifdef CONFIG_PPC_BOOK3E_64
+
+#define SET_PACA(rX)	mtspr	SPRN_SPRG_PACA,rX
+#define GET_PACA(rX)	mfspr	rX,SPRN_SPRG_PACA
+
+#endif
+
+#ifndef __ASSEMBLY__
+static inline void mtmsr_isync(unsigned long val)
+{
+	asm volatile(__MTMSR " %0; " ASM_FTR_IFCLR("isync", "nop", %1) : :
+			"r" (val), "i" (CPU_FTR_ARCH_206) : "memory");
+}
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_REG_FTR_H */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 486b7c83b8c5..a8b62363d6ee 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -14,7 +14,6 @@
 #include <linux/stringify.h>
 #include <asm/cputable.h>
 #include <asm/asm-const.h>
-#include <asm/feature-fixups.h>
 
 /* Pickup Book E specific registers. */
 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
@@ -1105,38 +1104,6 @@
 #define SPRN_SPRG_VDSO_READ	SPRN_USPRG3
 #define SPRN_SPRG_VDSO_WRITE	SPRN_SPRG3
 
-#define GET_PACA(rX)					\
-	BEGIN_FTR_SECTION_NESTED(66);			\
-	mfspr	rX,SPRN_SPRG_PACA;			\
-	FTR_SECTION_ELSE_NESTED(66);			\
-	mfspr	rX,SPRN_SPRG_HPACA;			\
-	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66)
-
-#define SET_PACA(rX)					\
-	BEGIN_FTR_SECTION_NESTED(66);			\
-	mtspr	SPRN_SPRG_PACA,rX;			\
-	FTR_SECTION_ELSE_NESTED(66);			\
-	mtspr	SPRN_SPRG_HPACA,rX;			\
-	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66)
-
-#define GET_SCRATCH0(rX)				\
-	BEGIN_FTR_SECTION_NESTED(66);			\
-	mfspr	rX,SPRN_SPRG_SCRATCH0;			\
-	FTR_SECTION_ELSE_NESTED(66);			\
-	mfspr	rX,SPRN_SPRG_HSCRATCH0;			\
-	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66)
-
-#define SET_SCRATCH0(rX)				\
-	BEGIN_FTR_SECTION_NESTED(66);			\
-	mtspr	SPRN_SPRG_SCRATCH0,rX;			\
-	FTR_SECTION_ELSE_NESTED(66);			\
-	mtspr	SPRN_SPRG_HSCRATCH0,rX;			\
-	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66)
-
-#else /* CONFIG_PPC_BOOK3S_64 */
-#define GET_SCRATCH0(rX)	mfspr	rX,SPRN_SPRG_SCRATCH0
-#define SET_SCRATCH0(rX)	mtspr	SPRN_SPRG_SCRATCH0,rX
-
 #endif
 
 #ifdef CONFIG_PPC_BOOK3E_64
@@ -1150,9 +1117,6 @@
 #define SPRN_SPRG_VDSO_READ	SPRN_USPRG7
 #define SPRN_SPRG_VDSO_WRITE	SPRN_SPRG7
 
-#define SET_PACA(rX)	mtspr	SPRN_SPRG_PACA,rX
-#define GET_PACA(rX)	mfspr	rX,SPRN_SPRG_PACA
-
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -1338,12 +1302,6 @@
 #define __MTMSR		"mtmsr"
 #endif
 
-static inline void mtmsr_isync(unsigned long val)
-{
-	asm volatile(__MTMSR " %0; " ASM_FTR_IFCLR("isync", "nop", %1) : :
-			"r" (val), "i" (CPU_FTR_ARCH_206) : "memory");
-}
-
 #define mfspr(rn)	({unsigned long rval; \
 			asm volatile("mfspr %0," __stringify(rn) \
 				: "=r" (rval)); rval;})
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 31068be6be59..5651d50d8d10 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -45,6 +45,7 @@
 #include <asm/exception-64e.h>
 #endif
 #include <asm/feature-fixups.h>
+#include <asm/reg-ftr.h>
 
 /*
  * System calls.
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index a3c79aed2f61..06907e5e16a7 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -19,6 +19,7 @@
 #include <asm/cpuidle.h>
 #include <asm/head-64.h>
 #include <asm/feature-fixups.h>
+#include <asm/reg-ftr.h>
 
 /*
  * There are a few constraints to be concerned with.
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 4898e9491a1c..fb959742aaa2 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -25,6 +25,7 @@
 #include <linux/threads.h>
 #include <linux/init.h>
 #include <asm/reg.h>
+#include <asm/reg-ftr.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
 #include <asm/ppc_asm.h>
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 0cb6ffe992c2..c87aaacda6d7 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -25,6 +25,7 @@
 #include <asm/mmu.h>
 #include <asm/asm-compat.h>
 #include <asm/feature-fixups.h>
+#include <asm/reg-ftr.h>
 
 #undef DEBUG
 
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 0ee3e6d50f28..75ae2a48d02d 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -17,6 +17,7 @@
 #include <asm/sections.h>
 #include <asm/pgtable.h>
 #include <asm/kexec.h>
+#include <asm/reg-ftr.h>
 
 #include "setup.h"
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6061efb369e8..402a0ce96d6a 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -65,6 +65,7 @@
 #include <asm/livepatch.h>
 #include <asm/cpu_has_feature.h>
 #include <asm/asm-prototypes.h>
+#include <asm/reg-ftr.h>
 
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index c0dcdd57cd7f..ac833198f308 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -11,6 +11,7 @@
 #include <asm/ppc-opcode.h>
 #include <asm/ptrace.h>
 #include <asm/reg.h>
+#include <asm/reg-ftr.h>
 #include <asm/bug.h>
 #include <asm/export.h>
 #include <asm/feature-fixups.h>
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index ac53be00039a..2d01fcf44991 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -20,6 +20,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/kvm_asm.h>
 #include <asm/reg.h>
+#include <asm/reg-ftr.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index b0089e04c8c8..17a08780fb63 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -20,6 +20,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/kvm_asm.h>
 #include <asm/reg.h>
+#include <asm/reg-ftr.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
 #include <asm/asm-offsets.h>
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index e5c542a7c5ac..6cb740cd8af4 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -21,6 +21,7 @@
 
 #include <asm/asm-compat.h>
 #include <asm/feature-fixups.h>
+#include <asm/reg-ftr.h>
 
 #if defined(CONFIG_PPC_BOOK3S_64)
 
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index 90e330f21356..112b8dee5a6d 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -15,6 +15,7 @@
  */
 
 #include <asm/reg.h>
+#include <asm/reg-ftr.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/export.h>
-- 
2.13.3

^ permalink raw reply related

* [PATCH v3 14/16] powerpc: Split synch.h in two parts
From: Christophe Leroy @ 2018-07-05 16:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev
In-Reply-To: <cover.1530807556.git.christophe.leroy@c-s.fr>

Move the feature-fixups related macros (LWSYNC and the PPC_*_BARRIER definitions) from synch.h to synch-ftr.h.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/atomic.h                 |  1 +
 arch/powerpc/include/asm/barrier.h                |  1 +
 arch/powerpc/include/asm/bitops.h                 |  1 +
 arch/powerpc/include/asm/cmpxchg.h                |  1 +
 arch/powerpc/include/asm/spinlock.h               |  1 +
 arch/powerpc/include/asm/{synch.h => synch-ftr.h} | 22 +++--------------
 arch/powerpc/include/asm/synch.h                  | 30 -----------------------
 arch/powerpc/lib/feature-fixups-test.S            |  1 +
 8 files changed, 9 insertions(+), 49 deletions(-)
 copy arch/powerpc/include/asm/{synch.h => synch-ftr.h} (66%)

diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index cbdb0b7e60a3..49a929ec5435 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -11,6 +11,7 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 #include <asm/asm-405.h>
+#include <asm/synch-ftr.h>
 
 #define ATOMIC_INIT(i)		{ (i) }
 
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index de1316874e45..d90a1463967b 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -6,6 +6,7 @@
 #define _ASM_POWERPC_BARRIER_H
 
 #include <asm/asm-const.h>
+#include <asm/synch-ftr.h>
 
 /*
  * Memory barrier.
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index ff71566dadee..b8bf2f5b75ce 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -44,6 +44,7 @@
 
 #include <linux/compiler.h>
 #include <asm/asm-compat.h>
+#include <asm/synch-ftr.h>
 #include <asm/synch.h>
 #include <asm/asm-405.h>
 
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index 27183871eb3b..d94a67a1a574 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -4,6 +4,7 @@
 
 #ifdef __KERNEL__
 #include <linux/compiler.h>
+#include <asm/synch-ftr.h>
 #include <asm/synch.h>
 #include <linux/bug.h>
 #include <asm/asm-405.h>
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 685c72310f5d..182f950a4c5d 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -24,6 +24,7 @@
 #include <asm/paca.h>
 #include <asm/hvcall.h>
 #endif
+#include <asm/synch-ftr.h>
 #include <asm/synch.h>
 #include <asm/ppc-opcode.h>
 #include <asm/asm-405.h>
diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch-ftr.h
similarity index 66%
copy from arch/powerpc/include/asm/synch.h
copy to arch/powerpc/include/asm/synch-ftr.h
index aca70fb43147..f86c536bd351 100644
--- a/arch/powerpc/include/asm/synch.h
+++ b/arch/powerpc/include/asm/synch-ftr.h
@@ -1,27 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_SYNCH_H 
-#define _ASM_POWERPC_SYNCH_H 
+#ifndef _ASM_POWERPC_SYNCH_FTR_H
+#define _ASM_POWERPC_SYNCH_FTR_H
 #ifdef __KERNEL__
 
 #include <asm/feature-fixups.h>
 #include <asm/asm-const.h>
 
-#ifndef __ASSEMBLY__
-extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup;
-extern void do_lwsync_fixups(unsigned long value, void *fixup_start,
-			     void *fixup_end);
-
-static inline void eieio(void)
-{
-	__asm__ __volatile__ ("eieio" : : : "memory");
-}
-
-static inline void isync(void)
-{
-	__asm__ __volatile__ ("isync" : : : "memory");
-}
-#endif /* __ASSEMBLY__ */
-
 #if defined(__powerpc64__)
 #    define LWSYNC	lwsync
 #elif defined(CONFIG_E500)
@@ -50,4 +34,4 @@ static inline void isync(void)
 #endif
 
 #endif /* __KERNEL__ */
-#endif	/* _ASM_POWERPC_SYNCH_H */
+#endif	/* _ASM_POWERPC_SYNCH_FTR_H */
diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h
index aca70fb43147..48b23168ea9e 100644
--- a/arch/powerpc/include/asm/synch.h
+++ b/arch/powerpc/include/asm/synch.h
@@ -3,9 +3,6 @@
 #define _ASM_POWERPC_SYNCH_H 
 #ifdef __KERNEL__
 
-#include <asm/feature-fixups.h>
-#include <asm/asm-const.h>
-
 #ifndef __ASSEMBLY__
 extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup;
 extern void do_lwsync_fixups(unsigned long value, void *fixup_start,
@@ -22,32 +19,5 @@ static inline void isync(void)
 }
 #endif /* __ASSEMBLY__ */
 
-#if defined(__powerpc64__)
-#    define LWSYNC	lwsync
-#elif defined(CONFIG_E500)
-#    define LWSYNC					\
-	START_LWSYNC_SECTION(96);			\
-	sync;						\
-	MAKE_LWSYNC_SECTION_ENTRY(96, __lwsync_fixup);
-#else
-#    define LWSYNC	sync
-#endif
-
-#ifdef CONFIG_SMP
-#define __PPC_ACQUIRE_BARRIER				\
-	START_LWSYNC_SECTION(97);			\
-	isync;						\
-	MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup);
-#define PPC_ACQUIRE_BARRIER	 "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
-#define PPC_RELEASE_BARRIER	 stringify_in_c(LWSYNC) "\n"
-#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(sync) "\n"
-#define PPC_ATOMIC_EXIT_BARRIER	 "\n" stringify_in_c(sync) "\n"
-#else
-#define PPC_ACQUIRE_BARRIER
-#define PPC_RELEASE_BARRIER
-#define PPC_ATOMIC_ENTRY_BARRIER
-#define PPC_ATOMIC_EXIT_BARRIER
-#endif
-
 #endif /* __KERNEL__ */
 #endif	/* _ASM_POWERPC_SYNCH_H */
diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S
index ee7c5fd5fc64..8c781b2e467b 100644
--- a/arch/powerpc/lib/feature-fixups-test.S
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -10,6 +10,7 @@
 
 #include <asm/feature-fixups.h>
 #include <asm/ppc_asm.h>
+#include <asm/synch-ftr.h>
 #include <asm/synch.h>
 #include <asm/asm-compat.h>
 
-- 
2.13.3

^ permalink raw reply related

* [PATCH v3 15/16] powerpc: remove unnecessary inclusion of asm/tlbflush.h
From: Christophe Leroy @ 2018-07-05 16:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev
In-Reply-To: <cover.1530807556.git.christophe.leroy@c-s.fr>

asm/tlbflush.h is only needed for:
- using functions xxx_flush_tlb_xxx()
- using MMU_NO_CONTEXT
- including asm-generic/pgtable.h

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/highmem.h    | 2 +-
 arch/powerpc/include/asm/tlb.h        | 1 -
 arch/powerpc/kvm/book3s.c             | 1 -
 arch/powerpc/kvm/book3s_32_mmu.c      | 1 -
 arch/powerpc/kvm/book3s_64_mmu.c      | 1 -
 arch/powerpc/kvm/book3s_64_mmu_hv.c   | 1 -
 arch/powerpc/kvm/book3s_64_vio.c      | 1 -
 arch/powerpc/kvm/book3s_64_vio_hv.c   | 1 -
 arch/powerpc/kvm/book3s_hv.c          | 1 -
 arch/powerpc/kvm/book3s_hv_rm_mmu.c   | 1 -
 arch/powerpc/kvm/book3s_pr.c          | 1 -
 arch/powerpc/kvm/e500.c               | 1 -
 arch/powerpc/kvm/e500mc.c             | 1 -
 arch/powerpc/kvm/powerpc.c            | 1 -
 arch/powerpc/mm/fault.c               | 1 -
 arch/powerpc/mm/hash_native_64.c      | 1 -
 arch/powerpc/mm/hash_utils_64.c       | 1 -
 arch/powerpc/mm/mmu_context_hash32.c  | 1 -
 arch/powerpc/mm/mmu_decl.h            | 1 -
 arch/powerpc/mm/subpage-prot.c        | 1 -
 arch/powerpc/platforms/pseries/lpar.c | 1 -
 arch/powerpc/sysdev/cpm1.c            | 1 -
 22 files changed, 1 insertion(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h
index cec820f961da..a4b65b186ec6 100644
--- a/arch/powerpc/include/asm/highmem.h
+++ b/arch/powerpc/include/asm/highmem.h
@@ -25,7 +25,7 @@
 
 #include <linux/interrupt.h>
 #include <asm/kmap_types.h>
-#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
 #include <asm/page.h>
 #include <asm/fixmap.h>
 
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index 9138baccebb0..6d2ba7c779dc 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -17,7 +17,6 @@
 #include <asm/pgtable.h>
 #endif
 #include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
 #ifndef __powerpc64__
 #include <asm/page.h>
 #include <asm/mmu.h>
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index edaf4720d156..87348e498c89 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -28,7 +28,6 @@
 #include <asm/reg.h>
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
 #include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/kvm_ppc.h>
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 45c8ea4a0487..612169988a3d 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -23,7 +23,6 @@
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
 
-#include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index cf9d686e8162..c92dd25bed23 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -23,7 +23,6 @@
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
 
-#include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 #include <asm/book3s/64/mmu-hash.h>
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7f3a8cf5d66f..3c0e8fb2b773 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -29,7 +29,6 @@
 #include <linux/file.h>
 #include <linux/debugfs.h>
 
-#include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 #include <asm/book3s/64/mmu-hash.h>
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index d066e37551ec..b3c1935229b0 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -31,7 +31,6 @@
 #include <linux/iommu.h>
 #include <linux/file.h>
 
-#include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 #include <asm/book3s/64/mmu-hash.h>
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 80d50d67b8c5..3315a56b6737 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -28,7 +28,6 @@
 #include <linux/list.h>
 #include <linux/stringify.h>
 
-#include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 #include <asm/book3s/64/mmu-hash.h>
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index de686b340f4a..b4a5cc8525a0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -53,7 +53,6 @@
 #include <asm/disassemble.h>
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
 #include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/kvm_ppc.h>
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 1f22d9e977d4..a67cf1cdeda4 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/log2.h>
 
-#include <asm/tlbflush.h>
 #include <asm/trace.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index c3b8006f0eac..47ee43bbd696 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -27,7 +27,6 @@
 #include <asm/reg.h>
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
 #include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/kvm_ppc.h>
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index f9f6468f4171..afd3c255a427 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -21,7 +21,6 @@
 
 #include <asm/reg.h>
 #include <asm/cputable.h>
-#include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
 
 #include "../mm/mmu_decl.h"
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index d0b6b5788afc..d31645491a93 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -21,7 +21,6 @@
 
 #include <asm/reg.h>
 #include <asm/cputable.h>
-#include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
 #include <asm/dbell.h>
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0e8c20c5eaac..3ccc386b380d 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -33,7 +33,6 @@
 #include <asm/cputable.h>
 #include <linux/uaccess.h>
 #include <asm/kvm_ppc.h>
-#include <asm/tlbflush.h>
 #include <asm/cputhreads.h>
 #include <asm/irqflags.h>
 #include <asm/iommu.h>
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b1ca7a0974e3..7d262c6437c4 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -42,7 +42,6 @@
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
 #include <asm/siginfo.h>
 #include <asm/debug.h>
 
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 1baabc74c4ae..4958f2a4f1d5 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -23,7 +23,6 @@
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
-#include <asm/tlbflush.h>
 #include <asm/trace.h>
 #include <asm/tlb.h>
 #include <asm/cputable.h>
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 8318716e5075..af6c70dfe140 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -48,7 +48,6 @@
 #include <linux/uaccess.h>
 #include <asm/machdep.h>
 #include <asm/prom.h>
-#include <asm/tlbflush.h>
 #include <asm/io.h>
 #include <asm/eeh.h>
 #include <asm/tlb.h>
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c
index aa5a7fd89461..921c1e33e941 100644
--- a/arch/powerpc/mm/mmu_context_hash32.c
+++ b/arch/powerpc/mm/mmu_context_hash32.c
@@ -27,7 +27,6 @@
 #include <linux/export.h>
 
 #include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
 
 /*
  * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index c4c0a09a7775..e5d779eed181 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -19,7 +19,6 @@
  *
  */
 #include <linux/mm.h>
-#include <asm/tlbflush.h>
 #include <asm/mmu.h>
 
 #ifdef CONFIG_PPC_MMU_NOHASH
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 9d16ee251fc0..3327551c8b47 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -17,7 +17,6 @@
 
 #include <asm/pgtable.h>
 #include <linux/uaccess.h>
-#include <asm/tlbflush.h>
 
 /*
  * Free all pages allocated for subpage protection maps and pointers.
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 5a392e40f3d2..bc50f1dcc430 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -36,7 +36,6 @@
 #include <asm/machdep.h>
 #include <asm/mmu_context.h>
 #include <asm/iommu.h>
-#include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <asm/prom.h>
 #include <asm/cputable.h>
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index 5240d3a74a10..4f8dcf124828 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -38,7 +38,6 @@
 #include <asm/8xx_immap.h>
 #include <asm/cpm1.h>
 #include <asm/io.h>
-#include <asm/tlbflush.h>
 #include <asm/rheap.h>
 #include <asm/prom.h>
 #include <asm/cpm.h>
-- 
2.13.3

^ permalink raw reply related

* [PATCH v3 16/16] powerpc: split asm/tlbflush.h
From: Christophe Leroy @ 2018-07-05 16:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev
In-Reply-To: <cover.1530807556.git.christophe.leroy@c-s.fr>

Split asm/tlbflush.h into:
asm/nohash/tlbflush.h
asm/book3s/32/tlbflush.h
asm/book3s/64/tlbflush.h (already existing)

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/book3s/32/tlbflush.h    | 25 +++++++
 arch/powerpc/include/asm/book3s/tlbflush.h       | 11 +++
 arch/powerpc/include/asm/{ => nohash}/tlbflush.h | 42 ++----------
 arch/powerpc/include/asm/tlbflush.h              | 86 ++----------------------
 4 files changed, 45 insertions(+), 119 deletions(-)
 create mode 100644 arch/powerpc/include/asm/book3s/32/tlbflush.h
 create mode 100644 arch/powerpc/include/asm/book3s/tlbflush.h
 copy arch/powerpc/include/asm/{ => nohash}/tlbflush.h (57%)

diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h
new file mode 100644
index 000000000000..068085b709fb
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
+#define _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
+
+#define MMU_NO_CONTEXT      (0)
+/*
+ * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
+ */
+extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr);
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			    unsigned long end);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+					unsigned long vmaddr)
+{
+	flush_tlb_page(vma, vmaddr);
+}
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	flush_tlb_mm(mm);
+}
+
+#endif /* _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/book3s/tlbflush.h b/arch/powerpc/include/asm/book3s/tlbflush.h
new file mode 100644
index 000000000000..dec11de41055
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/tlbflush.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_TLBFLUSH_H
+#define _ASM_POWERPC_BOOK3S_TLBFLUSH_H
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/tlbflush.h>
+#else
+#include <asm/book3s/32/tlbflush.h>
+#endif
+
+#endif /* _ASM_POWERPC_BOOK3S_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h
similarity index 57%
copy from arch/powerpc/include/asm/tlbflush.h
copy to arch/powerpc/include/asm/nohash/tlbflush.h
index 7d5a157c7832..b1d8fec29169 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/nohash/tlbflush.h
@@ -1,5 +1,6 @@
-#ifndef _ASM_POWERPC_TLBFLUSH_H
-#define _ASM_POWERPC_TLBFLUSH_H
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_TLBFLUSH_H
+#define _ASM_POWERPC_NOHASH_TLBFLUSH_H
 
 /*
  * TLB flushing:
@@ -13,14 +14,8 @@
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
  *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
  */
-#ifdef __KERNEL__
 
-#ifdef CONFIG_PPC_MMU_NOHASH
 /*
  * TLB flushing for software loaded TLB chips
  *
@@ -55,33 +50,4 @@ extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 #define __flush_tlb_page(mm,addr,p,i)	__local_flush_tlb_page(mm,addr,p,i)
 #endif
 
-#elif defined(CONFIG_PPC_STD_MMU_32)
-
-#define MMU_NO_CONTEXT      (0)
-/*
- * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
- */
-extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr);
-extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-			    unsigned long end);
-extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-static inline void local_flush_tlb_page(struct vm_area_struct *vma,
-					unsigned long vmaddr)
-{
-	flush_tlb_page(vma, vmaddr);
-}
-static inline void local_flush_tlb_mm(struct mm_struct *mm)
-{
-	flush_tlb_mm(mm);
-}
-
-#elif defined(CONFIG_PPC_BOOK3S_64)
-#include <asm/book3s/64/tlbflush.h>
-#else
-#error Unsupported MMU type
-#endif
-
-#endif /*__KERNEL__ */
-#endif /* _ASM_POWERPC_TLBFLUSH_H */
+#endif /* _ASM_POWERPC_NOHASH_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 7d5a157c7832..61fba43bf8b2 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -1,87 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_POWERPC_TLBFLUSH_H
 #define _ASM_POWERPC_TLBFLUSH_H
 
-/*
- * TLB flushing:
- *
- *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
- *  - flush_tlb_page(vma, vmaddr) flushes one page
- *  - local_flush_tlb_mm(mm, full) flushes the specified mm context on
- *                           the local processor
- *  - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor
- *  - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB
- *  - flush_tlb_range(vma, start, end) flushes a range of pages
- *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-#ifdef __KERNEL__
-
-#ifdef CONFIG_PPC_MMU_NOHASH
-/*
- * TLB flushing for software loaded TLB chips
- *
- * TODO: (CONFIG_FSL_BOOKE) determine if flush_tlb_range &
- * flush_tlb_kernel_range are best implemented as tlbia vs
- * specific tlbie's
- */
-
-struct vm_area_struct;
-struct mm_struct;
-
-#define MMU_NO_CONTEXT      	((unsigned int)-1)
-
-extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-			    unsigned long end);
-extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-
-extern void local_flush_tlb_mm(struct mm_struct *mm);
-extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-
-extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
-				   int tsize, int ind);
-
-#ifdef CONFIG_SMP
-extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
-			     int tsize, int ind);
-#else
-#define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
-#define flush_tlb_page(vma,addr)	local_flush_tlb_page(vma,addr)
-#define __flush_tlb_page(mm,addr,p,i)	__local_flush_tlb_page(mm,addr,p,i)
-#endif
-
-#elif defined(CONFIG_PPC_STD_MMU_32)
-
-#define MMU_NO_CONTEXT      (0)
-/*
- * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
- */
-extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr);
-extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-			    unsigned long end);
-extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-static inline void local_flush_tlb_page(struct vm_area_struct *vma,
-					unsigned long vmaddr)
-{
-	flush_tlb_page(vma, vmaddr);
-}
-static inline void local_flush_tlb_mm(struct mm_struct *mm)
-{
-	flush_tlb_mm(mm);
-}
-
-#elif defined(CONFIG_PPC_BOOK3S_64)
-#include <asm/book3s/64/tlbflush.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/book3s/tlbflush.h>
 #else
-#error Unsupported MMU type
-#endif
+#include <asm/nohash/tlbflush.h>
+#endif /* !CONFIG_PPC_BOOK3S */
 
-#endif /*__KERNEL__ */
 #endif /* _ASM_POWERPC_TLBFLUSH_H */
-- 
2.13.3

^ permalink raw reply related

* Re: [RFC PATCH v1] powerpc/radix/kasan: KASAN support for Radix
From: Christophe LEROY @ 2018-07-05 16:32 UTC (permalink / raw)
  To: Balbir Singh, Andrey Ryabinin
  Cc: kasan-dev, linux-mm, Alexander Potapenko,
	open list:LINUX FOR POWERPC (32-BIT AND 64-BIT), Dmitry Vyukov
In-Reply-To: <CAKTCnznzKtZWD25pYysGosns6GQLOnqAOS-BV90FtLOuLwS36Q@mail.gmail.com>

Hello Balbir,

Are you still working on KASAN support?

Thanks,
Christophe

Le 08/08/2017 à 03:18, Balbir Singh a écrit :
> On Mon, Aug 7, 2017 at 10:30 PM, Andrey Ryabinin
> <aryabinin@virtuozzo.com> wrote:
>> On 07/29/2017 05:09 PM, Balbir Singh wrote:
>>> This is the first attempt to implement KASAN for radix
>>> on powerpc64. Aneesh Kumar implemented KASAN for hash 64
>>> in limited mode (support only for kernel linear mapping)
>>> (https://lwn.net/Articles/655642/)
>>>
>>> This patch does the following:
>>> 1. Defines its own zero_page,pte,pmd and pud because
>>> the generic PTRS_PER_PTE, etc are variables on ppc64
>>> book3s. Since the implementation is for radix, we use
>>> the radix constants. This patch uses ARCH_DEFINES_KASAN_ZERO_PTE
>>> for that purpose
>>> 2. There is a new function check_return_arch_not_ready()
>>> which is defined for ppc64/book3s/radix and overrides the
>>> checks in check_memory_region_inline() until kasan setup
>>> is done for the architecture. This is needed
>>> for powerpc. A lot of functions are called in real mode prior
>>> to MMU paging init, we could fix some of this by using
>>> the kasan_early_init() bits, but that just maps the zero
>>> page and does not do useful reporting. For this RFC we
>>> just delay the checks in mem* functions till kasan_init()
>>
>> check_return_arch_not_ready() works only for outline instrumentation
>> and without stack instrumentation.
>>
>> I guess this works for you only because CONFIG_KASAN_SHADOW_OFFSET is not defined.
>> Therefore the test for CFLAGS_KASAN can't pass, as '-fasan-shadow-offset= ' is an invalid option,
>> so CFLAGS_KASAN_MINIMAL is used instead. Or maybe you just used gcc 4.9.x, which doesn't have
>> full kasan support.
>> This is also the reason why some tests don't pass for you.
>>
>> For stack instrumentation you'll have to implement kasan_early_init() and define CONFIG_KASAN_SHADOW_OFFSET.
> 
> Yep, I noticed that a little later when reading the build log,
> scripts/Makefile.kasan does
> print a warning. I guess we'll need to do early_init() because
> kasan_init() can happen only
> once we've setup our memblocks after parsing the device-tree.
> 
>>
>>> 3. This patch renames memcpy/memset/memmove to their
>>> equivalent __memcpy/__memset/__memmove and for files
>>> that skip KASAN via KASAN_SANITIZE, we use the __
>>> variants. This is largely based on Aneesh's patchset
>>> mentioned above
>>> 4. In paca.c, some explicit memcpy inserted by the
>>> compiler/linker is replaced via explicit memcpy
>>> for structure content copying
>>> 5. prom_init and a few other files have KASAN_SANITIZE
>>> set to n, I think with the delayed checks (#2 above)
>>> we might be able to work around many of them
>>> 6. Resizing of virtual address space is done a little
>>> aggressively the size is reduced to 1/4 and totally
>>> to 1/2. For the RFC it was considered OK, since this
>>> is just a debug tool for developers. This can be revisited
>>> in the final implementation
>>>
>>> Tests:
>>>
>>> I ran test_kasan.ko and it reported errors for all test
>>> cases except for
>>>
>>> kasan test: memcg_accounted_kmem_cache allocate memcg accounted object
>>> kasan test: kasan_stack_oob out-of-bounds on stack
>>> kasan test: kasan_global_oob out-of-bounds global variable
>>> kasan test: use_after_scope_test use-after-scope on int
>>> kasan test: use_after_scope_test use-after-scope on array
>>>
>>> Based on my understanding of the test, which is an expected
>>> kasan bug report after each test starting with a "===" line.
>>>
>>
>> Right, with the exception of the memcg_accounted_kmem_cache test.
>> The rest are expected to produce the kasan report unless CFLAGS_KASAN_MINIMAL
>> is used.
>> use_after_scope tests also require fresh gcc 7.
> 
> 
> Yep, Thanks for the review!
> 
> I'll work on a v2 and resend the patches
> 
> Balbir Singh.
> 

^ permalink raw reply

* Re: [PATCH V2 00/10] KASan ppc64 support
From: Christophe LEROY @ 2018-07-05 16:33 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Andrey Ryabinin; +Cc: paulus, linuxppc-dev, LKML
In-Reply-To: <87k2sic6pw.fsf@linux.vnet.ibm.com>

Hi Aneesh,

Are you still working on support for KASan for ppc64?

Thanks,
Christophe


Le 26/08/2015 à 19:14, Aneesh Kumar K.V a écrit :
> Andrey Ryabinin <ryabinin.a.a@gmail.com> writes:
> 
>> 2015-08-26 11:26 GMT+03:00 Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>:
>>> Hi,
>>>
>>> This patchset implements kernel address sanitizer for ppc64.
>>> Since ppc64 virtual address range is divided into different regions,
>>> we can't have one contiguous area for the kasan shadow range. Hence
>>> we don't support the INLINE kasan instrumentation. With Outline
>>> instrumentation, we override the shadow_to_mem and mem_to_shadow
>>> callbacks, so that we map only the kernel linear range (ie,
>>> region with ID 0xc). For region with ID 0xd and 0xf (vmalloc
>>> and vmemmap ) we return the address of the zero page. This
>>> works because kasan doesn't track both vmemmap and vmalloc address.
>>>
>>> Known issues:
>>> * Kasan is not yet enabled for arch/powerpc/kvm
>>> * kexec hang
>>> * outline stack and global support
>>>
>>
>> Is there any problem with globals or you just didn't try it yet?
>> I think it should just work. You need only to add  --param
>> asan-globals=0 to KBUILD_CFLAGS_MODULE
>> to disable it for modules.
> 
> I am hitting BUG_ON in early vmalloc code. I still haven't got time to
> debug it further. Should get to that soon.
> 
> -aneesh
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
> 

^ permalink raw reply

* Re: [PATCH v3 2/3] hwmon: ibmpowernv: Add attributes to enable/disable sensor groups
From: Shilpasri G Bhat @ 2018-07-05 17:35 UTC (permalink / raw)
  To: Guenter Roeck, mpe, linuxppc-dev, linux-hwmon, linux-kernel, ego
In-Reply-To: <9b55f78a-1d29-4391-c43e-124772b7aa9f@roeck-us.net>

Hi,

On 07/05/2018 09:07 PM, Guenter Roeck wrote:
> On 07/05/2018 06:51 AM, Shilpasri G Bhat wrote:
>> On-Chip-Controller(OCC) is an embedded micro-processor in POWER9 chip
>> which measures various system and chip level sensors. These sensors
>> comprise environmental sensors (like power, temperature, current
>> and voltage) and performance sensors (like utilization, frequency).
>> All these sensors are copied to main memory at a regular interval of
>> 100ms. OCC provides a way to select a group of sensors that is copied
>> to the main memory to increase the update frequency of selected sensor
>> groups. When a sensor-group is disabled, OCC will not copy it to main
>> memory and those sensors read 0 values.
>>
>> This patch provides support for enabling/disabling the sensor groups
>> like power, temperature, current and voltage. This patch adds new
>> per-sensor sysfs attribute to disable and enable them.
>>
>> Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
>> ---
>> Changes from v2:
>> - Writes to first 'enable' attribute of the sensor group will affect all the
>>    sensors in the group
>> - Removed global mutex and made it per sensor-group
>>
>>   drivers/hwmon/ibmpowernv.c | 184 ++++++++++++++++++++++++++++++++++++++-------
>>   1 file changed, 155 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c
>> index f829dad..9c6adee 100644
>> --- a/drivers/hwmon/ibmpowernv.c
>> +++ b/drivers/hwmon/ibmpowernv.c
>> @@ -73,6 +73,10 @@ enum sensors {
>>       struct attribute_group group;
>>       u32 attr_count;
>>       u32 hwmon_index;
>> +    struct mutex mutex;
>> +    u32 *gid;
>> +    u32 nr_gid;
>> +    bool enable;
>>   } sensor_groups[] = {
>>       { "fan"   },
>>       { "temp"  },
>> @@ -105,6 +109,9 @@ static ssize_t show_sensor(struct device *dev, struct
>> device_attribute *devattr,
>>       ssize_t ret;
>>       u64 x;
>>   +    if (!sensor_groups[sdata->type].enable)
>> +        return -ENODATA;
>> +
>>       ret =  opal_get_sensor_data_u64(sdata->id, &x);
>>         if (ret)
>> @@ -120,6 +127,46 @@ static ssize_t show_sensor(struct device *dev, struct
>> device_attribute *devattr,
>>       return sprintf(buf, "%llu\n", x);
>>   }
>>   +static ssize_t show_enable(struct device *dev,
>> +               struct device_attribute *devattr, char *buf)
>> +{
>> +    struct sensor_data *sdata = container_of(devattr, struct sensor_data,
>> +                         dev_attr);
>> +
>> +    return sprintf(buf, "%u\n", sensor_groups[sdata->type].enable);
>> +}
>> +
>> +static ssize_t store_enable(struct device *dev,
>> +                struct device_attribute *devattr,
>> +                const char *buf, size_t count)
>> +{
>> +    struct sensor_data *sdata = container_of(devattr, struct sensor_data,
>> +                         dev_attr);
>> +    struct sensor_group *sg = &sensor_groups[sdata->type];
>> +    int ret, i;
>> +    bool data;
>> +
>> +    ret = kstrtobool(buf, &data);
>> +    if (ret)
>> +        return ret;
>> +
>> +    ret = mutex_lock_interruptible(&sg->mutex);
>> +    if (ret)
>> +        return ret;
>> +
>> +    if (data != sg->enable)
>> +        for (i = 0; i < sg->nr_gid && !ret; i++)
>> +            ret =  sensor_group_enable(sg->gid[i], data);
>> +
> 
> Wouldn't it be better to have a separate attribute for each of the
> affected groups if there can be more than one ? Just wondering.
> 
> The idea was to widen the scope to a point where there is a 1:1 match
> between the hardware capabilities and attributes. Clearly having
> a separate attribute for all sensors was inappropriate, but the code
> above now suggests that a single attribute for all sensors may have
> widened the scope too much (because the hardware can do better than
> this).
> 

Yup, it would be better to have an 'enable' attribute for each sub-group.

Thanks and Regards,
Shilpa

> Thanks,
> Guenter
> 
>> +    if (!ret) {
>> +        sg->enable = data;
>> +        ret = count;
>> +    }
>> +
>> +    mutex_unlock(&sg->mutex);
>> +    return ret;
>> +}
>> +
>>   static ssize_t show_label(struct device *dev, struct device_attribute *devattr,
>>                 char *buf)
>>   {
>> @@ -292,13 +339,68 @@ static u32 get_sensor_hwmon_index(struct sensor_data
>> *sdata,
>>       return ++sensor_groups[sdata->type].hwmon_index;
>>   }
>>   +static int init_sensor_group_data(struct platform_device *pdev)
>> +{
>> +    struct device_node *groups, *sg;
>> +    enum sensors type;
>> +    int ret = 0, i;
>> +
>> +    for (i = 0; i < MAX_SENSOR_TYPE; i++) {
>> +        sensor_groups[i].nr_gid = 0;
>> +        sensor_groups[i].enable = true;
>> +    }
>> +
>> +    groups = of_find_node_by_path("/ibm,opal/sensor-groups");
>> +    if (!groups)
>> +        return ret;
>> +
>> +    for (i = 0; i < MAX_SENSOR_TYPE; i++) {
>> +        u32 gid[256];
>> +        u32 id, size;
>> +
>> +        for_each_child_of_node(groups, sg) {
>> +            type = get_sensor_type(sg);
>> +            if (type != i)
>> +                continue;
>> +
>> +            if (of_property_read_u32(sg, "sensor-group-id", &id))
>> +                continue;
>> +
>> +            gid[sensor_groups[i].nr_gid++] = id;
>> +        }
>> +
>> +        if (!sensor_groups[i].nr_gid)
>> +            continue;
>> +
>> +        size = sensor_groups[i].nr_gid * sizeof(u32);
>> +        sensor_groups[i].gid = devm_kzalloc(&pdev->dev, size,
>> +                            GFP_KERNEL);
>> +        if (!sensor_groups[i].gid) {
>> +            ret = -ENOMEM;
>> +            break;
>> +        }
>> +
>> +        memcpy(sensor_groups[i].gid, gid, size);
>> +        sensor_groups[i].enable = false;
>> +        mutex_init(&sensor_groups[i].mutex);
>> +    }
>> +
>> +    of_node_put(groups);
>> +    return ret;
>> +}
>> +
>>   static int populate_attr_groups(struct platform_device *pdev)
>>   {
>>       struct platform_data *pdata = platform_get_drvdata(pdev);
>>       const struct attribute_group **pgroups = pdata->attr_groups;
>>       struct device_node *opal, *np;
>> +    int ret;
>>       enum sensors type;
>>   +    ret = init_sensor_group_data(pdev);
>> +    if (ret)
>> +        return ret;
>> +
>>       opal = of_find_node_by_path("/ibm,opal/sensors");
>>       for_each_child_of_node(opal, np) {
>>           const char *label;
>> @@ -313,7 +415,7 @@ static int populate_attr_groups(struct platform_device *pdev)
>>           sensor_groups[type].attr_count++;
>>             /*
>> -         * add attributes for labels, min and max
>> +         * add attributes for labels, min, max and enable
>>            */
>>           if (!of_property_read_string(np, "label", &label))
>>               sensor_groups[type].attr_count++;
>> @@ -321,6 +423,8 @@ static int populate_attr_groups(struct platform_device *pdev)
>>               sensor_groups[type].attr_count++;
>>           if (of_find_property(np, "sensor-data-max", NULL))
>>               sensor_groups[type].attr_count++;
>> +        if (sensor_groups[type].nr_gid)
>> +            sensor_groups[type].attr_count++;
>>       }
>>         of_node_put(opal);
>> @@ -344,7 +448,10 @@ static int populate_attr_groups(struct platform_device
>> *pdev)
>>   static void create_hwmon_attr(struct sensor_data *sdata, const char *attr_name,
>>                     ssize_t (*show)(struct device *dev,
>>                             struct device_attribute *attr,
>> -                          char *buf))
>> +                          char *buf),
>> +                ssize_t (*store)(struct device *dev,
>> +                         struct device_attribute *attr,
>> +                         const char *buf, size_t count))
>>   {
>>       snprintf(sdata->name, MAX_ATTR_LEN, "%s%d_%s",
>>            sensor_groups[sdata->type].name, sdata->hwmon_index,
>> @@ -352,8 +459,13 @@ static void create_hwmon_attr(struct sensor_data *sdata,
>> const char *attr_name,
>>         sysfs_attr_init(&sdata->dev_attr.attr);
>>       sdata->dev_attr.attr.name = sdata->name;
>> -    sdata->dev_attr.attr.mode = S_IRUGO;
>>       sdata->dev_attr.show = show;
>> +    if (store) {
>> +        sdata->dev_attr.store = store;
>> +        sdata->dev_attr.attr.mode = 0664;
>> +    } else {
>> +        sdata->dev_attr.attr.mode = 0444;
>> +    }
>>   }
>>     static void populate_sensor(struct sensor_data *sdata, int od, int hd, int
>> sid,
>> @@ -361,13 +473,16 @@ static void populate_sensor(struct sensor_data *sdata,
>> int od, int hd, int sid,
>>                   const struct attribute_group *pgroup,
>>                   ssize_t (*show)(struct device *dev,
>>                           struct device_attribute *attr,
>> -                        char *buf))
>> +                        char *buf),
>> +                ssize_t (*store)(struct device *dev,
>> +                         struct device_attribute *attr,
>> +                         const char *buf, size_t count))
>>   {
>>       sdata->id = sid;
>>       sdata->type = type;
>>       sdata->opal_index = od;
>>       sdata->hwmon_index = hd;
>> -    create_hwmon_attr(sdata, attr_name, show);
>> +    create_hwmon_attr(sdata, attr_name, show, store);
>>       pgroup->attrs[sensor_groups[type].attr_count++] = &sdata->dev_attr.attr;
>>   }
>>   @@ -408,18 +523,16 @@ static int create_device_attrs(struct platform_device
>> *pdev)
>>       u32 count = 0;
>>       int err = 0;
>>   -    opal = of_find_node_by_path("/ibm,opal/sensors");
>>       sdata = devm_kcalloc(&pdev->dev,
>>                    pdata->sensors_count, sizeof(*sdata),
>>                    GFP_KERNEL);
>> -    if (!sdata) {
>> -        err = -ENOMEM;
>> -        goto exit_put_node;
>> -    }
>> +    if (!sdata)
>> +        return -ENOMEM;
>>   +    opal = of_find_node_by_path("/ibm,opal/sensors");
>>       for_each_child_of_node(opal, np) {
>>           const char *attr_name;
>> -        u32 opal_index;
>> +        u32 opal_index, hw_id;
>>           const char *label;
>>             if (np->name == NULL)
>> @@ -456,14 +569,11 @@ static int create_device_attrs(struct platform_device
>> *pdev)
>>               opal_index = INVALID_INDEX;
>>           }
>>   -        sdata[count].opal_index = opal_index;
>> -        sdata[count].hwmon_index =
>> -            get_sensor_hwmon_index(&sdata[count], sdata, count);
>> -
>> -        create_hwmon_attr(&sdata[count], attr_name, show_sensor);
>> -
>> -        pgroups[type]->attrs[sensor_groups[type].attr_count++] =
>> -                &sdata[count++].dev_attr.attr;
>> +        hw_id = get_sensor_hwmon_index(&sdata[count], sdata, count);
>> +        populate_sensor(&sdata[count], opal_index, hw_id, sensor_id,
>> +                attr_name, type, pgroups[type], show_sensor,
>> +                NULL);
>> +        count++;
>>             if (!of_property_read_string(np, "label", &label)) {
>>               /*
>> @@ -474,33 +584,49 @@ static int create_device_attrs(struct platform_device
>> *pdev)
>>                */
>>                 make_sensor_label(np, &sdata[count], label);
>> -            populate_sensor(&sdata[count], opal_index,
>> -                    sdata[count - 1].hwmon_index,
>> +            populate_sensor(&sdata[count], opal_index, hw_id,
>>                       sensor_id, "label", type, pgroups[type],
>> -                    show_label);
>> +                    show_label, NULL);
>>               count++;
>>           }
>>             if (!of_property_read_u32(np, "sensor-data-max", &sensor_id)) {
>>               attr_name = get_max_attr(type);
>> -            populate_sensor(&sdata[count], opal_index,
>> -                    sdata[count - 1].hwmon_index,
>> +            populate_sensor(&sdata[count], opal_index, hw_id,
>>                       sensor_id, attr_name, type,
>> -                    pgroups[type], show_sensor);
>> +                    pgroups[type], show_sensor, NULL);
>>               count++;
>>           }
>>             if (!of_property_read_u32(np, "sensor-data-min", &sensor_id)) {
>>               attr_name = get_min_attr(type);
>> -            populate_sensor(&sdata[count], opal_index,
>> -                    sdata[count - 1].hwmon_index,
>> +            populate_sensor(&sdata[count], opal_index, hw_id,
>>                       sensor_id, attr_name, type,
>> -                    pgroups[type], show_sensor);
>> +                    pgroups[type], show_sensor, NULL);
>>               count++;
>>           }
>> +
>> +        if (sensor_groups[type].nr_gid) {
>> +            ssize_t (*store)(struct device *dev,
>> +                     struct device_attribute *attr,
>> +                     const char *buf, size_t count);
>> +
>> +            if (!sensor_groups[type].enable) {
>> +                sensor_groups[type].enable = true;
>> +                store = store_enable;
>> +            } else {
>> +                store = NULL;
>> +            }
>> +
>> +            sensor_groups[type].enable = true;
>> +            populate_sensor(&sdata[count], opal_index, hw_id,
>> +                    sensor_id, "enable", type,
>> +                    pgroups[type], show_enable, store);
>> +            count++;
>> +        }
>> +
>>       }
>>   -exit_put_node:
>>       of_node_put(opal);
>>       return err;
>>   }
>>
> 

^ permalink raw reply

* Re: [PATCHv5 4/4] arm64: Add build salt to the vDSO
From: Laura Abbott @ 2018-07-05 19:05 UTC (permalink / raw)
  To: Masahiro Yamada
  Cc: Mark Wielaard, H . J . Lu, Catalin Marinas, Will Deacon,
	Andy Lutomirski, Linus Torvalds, X86 ML,
	Linux Kernel Mailing List, Nick Clifton, Cary Coutant,
	Linux Kbuild mailing list, linuxppc-dev, Michael Ellerman,
	linux-arm-kernel
In-Reply-To: <CAK7LNARxwNkzkUif9dj1oVFGYP4J41C+t=W4YFVxM+J94siGvA@mail.gmail.com>

On 07/03/2018 08:55 PM, Masahiro Yamada wrote:
> Hi.
> 
> 2018-07-04 8:34 GMT+09:00 Laura Abbott <labbott@redhat.com>:
>>
>> The vDSO needs to have a unique build id in a similar manner
>> to the kernel and modules. Use the build salt macro.
>>
>> Signed-off-by: Laura Abbott <labbott@redhat.com>
>> ---
>> v5: I was previously focused on x86 only but since powerpc gave a patch,
>> I figured I would do arm64 since the changes were also fairly simple.
>> ---
>>   arch/arm64/kernel/vdso/note.S | 3 +++
>>   1 file changed, 3 insertions(+)
>>
>> diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S
>> index b82c85e5d972..2c429dfd3f45 100644
>> --- a/arch/arm64/kernel/vdso/note.S
>> +++ b/arch/arm64/kernel/vdso/note.S
>> @@ -22,7 +22,10 @@
>>   #include <linux/uts.h>
>>   #include <linux/version.h>
>>   #include <linux/elfnote.h>
>> +#include <linux/build-salt.h>
>>
>>   ELFNOTE_START(Linux, 0, "a")
>>          .long LINUX_VERSION_CODE
>>   ELFNOTE_END
>> +
>> +BUILD_SALT;
> 
> 
> 
> I think this works, but
> I prefer no-semicolon in assembly files.
> 
> For coding consistency,
> I want ';' as statement delimiter in .c files.
> But, only new line after each statement in .S files.
> 
> For example, in arch/x86/xen/xen-head.S
> I see no semicolon after ELFNOTE().
> 
> I found this:
> http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473k/dom1359731141352.html
> It says ';' starts a comment line
> although it is not the case of GAS.
> 
> 
> Same for 3/4.
> 
> 
> 

Yes, that was a typo out of habit. Will fix.

^ permalink raw reply

* Re: [PATCHv5 2/4] x86: Add build salt to the vDSO
From: Laura Abbott @ 2018-07-05 19:08 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Mark Wielaard, H . J . Lu, Masahiro Yamada, Linus Torvalds,
	X86 ML, LKML, Nick Clifton, Cary Coutant,
	Linux Kbuild mailing list, linuxppc-dev, Michael Ellerman,
	Catalin Marinas, Will Deacon, linux-arm-kernel
In-Reply-To: <CALCETrVZi2WYbBSifaNV4wajj52PyG-w7z9DekfJvSuZAKi1nA@mail.gmail.com>

On 07/05/2018 08:58 AM, Andy Lutomirski wrote:
> On Tue, Jul 3, 2018 at 4:34 PM, Laura Abbott <labbott@redhat.com> wrote:
>>
>> The vDSO needs to have a unique build id in a similar manner
>> to the kernel and modules. Use the build salt macro.
>>
> 
> Looks good to me.  I have no idea whose tree these would go through.
> 

I was intending this to go through kbuild tree. Can I take this
as an Ack?

>> Signed-off-by: Laura Abbott <labbott@redhat.com>
>> ---
>> v5: Switched to using the single line BUILD_SALT macro
>> ---
>>   arch/x86/entry/vdso/vdso-note.S   | 3 +++
>>   arch/x86/entry/vdso/vdso32/note.S | 3 +++
>>   2 files changed, 6 insertions(+)
>>
>> diff --git a/arch/x86/entry/vdso/vdso-note.S b/arch/x86/entry/vdso/vdso-note.S
>> index 79a071e4357e..79423170118f 100644
>> --- a/arch/x86/entry/vdso/vdso-note.S
>> +++ b/arch/x86/entry/vdso/vdso-note.S
>> @@ -3,6 +3,7 @@
>>    * Here we can supply some information useful to userland.
>>    */
>>
>> +#include <linux/build-salt.h>
>>   #include <linux/uts.h>
>>   #include <linux/version.h>
>>   #include <linux/elfnote.h>
>> @@ -10,3 +11,5 @@
>>   ELFNOTE_START(Linux, 0, "a")
>>          .long LINUX_VERSION_CODE
>>   ELFNOTE_END
>> +
>> +BUILD_SALT
>> diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S
>> index 9fd51f206314..e78047d119f6 100644
>> --- a/arch/x86/entry/vdso/vdso32/note.S
>> +++ b/arch/x86/entry/vdso/vdso32/note.S
>> @@ -4,6 +4,7 @@
>>    * Here we can supply some information useful to userland.
>>    */
>>
>> +#include <linux/build-salt.h>
>>   #include <linux/version.h>
>>   #include <linux/elfnote.h>
>>
>> @@ -14,6 +15,8 @@ ELFNOTE_START(Linux, 0, "a")
>>          .long LINUX_VERSION_CODE
>>   ELFNOTE_END
>>
>> +BUILD_SALT
>> +
>>   #ifdef CONFIG_XEN
>>   /*
>>    * Add a special note telling glibc's dynamic linker a fake hardware
>> --
>> 2.17.1
>>

^ permalink raw reply

* Re: [PATCHv5 1/4] kbuild: Add build salt to the kernel and modules
From: Laura Abbott @ 2018-07-05 19:20 UTC (permalink / raw)
  To: Masahiro Yamada
  Cc: Andy Lutomirski, Mark Wielaard, H . J . Lu, Michael Ellerman,
	Catalin Marinas, Will Deacon, Linus Torvalds, X86 ML,
	Linux Kernel Mailing List, Nick Clifton, Cary Coutant,
	Linux Kbuild mailing list, linuxppc-dev, linux-arm-kernel
In-Reply-To: <CAK7LNAS7XBwDidNaLKC1wD+G30AyAvTHG5GN=UAiP5zz1EO8JA@mail.gmail.com>

On 07/03/2018 08:59 PM, Masahiro Yamada wrote:
> Hi.
> 
> Thanks for the update.
> 
> 
> 2018-07-04 8:34 GMT+09:00 Laura Abbott <labbott@redhat.com>:
>>
>> The build id generated from --build-id can be generated in several different
>> ways, with the default being the sha1 on the output of the linked file. For
>> distributions, it can be useful to make sure this ID is unique, even if the
>> actual file contents don't change. The easiest way to do this is to insert
>> a section with some data.
>>
>> Add an ELF note to both the kernel and module which contains some data based
>> off of a config option.
>>
>> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
>> Signed-off-by: Laura Abbott <labbott@redhat.com>
>> ---
>> v5: I used S-o-b here since the majority of the code was written
>> already.
> 
> 
> I think Suggested-by is good enough.
> S-o-b is appended as a patch is passed from people to people.
> 
> Anyway, this looks good except one bike-shed.
> 
>> Please feel free to change the tag if you think it's not
>> appropriate. I also tweaked this to take an ascii string instead of just
>> a hex value since this makes things much easier on the distribution
>> side.
>> ---
> 
> 
>> diff --git a/init/Kconfig b/init/Kconfig
>> index 041f3a022122..8de789f40db9 100644
>> --- a/init/Kconfig
>> +++ b/init/Kconfig
>> @@ -107,6 +107,15 @@ config LOCALVERSION_AUTO
>>
>>            which is done within the script "scripts/setlocalversion".)
>>
>> +config BUILD_SALT
>> +       string "Build ID Salt"
>> +       default "Linux"
> 
> 
> How about empty string ""
> for default?
> 

Sure, seems to work fine.

Thanks,
Laura

^ permalink raw reply

* Re: [RFC PATCH 1/2] dma-mapping: Clean up dma_set_*mask() hooks
From: Christoph Hellwig @ 2018-07-05 19:37 UTC (permalink / raw)
  To: Robin Murphy
  Cc: hch, m.szyprowski, iommu, linux-arm-kernel, linux-ia64,
	linuxppc-dev
In-Reply-To: <55ac9550c311f056dcfeed9b2c8265375f17b155.1530726467.git.robin.murphy@arm.com>

On Wed, Jul 04, 2018 at 06:50:11PM +0100, Robin Murphy wrote:
> Arch-specific implementations for dma_set_{coherent_,}mask() currently
> rely on an inconsistent mix of arch-defined Kconfig symbols and macro
> overrides. Now that we have a nice centralised home for DMA API gubbins,
> let's consolidate these loose ends under consistent config options.
> 
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> 
> Here's hoping the buildbot comes by to point out what I've inevitably
> missed, although I did check a cursory cross-compile of ppc64_defconfig
> to iron out the obvious howlers.

The patch looks sensible to me, although I was hoping to get rid of these
hooks in this or the next merge window as they are a horribly bad idea.

> The motivation here is that I'm looking at adding set_mask overrides
> for arm64, and having discovered a bit of a mess it seemed prudent to
> clean up before ingraining it any more.

What are you trying to do?  I really don't want to see more users of
the hooks as they are a horribly bad idea.

^ permalink raw reply

* Re: [RFC PATCH 2/2] dma-mapping: Clean up dma_get_required_mask() hooks
From: Christoph Hellwig @ 2018-07-05 19:38 UTC (permalink / raw)
  To: Robin Murphy
  Cc: hch, m.szyprowski, iommu, linux-arm-kernel, linux-ia64,
	linuxppc-dev
In-Reply-To: <08256121f325ceed7f6b88c1a5d3cf949698787d.1530726467.git.robin.murphy@arm.com>

On Wed, Jul 04, 2018 at 06:50:12PM +0100, Robin Murphy wrote:
> As for the other mask-related hooks, standardise the arch override into
> a Kconfig option, and also pull the generic implementation into the DMA
> mapping code rather than having it hide away in the platform bus code.

Heh, I have a somewhat similar patch in my queue.  I didn't want it out
because dma_get_required_mask is rather ill defined at the moment and
I wanted to clean that up first.  But I guess I could apply this first
and clean up later.

I just fear you might be wanting to add an arm64 user, so I'd really like
to understand why and how.

^ permalink raw reply

* powerpc: 32BIT vs. 64BIT (PPC32 vs. PPC64)
From: Randy Dunlap @ 2018-07-05 21:30 UTC (permalink / raw)
  To: linux-kbuild
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Stephen Rothwell, linuxppc-dev

Hi,

Is there a good way (or a shortcut) to do something like:

$ make ARCH=powerpc O=PPC32 [other_options] allmodconfig
  to get a PPC32/32BIT allmodconfig

and also be able to do:

$ make ARCH=powerpc O=PPC64 [other_options] allmodconfig
  to get a PPC64/64BIT allmodconfig?


Note that arch/x86, arch/sh, and arch/sparc have ways to do
some flavor(s) of this (from Documentation/kbuild/kbuild.txt;
sh and sparc based on a recent "fix" patch from me):

x86: i386 for 32 bit, x86_64 for 64 bit
sh: sh for 32 bit, sh64 for 64 bit
sparc: sparc32 for 32 bit, sparc64 for 64 bit


thanks,
-- 
~Randy

^ permalink raw reply

* Re: [PATCHv5 2/4] x86: Add build salt to the vDSO
From: Andy Lutomirski @ 2018-07-05 21:46 UTC (permalink / raw)
  To: Laura Abbott
  Cc: Andy Lutomirski, Mark Wielaard, H . J . Lu, Masahiro Yamada,
	Linus Torvalds, X86 ML, LKML, Nick Clifton, Cary Coutant,
	Linux Kbuild mailing list, linuxppc-dev, Michael Ellerman,
	Catalin Marinas, Will Deacon, linux-arm-kernel
In-Reply-To: <2abd8138-0f62-79a5-5a2c-8e08aee171b8@redhat.com>

Sure.

On Thu, Jul 5, 2018 at 12:08 PM, Laura Abbott <labbott@redhat.com> wrote:
> On 07/05/2018 08:58 AM, Andy Lutomirski wrote:
>>
>> On Tue, Jul 3, 2018 at 4:34 PM, Laura Abbott <labbott@redhat.com> wrote:
>>>
>>>
>>> The vDSO needs to have a unique build id in a similar manner
>>> to the kernel and modules. Use the build salt macro.
>>>
>>
>> Looks good to me.  I have no idea whose tree these would go through.
>>
>
> I was intending this to go through kbuild tree. Can I take this
> as an Ack?
>
>
>>> Signed-off-by: Laura Abbott <labbott@redhat.com>
>>> ---
>>> v5: Switched to using the single line BUILD_SALT macro
>>> ---
>>>   arch/x86/entry/vdso/vdso-note.S   | 3 +++
>>>   arch/x86/entry/vdso/vdso32/note.S | 3 +++
>>>   2 files changed, 6 insertions(+)
>>>
>>> diff --git a/arch/x86/entry/vdso/vdso-note.S
>>> b/arch/x86/entry/vdso/vdso-note.S
>>> index 79a071e4357e..79423170118f 100644
>>> --- a/arch/x86/entry/vdso/vdso-note.S
>>> +++ b/arch/x86/entry/vdso/vdso-note.S
>>> @@ -3,6 +3,7 @@
>>>    * Here we can supply some information useful to userland.
>>>    */
>>>
>>> +#include <linux/build-salt.h>
>>>   #include <linux/uts.h>
>>>   #include <linux/version.h>
>>>   #include <linux/elfnote.h>
>>> @@ -10,3 +11,5 @@
>>>   ELFNOTE_START(Linux, 0, "a")
>>>          .long LINUX_VERSION_CODE
>>>   ELFNOTE_END
>>> +
>>> +BUILD_SALT
>>> diff --git a/arch/x86/entry/vdso/vdso32/note.S
>>> b/arch/x86/entry/vdso/vdso32/note.S
>>> index 9fd51f206314..e78047d119f6 100644
>>> --- a/arch/x86/entry/vdso/vdso32/note.S
>>> +++ b/arch/x86/entry/vdso/vdso32/note.S
>>> @@ -4,6 +4,7 @@
>>>    * Here we can supply some information useful to userland.
>>>    */
>>>
>>> +#include <linux/build-salt.h>
>>>   #include <linux/version.h>
>>>   #include <linux/elfnote.h>
>>>
>>> @@ -14,6 +15,8 @@ ELFNOTE_START(Linux, 0, "a")
>>>          .long LINUX_VERSION_CODE
>>>   ELFNOTE_END
>>>
>>> +BUILD_SALT
>>> +
>>>   #ifdef CONFIG_XEN
>>>   /*
>>>    * Add a special note telling glibc's dynamic linker a fake hardware
>>> --
>>> 2.17.1
>>>
>

^ permalink raw reply

* [PATCHv6 0/4] Salted build ids via ELF notes
From: Laura Abbott @ 2018-07-06  0:49 UTC (permalink / raw)
  To: Andy Lutomirski, mjw, H . J . Lu, Masahiro Yamada,
	Michael Ellerman, Catalin Marinas, Will Deacon
  Cc: Laura Abbott, Linus Torvalds, X86 ML, linux-kernel, Nick Clifton,
	Cary Coutant, linux-kbuild, linuxppc-dev, linux-arm-kernel

Hi,

This is v6 of the series to allow unique build ids. v6 is mostly minor
fixups and Acks for this to go through the kbuild tree.

Thanks,
Laura

Laura Abbott (4):
  kbuild: Add build salt to the kernel and modules
  x86: Add build salt to the vDSO
  powerpc: Add build salt to the vDSO
  arm64: Add build salt to the vDSO

 arch/arm64/kernel/vdso/note.S     |  3 +++
 arch/powerpc/kernel/vdso32/note.S |  3 +++
 arch/x86/entry/vdso/vdso-note.S   |  3 +++
 arch/x86/entry/vdso/vdso32/note.S |  3 +++
 include/linux/build-salt.h        | 20 ++++++++++++++++++++
 init/Kconfig                      |  9 +++++++++
 init/version.c                    |  3 +++
 scripts/mod/modpost.c             |  3 +++
 8 files changed, 47 insertions(+)
 create mode 100644 include/linux/build-salt.h

-- 
2.17.1

^ permalink raw reply

* [PATCHv6 1/4] kbuild: Add build salt to the kernel and modules
From: Laura Abbott @ 2018-07-06  0:49 UTC (permalink / raw)
  To: Andy Lutomirski, mjw, H . J . Lu, Masahiro Yamada,
	Michael Ellerman, Catalin Marinas, Will Deacon
  Cc: Laura Abbott, Linus Torvalds, X86 ML, linux-kernel, Nick Clifton,
	Cary Coutant, linux-kbuild, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20180706004940.32587-1-labbott@redhat.com>


In Fedora, the debug information is packaged separately (foo-debuginfo) and
can be installed separately. There's been a long standing issue where only
one version of a debuginfo package can be installed at a time. There's
been an effort for Fedora for parallel debuginfo to rectify this problem.

Part of the requirement to allow parallel debuginfo to work is that build ids
are unique between builds. The existing upstream rpm implementation ensures
this by re-calculating the build-id using the version and release as a
seed. This doesn't work 100% for the kernel because of the vDSO which is
its own binary and doesn't get updated when embedded.

Fix this by adding some data in an ELF note for both the kernel and modules.
The data is controlled via a Kconfig option so distributions can set it
to an appropriate value to ensure uniqueness between builds.

Suggested-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Laura Abbott <labbott@redhat.com>
---
v6: Added more detail to the commit text about why exactly this feature
is useful. Default string now ""
---
 include/linux/build-salt.h | 20 ++++++++++++++++++++
 init/Kconfig               |  9 +++++++++
 init/version.c             |  3 +++
 scripts/mod/modpost.c      |  3 +++
 4 files changed, 35 insertions(+)
 create mode 100644 include/linux/build-salt.h

diff --git a/include/linux/build-salt.h b/include/linux/build-salt.h
new file mode 100644
index 000000000000..bb007bd05e7a
--- /dev/null
+++ b/include/linux/build-salt.h
@@ -0,0 +1,20 @@
+#ifndef __BUILD_SALT_H
+#define __BUILD_SALT_H
+
+#include <linux/elfnote.h>
+
+#define LINUX_ELFNOTE_BUILD_SALT       0x100
+
+#ifdef __ASSEMBLER__
+
+#define BUILD_SALT \
+       ELFNOTE(Linux, LINUX_ELFNOTE_BUILD_SALT, .asciz CONFIG_BUILD_SALT)
+
+#else
+
+#define BUILD_SALT \
+       ELFNOTE32("Linux", LINUX_ELFNOTE_BUILD_SALT, CONFIG_BUILD_SALT)
+
+#endif
+
+#endif /* __BUILD_SALT_H */
diff --git a/init/Kconfig b/init/Kconfig
index 041f3a022122..d39b31484c52 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -107,6 +107,15 @@ config LOCALVERSION_AUTO
 
 	  which is done within the script "scripts/setlocalversion".)
 
+config BUILD_SALT
+       string "Build ID Salt"
+       default ""
+       help
+          The build ID is used to link binaries and their debug info. Setting
+          this option will use the value in the calculation of the build id.
+          This is mostly useful for distributions which want to ensure the
+          build is unique between builds. It's safe to leave the default.
+
 config HAVE_KERNEL_GZIP
 	bool
 
diff --git a/init/version.c b/init/version.c
index bfb4e3f4955e..ef4012ec4375 100644
--- a/init/version.c
+++ b/init/version.c
@@ -7,6 +7,7 @@
  */
 
 #include <generated/compile.h>
+#include <linux/build-salt.h>
 #include <linux/export.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
@@ -49,3 +50,5 @@ const char linux_proc_banner[] =
 	"%s version %s"
 	" (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ")"
 	" (" LINUX_COMPILER ") %s\n";
+
+BUILD_SALT;
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 1663fb19343a..dc6d714e4dcb 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -2125,10 +2125,13 @@ static int check_modname_len(struct module *mod)
  **/
 static void add_header(struct buffer *b, struct module *mod)
 {
+	buf_printf(b, "#include <linux/build-salt.h>\n");
 	buf_printf(b, "#include <linux/module.h>\n");
 	buf_printf(b, "#include <linux/vermagic.h>\n");
 	buf_printf(b, "#include <linux/compiler.h>\n");
 	buf_printf(b, "\n");
+	buf_printf(b, "BUILD_SALT;\n");
+	buf_printf(b, "\n");
 	buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n");
 	buf_printf(b, "MODULE_INFO(name, KBUILD_MODNAME);\n");
 	buf_printf(b, "\n");
-- 
2.17.1

^ permalink raw reply related

* [PATCHv6 2/4] x86: Add build salt to the vDSO
From: Laura Abbott @ 2018-07-06  0:49 UTC (permalink / raw)
  To: Andy Lutomirski, mjw, H . J . Lu, Masahiro Yamada
  Cc: Laura Abbott, Linus Torvalds, X86 ML, linux-kernel, Nick Clifton,
	Cary Coutant, linux-kbuild, linuxppc-dev, Michael Ellerman,
	Catalin Marinas, Will Deacon, linux-arm-kernel
In-Reply-To: <20180706004940.32587-1-labbott@redhat.com>


The vDSO needs to have a unique build id in a similar manner
to the kernel and modules. Use the build salt macro.

Acked-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Laura Abbott <labbott@redhat.com>
---
v6: Ack from Andy
---
 arch/x86/entry/vdso/vdso-note.S   | 3 +++
 arch/x86/entry/vdso/vdso32/note.S | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/arch/x86/entry/vdso/vdso-note.S b/arch/x86/entry/vdso/vdso-note.S
index 79a071e4357e..79423170118f 100644
--- a/arch/x86/entry/vdso/vdso-note.S
+++ b/arch/x86/entry/vdso/vdso-note.S
@@ -3,6 +3,7 @@
  * Here we can supply some information useful to userland.
  */
 
+#include <linux/build-salt.h>
 #include <linux/uts.h>
 #include <linux/version.h>
 #include <linux/elfnote.h>
@@ -10,3 +11,5 @@
 ELFNOTE_START(Linux, 0, "a")
 	.long LINUX_VERSION_CODE
 ELFNOTE_END
+
+BUILD_SALT
diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S
index 9fd51f206314..e78047d119f6 100644
--- a/arch/x86/entry/vdso/vdso32/note.S
+++ b/arch/x86/entry/vdso/vdso32/note.S
@@ -4,6 +4,7 @@
  * Here we can supply some information useful to userland.
  */
 
+#include <linux/build-salt.h>
 #include <linux/version.h>
 #include <linux/elfnote.h>
 
@@ -14,6 +15,8 @@ ELFNOTE_START(Linux, 0, "a")
 	.long LINUX_VERSION_CODE
 ELFNOTE_END
 
+BUILD_SALT
+
 #ifdef CONFIG_XEN
 /*
  * Add a special note telling glibc's dynamic linker a fake hardware
-- 
2.17.1

^ permalink raw reply related

* [PATCHv6 3/4] powerpc: Add build salt to the vDSO
From: Laura Abbott @ 2018-07-06  0:49 UTC (permalink / raw)
  To: mjw, H . J . Lu, Masahiro Yamada, Michael Ellerman
  Cc: Laura Abbott, Andy Lutomirski, Linus Torvalds, X86 ML,
	linux-kernel, Nick Clifton, Cary Coutant, linux-kbuild,
	linuxppc-dev, Catalin Marinas, Will Deacon, linux-arm-kernel
In-Reply-To: <20180706004940.32587-1-labbott@redhat.com>

The vDSO needs to have a unique build id in a similar manner
to the kernel and modules. Use the build salt macro.

Signed-off-by: Laura Abbott <labbott@redhat.com>
---
v6: Remove semi-colon
---
 arch/powerpc/kernel/vdso32/note.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso32/note.S
index d4b5be4f3d5f..227a7327399e 100644
--- a/arch/powerpc/kernel/vdso32/note.S
+++ b/arch/powerpc/kernel/vdso32/note.S
@@ -5,6 +5,7 @@
 
 #include <linux/uts.h>
 #include <linux/version.h>
+#include <linux/build-salt.h>
 
 #define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type)			      \
 	.section name, flags;						      \
@@ -23,3 +24,5 @@
 	ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
 	.long LINUX_VERSION_CODE
 	ASM_ELF_NOTE_END
+
+BUILD_SALT
-- 
2.17.1

^ permalink raw reply related

* [PATCHv6 4/4] arm64: Add build salt to the vDSO
From: Laura Abbott @ 2018-07-06  0:49 UTC (permalink / raw)
  To: mjw, H . J . Lu, Masahiro Yamada, Catalin Marinas, Will Deacon
  Cc: Laura Abbott, Andy Lutomirski, Linus Torvalds, X86 ML,
	linux-kernel, Nick Clifton, Cary Coutant, linux-kbuild,
	linuxppc-dev, Michael Ellerman, linux-arm-kernel
In-Reply-To: <20180706004940.32587-1-labbott@redhat.com>

The vDSO needs to have a unique build id in a similar manner
to the kernel and modules. Use the build salt macro.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Laura Abbott <labbott@redhat.com>
---
v6: Remove the semi-colon, Ack from Will
---
 arch/arm64/kernel/vdso/note.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S
index b82c85e5d972..e20483b104d9 100644
--- a/arch/arm64/kernel/vdso/note.S
+++ b/arch/arm64/kernel/vdso/note.S
@@ -22,7 +22,10 @@
 #include <linux/uts.h>
 #include <linux/version.h>
 #include <linux/elfnote.h>
+#include <linux/build-salt.h>
 
 ELFNOTE_START(Linux, 0, "a")
 	.long LINUX_VERSION_CODE
 ELFNOTE_END
+
+BUILD_SALT
-- 
2.17.1

^ permalink raw reply related

* Re: [PATCHv3 2/4] drivers/base: utilize device tree info to shutdown devices
From: Pingfan Liu @ 2018-07-06  3:02 UTC (permalink / raw)
  To: rjw
  Cc: linux-kernel, Greg Kroah-Hartman, Rafael J . Wysocki,
	Grygorii Strashko, Christoph Hellwig, Bjorn Helgaas, Dave Young,
	linux-pci, linuxppc-dev
In-Reply-To: <2108146.dv4EAOf6IP@aspire.rjw.lan>

On Thu, Jul 5, 2018 at 6:13 PM Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
>
> On Tuesday, July 3, 2018 8:50:40 AM CEST Pingfan Liu wrote:
> > commit 52cdbdd49853 ("driver core: correct device's shutdown order")
> > places an assumption of supplier<-consumer order on the process of probe.
> > But it turns out to break down the parent <- child order in some scene.
> > E.g in pci, a bridge is enabled by pci core, and behind it, the devices
> > have been probed. Then comes the bridge's module, which enables extra
> > feature(such as hotplug) on this bridge. This will break the
> > parent<-children order and cause failure when "kexec -e" in some scenario.
> >
> > The detailed description of the scenario:
> > An IBM Power9 machine on which, two drivers portdrv_pci and shpchp(a mod)
> > match the PCI_CLASS_BRIDGE_PCI, but neither of them succeeds in probing due
> > to some issue. For this case, the bridge is moved after its children in
> > devices_kset. Then, when "kexec -e", a ata-disk behind the bridge can not
> > write back buffer in flight due to the former shutdown of the bridge which
> > clears the BusMaster bit.
> >
> > It is a little hard to impose both "parent<-child" and "supplier<-consumer"
> > order on devices_kset. Take the following scene:
> > step0: before a consumer's probing, (note child_a is supplier of consumer_a)
> >   [ consumer-X, child_a, ...., child_z] [... consumer_a, ..., consumer_z, ...] supplier-X
> >                                          ^^^^^^^^^^ affected range ^^^^^^^^^^
> > step1: when probing, moving consumer-X after supplier-X
> >   [ child_a, ...., child_z] [.... consumer_a, ..., consumer_z, ...] supplier-X, consumer-X
> > step2: the children of consumer-X should be re-ordered to maintain the seq
> >   [... consumer_a, ..., consumer_z, ....] supplier-X  [consumer-X, child_a, ...., child_z]
> > step3: the consumer_a should be re-ordered to maintain the seq
> >   [... consumer_z, ...] supplier-X [ consumer-X, child_a, consumer_a ..., child_z]
> >
> > It requires two nested recursion to drain out all out-of-order item in
> > "affected range". To avoid such complicated code, this patch suggests
> > to utilize the info in device tree, instead of using the order of
> > devices_kset during shutdown. It iterates the device tree, and firstly
> > shutdown a device's children and consumers. After this patch, the buggy
> > commit is hollow and left to clean.
> >
> > Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> > Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > Cc: Grygorii Strashko <grygorii.strashko@ti.com>
> > Cc: Christoph Hellwig <hch@infradead.org>
> > Cc: Bjorn Helgaas <helgaas@kernel.org>
> > Cc: Dave Young <dyoung@redhat.com>
> > Cc: linux-pci@vger.kernel.org
> > Cc: linuxppc-dev@lists.ozlabs.org
> > Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
> > ---
> >  drivers/base/core.c    | 48 +++++++++++++++++++++++++++++++++++++++++++-----
> >  include/linux/device.h |  1 +
> >  2 files changed, 44 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/base/core.c b/drivers/base/core.c
> > index a48868f..684b994 100644
> > --- a/drivers/base/core.c
> > +++ b/drivers/base/core.c
> > @@ -1446,6 +1446,7 @@ void device_initialize(struct device *dev)
> >       INIT_LIST_HEAD(&dev->links.consumers);
> >       INIT_LIST_HEAD(&dev->links.suppliers);
> >       dev->links.status = DL_DEV_NO_DRIVER;
> > +     dev->shutdown = false;
> >  }
> >  EXPORT_SYMBOL_GPL(device_initialize);
> >
> > @@ -2811,7 +2812,6 @@ static void __device_shutdown(struct device *dev)
> >        * lock is to be held
> >        */
> >       parent = get_device(dev->parent);
> > -     get_device(dev);
>
> Why is the get_/put_device() not needed any more?
>
They are moved upper layer into device_for_each_child_shutdown().
Since there is lock breakage in __device_shutdown(), resorting to
ref++ to protect the ancestor.  And I think the
get_device(dev->parent) can be deleted as well.

> >       /*
> >        * Make sure the device is off the kset list, in the
> >        * event that dev->*->shutdown() doesn't remove it.
> > @@ -2842,23 +2842,60 @@ static void __device_shutdown(struct device *dev)
> >                       dev_info(dev, "shutdown\n");
> >               dev->driver->shutdown(dev);
> >       }
> > -
> > +     dev->shutdown = true;
> >       device_unlock(dev);
> >       if (parent)
> >               device_unlock(parent);
> >
> > -     put_device(dev);
> >       put_device(parent);
> >       spin_lock(&devices_kset->list_lock);
> >  }
> >
> > +/* shutdown dev's children and consumer firstly, then itself */
> > +static int device_for_each_child_shutdown(struct device *dev)
>
> Confusing name.
>
> What about device_shutdown_subordinate()?
>
Fine. My understanding of words is not exact.

> > +{
> > +     struct klist_iter i;
> > +     struct device *child;
> > +     struct device_link *link;
> > +
> > +     /* already shutdown, then skip this sub tree */
> > +     if (dev->shutdown)
> > +             return 0;
> > +
> > +     if (!dev->p)
> > +             goto check_consumers;
> > +
> > +     /* there is breakage of lock in __device_shutdown(), and the redundant
> > +      * ref++ on srcu protected consumer is harmless since shutdown is not
> > +      * hot path.
> > +      */
> > +     get_device(dev);
> > +
> > +     klist_iter_init(&dev->p->klist_children, &i);
> > +     while ((child = next_device(&i)))
> > +             device_for_each_child_shutdown(child);
>
> Why don't you use device_for_each_child() here?
>
OK, I will try use it.

> > +     klist_iter_exit(&i);
> > +
> > +check_consumers:
> > +     list_for_each_entry_rcu(link, &dev->links.consumers, s_node) {
> > +             if (!link->consumer->shutdown)
> > +                     device_for_each_child_shutdown(link->consumer);
> > +     }
> > +
> > +     __device_shutdown(dev);
> > +     put_device(dev);
>
> Possible reference counter imbalance AFAICS.
>
Yes, get_device() should be ahead of "if (!dev->p)". Is there anything else I missed?

> > +     return 0;
> > +}
>
> Well, instead of doing this dance, we might as well walk dpm_list here as it
> is in the right order.
>
Sorry, do you mean that using the same way to manage the dpm_list?

> Of course, that would require dpm_list to be available for CONFIG_PM unset,
> but it may be a better approach long term.
>
> > +
> >  /**
> >   * device_shutdown - call ->shutdown() on each device to shutdown.
> >   */
> >  void device_shutdown(void)
> >  {
> >       struct device *dev;
> > +     int idx;
> >
> > +     idx = device_links_read_lock();
> >       spin_lock(&devices_kset->list_lock);
> >       /*
> >        * Walk the devices list backward, shutting down each in turn.
> > @@ -2866,11 +2903,12 @@ void device_shutdown(void)
> >        * devices offline, even as the system is shutting down.
> >        */
> >       while (!list_empty(&devices_kset->list)) {
> > -             dev = list_entry(devices_kset->list.prev, struct device,
> > +             dev = list_entry(devices_kset->list.next, struct device,
> >                               kobj.entry);
> > -             __device_shutdown(dev);
> > +             device_for_each_child_shutdown(dev);
> >       }
> >       spin_unlock(&devices_kset->list_lock);
> > +     device_links_read_unlock(idx);
> >  }
> >
> >  /*
> > diff --git a/include/linux/device.h b/include/linux/device.h
> > index 055a69d..8a0f784 100644
> > --- a/include/linux/device.h
> > +++ b/include/linux/device.h
> > @@ -1003,6 +1003,7 @@ struct device {
> >       bool                    offline:1;
> >       bool                    of_node_reused:1;
> >       bool                    dma_32bit_limit:1;
> > +     bool                    shutdown:1; /* one direction: false->true */
> >  };
> >
> >  static inline struct device *kobj_to_dev(struct kobject *kobj)
> >
>
> If the device_kset_move_last() in really_probe() is the only problem,
> I'd rather try to fix that one in the first place.
>
> Why is it needed?
>
I had tried, but it turns out not easy to achieve. The code is
https://patchwork.kernel.org/patch/10485195/. And I make a detailed
description of the algorithm in this patch's commit log. To be more
detailed, we face the potential out of order issue in really_probe()
like : 0th. [ consumer-X, child_a, ...., child_z] [... consumer_a,
..., consumer_z, ...] supplier-X //(note child_a is supplier of
consumer_a).  To address all the potential out of order item in the
affected section [... consumer_a, ..., consumer_z, ...],  it will
incur two nested recursions.  1st, moving  consumer-X and its
descendants after supplier-X,  2nd, moving consumer_a after child_a,
3rd. the 2nd step may pose the same situation of 0th.  Besides the two
interleaved recursion,  the breakage of spin lock requires more effort
to protect the item from disappearing in linked-list  (which I did not
implement in the https://patchwork.kernel.org/patch/10485195/). Hence
I turn to this cheap method.

Thanks,
Pingfan

^ permalink raw reply

* Re: [PATCH kernel v3 2/2] KVM: PPC: Check if IOMMU page is contained in the pinned physical page
From: David Gibson @ 2018-07-06  5:06 UTC (permalink / raw)
  To: Alexey Kardashevskiy
  Cc: linuxppc-dev, kvm-ppc, Alex Williamson, Michael Ellerman,
	Paul Mackerras
In-Reply-To: <20180705151904.17de7322@aik.ozlabs.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 6662 bytes --]

On Thu, Jul 05, 2018 at 03:19:04PM +1000, Alexey Kardashevskiy wrote:
> On Thu, 5 Jul 2018 12:42:20 +1000
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Wed, Jul 04, 2018 at 03:00:52PM +1000, Alexey Kardashevskiy wrote:
> > > A VM which has:
> > >  - a DMA capable device passed through to it (eg. network card);
> > >  - running a malicious kernel that ignores H_PUT_TCE failure;
> > >  - capability of using IOMMU pages bigger than physical pages
> > > can create an IOMMU mapping that exposes (for example) 16MB of
> > > the host physical memory to the device when only 64K was allocated to the VM.
> > > 
> > > The remaining 16MB - 64K will be some other content of host memory, possibly
> > > including pages of the VM, but also pages of host kernel memory, host
> > > programs or other VMs.
> > > 
> > > The attacking VM does not control the location of the page it can map,
> > > and is only allowed to map as many pages as it has pages of RAM.
> > > 
> > > We already have a check in drivers/vfio/vfio_iommu_spapr_tce.c that
> > > an IOMMU page is contained in the physical page so the PCI hardware won't
> > > get access to unassigned host memory; however this check is missing in
> > > the KVM fastpath (H_PUT_TCE accelerated code). We were lucky so far and
> > > did not hit this yet as the very first time when the mapping happens
> > > we do not have tbl::it_userspace allocated yet and fall back to
> > > the userspace which in turn calls VFIO IOMMU driver, this fails and
> > > the guest does not retry.
> > > 
> > > This stores the smallest preregistered page size in the preregistered
> > > region descriptor and changes the mm_iommu_xxx API to check this against
> > > the IOMMU page size. This only allows huge pages use if the entire
> > > preregistered block is backed with huge pages which are completely
> > > contained in the preregistered chunk; otherwise this defaults to PAGE_SIZE.
> > > 
> > > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>  
> > 
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > 
> > On the grounds that I think this version is safe, which the old one
> > wasn't.  However it still has some flaws..
> > 
> > [snip]
> > > @@ -125,7 +126,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
> > >  {
> > >  	struct mm_iommu_table_group_mem_t *mem;
> > >  	long i, j, ret = 0, locked_entries = 0;
> > > -	struct page *page = NULL;
> > > +	unsigned int pageshift;
> > > +	struct page *page = NULL, *head = NULL;
> > >  
> > >  	mutex_lock(&mem_list_mutex);
> > >  
> > > @@ -159,6 +161,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
> > >  		goto unlock_exit;
> > >  	}
> > >  
> > > +	mem->pageshift = 64;
> > >  	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
> > >  	if (!mem->hpas) {
> > >  		kfree(mem);
> > > @@ -199,9 +202,35 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
> > >  			}
> > >  		}
> > >  populate:
> > > +		pageshift = PAGE_SHIFT;
> > > +		if (PageCompound(page)) {
> > > +			/* Make sure huge page is contained completely */
> > > +			struct page *tmphead = compound_head(page);
> > > +			unsigned int n = compound_order(tmphead);
> > > +
> > > +			if (!head) {
> > > +				/* Is it a head of a huge page? */
> > > +				if (page == tmphead) {
> > > +					head = tmphead;
> > > +					pageshift += n;
> > > +				}
> > > +			} else if (head == tmphead) {
> > > +				/* Still same huge page, good */
> > > +				pageshift += n;
> > > +
> > > +				/* End of the huge page */
> > > +				if (page - head == (1UL << n) - 1)
> > > +					head = NULL;
> > > +			}
> > > +		}
> > > +		mem->pageshift = min(mem->pageshift, pageshift);
> > >  		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
> > >  	}
> > >  
> > > +	/* We have an incomplete huge page, default to PAGE_SHIFT */
> > > +	if (head)
> > > +		mem->pageshift = PAGE_SHIFT;
> > > +  
> > 
> > So, if the user attempts to prereg a region which starts or ends in
> > the middle of a hugepage, this logic will clamp the region's max page
> > shift down to PAGE_SHIFT.  That's safe, but not optimal.
> > 
> > Suppose userspace had an area backed with 16MiB hugepages, and wanted
> > to pre-reg a window that was 2MiB aligned, but not 16MiB aligned.  It
> > would still be safe to allow 2MiB TCEs, but the code above would clamp
> > it down to 64kiB (or 4kiB).
> > 
> > The code to do it is also pretty convoluted.
> > 
> > I think you'd be better off initializing mem->pageshift to the largest
> > possible natural alignment of the region:
> > 	mem->pageshift = ctz64(ua | (entries << PAGE_SHIFT));
> > 
> > Then it should just be sufficient to clamp pageshift down to
> > compound_order() + PAGE_SHIFT for each entry.
> 
> 
> I like this better, just one question - does hugetlbfs guarantee the @ua
> alignment if backed with an actual huge page?

So, yeah it does, as you determined.  And it has to - I don't know of
any MMU that allows for large pages that aren't naturally aligned, so
the uas would have to be aligned to actually map the pages into
userspace.

But... there's another more subtle case that I'm less sure about.
What you're actually checking for here is a compound page on the
physical side.  A hugetlbfs mapping in userspace is the main case
where I'd expect that, but, I'm not absolutely certain there can't be
some other case where a compound page is used to back a normal 64k
mapping in a user process.  If that is possible, it would probably
also be possible for the UA to end up misaligned with the compound
page's natural alignment.

I don't know of any case where that could happen, but I'm far from
confident it doesn't exist.  Things to consider:
   - mapping hugetlbfs, then mremap()ing part of it
   - a SHARED mapping, where it's aligned in one process and gets
     THPed, but is not aligned in the other
   - mmap() from a device or subsystem that provides some kind
     of IO or special memory that's handled with compound pages on the
     kernel side, but is just mapped into userspace with regular 64k
     PTEs
   - One process mapping libhugetlbfs, then another (say a debugger)
     attempting to map the first process's address space via
     /proc/pid/mem)
   - ..and that's just the ones I could think of quickly

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH kernel v4 2/2] KVM: PPC: Check if IOMMU page is contained in the pinned physical page
From: David Gibson @ 2018-07-06  5:13 UTC (permalink / raw)
  To: Alexey Kardashevskiy
  Cc: linuxppc-dev, kvm-ppc, Alex Williamson, Michael Ellerman,
	Paul Mackerras
In-Reply-To: <20180705080133.18690-3-aik@ozlabs.ru>

[-- Attachment #1: Type: text/plain, Size: 6235 bytes --]

On Thu, Jul 05, 2018 at 06:01:33PM +1000, Alexey Kardashevskiy wrote:
> A VM which has:
>  - a DMA capable device passed through to it (eg. network card);
>  - running a malicious kernel that ignores H_PUT_TCE failure;
>  - capability of using IOMMU pages bigger than physical pages
> can create an IOMMU mapping that exposes (for example) 16MB of
> the host physical memory to the device when only 64K was allocated to
> the VM.
> 
> The remaining 16MB - 64K will be some other content of host memory,
> possibly including pages of the VM, but also pages of host kernel memory,
> host programs or other VMs.
> 
> The attacking VM does not control the location of the page it can map,
> and is only allowed to map as many pages as it has pages of RAM.
> 
> We already have a check in drivers/vfio/vfio_iommu_spapr_tce.c that
> an IOMMU page is contained in the physical page so the PCI hardware won't
> get access to unassigned host memory; however this check is missing in
> the KVM fastpath (H_PUT_TCE accelerated code). We were lucky so far and
> did not hit this yet as the very first time when the mapping happens
> we do not have tbl::it_userspace allocated yet and fall back to
> the userspace which in turn calls VFIO IOMMU driver, this fails and
> the guest does not retry.
> 
> This stores the smallest preregistered page size in the preregistered
> region descriptor and changes the mm_iommu_xxx API to check this against
> the IOMMU page size. This calculates maximum page size as a minimum of
> the natural region alignment and compound page size.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

It's certainly better than what we have, though a couple of comments
still:

[snip]
> diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
> index abb4364..11e1029 100644
> --- a/arch/powerpc/mm/mmu_context_iommu.c
> +++ b/arch/powerpc/mm/mmu_context_iommu.c
> @@ -27,6 +27,7 @@ struct mm_iommu_table_group_mem_t {
>  	struct rcu_head rcu;
>  	unsigned long used;
>  	atomic64_t mapped;
> +	unsigned int pageshift;
>  	u64 ua;			/* userspace address */
>  	u64 entries;		/* number of entries in hpas[] */
>  	u64 *hpas;		/* vmalloc'ed */
> @@ -125,7 +126,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
>  {
>  	struct mm_iommu_table_group_mem_t *mem;
>  	long i, j, ret = 0, locked_entries = 0;
> -	struct page *page = NULL;
> +	unsigned int pageshift;
> +	struct page *page = NULL, *head = NULL;
>  
>  	mutex_lock(&mem_list_mutex);
>  
> @@ -159,6 +161,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
>  		goto unlock_exit;
>  	}
>  
> +	mem->pageshift = __builtin_ctzl(ua | (entries << PAGE_SHIFT));

This could definitely do with a comment saying what this is trying to
calculate.

Explicitly calling a _builtin_ is also kinda horrid.  I wrote my
sample thinking of qemu where there's a standard and widely used
ctz64().  Not sure if there's something else we should be using in kernelspace.

>  	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
>  	if (!mem->hpas) {
>  		kfree(mem);
> @@ -199,9 +202,17 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
>  			}
>  		}
>  populate:
> +		pageshift = PAGE_SHIFT;
> +		if (PageCompound(page))
> +			pageshift += compound_order(compound_head(page));

So, as I said in reply to the earlier version, I'm not 100% sure there
isn't some way a compound_page could end up mapped unaligned in
userspace (with smaller userspace mappings of a larger underlying
physical page).  A WARN_ON() and fallback to assuming pageshift =
PAGE_SHIFT in that case would probably be a good idea.

> +		mem->pageshift = min(mem->pageshift, pageshift);
>  		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
>  	}
>  
> +	/* We have an incomplete huge page, default to PAGE_SHIFT */
> +	if (head)
> +		mem->pageshift = PAGE_SHIFT;
> +
>  	atomic64_set(&mem->mapped, 1);
>  	mem->used = 1;
>  	mem->ua = ua;
> @@ -349,7 +360,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
>  EXPORT_SYMBOL_GPL(mm_iommu_find);
>  
>  long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
> -		unsigned long ua, unsigned long *hpa)
> +		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
>  {
>  	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
>  	u64 *va = &mem->hpas[entry];
> @@ -357,6 +368,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
>  	if (entry >= mem->entries)
>  		return -EFAULT;
>  
> +	if (pageshift > mem->pageshift)
> +		return -EFAULT;
> +
>  	*hpa = *va | (ua & ~PAGE_MASK);
>  
>  	return 0;
> @@ -364,7 +378,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
>  EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
>  
>  long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
> -		unsigned long ua, unsigned long *hpa)
> +		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
>  {
>  	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
>  	void *va = &mem->hpas[entry];
> @@ -373,6 +387,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
>  	if (entry >= mem->entries)
>  		return -EFAULT;
>  
> +	if (pageshift > mem->pageshift)
> +		return -EFAULT;
> +
>  	pa = (void *) vmalloc_to_phys(va);
>  	if (!pa)
>  		return -EFAULT;
> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
> index 2da5f05..7cd63b0 100644
> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
> @@ -467,7 +467,7 @@ static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
>  	if (!mem)
>  		return -EINVAL;
>  
> -	ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
> +	ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
>  	if (ret)
>  		return -EINVAL;
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox