* [PATCH v1 02/11] powerpc: Rename PPC_NATIVE to PPC_HASH_MMU_NATIVE
From: Nicholas Piggin @ 2021-10-15 15:46 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211015154624.922960-1-npiggin@gmail.com>
PPC_NATIVE now only controls the native HPT code, so rename it to be
more descriptive. Restrict it to Book3S only.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/mm/book3s64/Makefile | 2 +-
arch/powerpc/mm/book3s64/hash_utils.c | 2 +-
arch/powerpc/platforms/52xx/Kconfig | 2 +-
arch/powerpc/platforms/Kconfig | 4 ++--
arch/powerpc/platforms/cell/Kconfig | 2 +-
arch/powerpc/platforms/chrp/Kconfig | 2 +-
arch/powerpc/platforms/embedded6xx/Kconfig | 2 +-
arch/powerpc/platforms/maple/Kconfig | 2 +-
arch/powerpc/platforms/microwatt/Kconfig | 2 +-
arch/powerpc/platforms/pasemi/Kconfig | 2 +-
arch/powerpc/platforms/powermac/Kconfig | 2 +-
arch/powerpc/platforms/powernv/Kconfig | 2 +-
arch/powerpc/platforms/pseries/Kconfig | 2 +-
13 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index 1b56d3af47d4..319f4b7f3357 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -6,7 +6,7 @@ CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
obj-y += hash_pgtable.o hash_utils.o slb.o \
mmu_context.o pgtable.o hash_tlb.o
-obj-$(CONFIG_PPC_NATIVE) += hash_native.o
+obj-$(CONFIG_PPC_HASH_MMU_NATIVE) += hash_native.o
obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index c145776d3ae5..ebe3044711ce 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1091,7 +1091,7 @@ void __init hash__early_init_mmu(void)
ps3_early_mm_init();
else if (firmware_has_feature(FW_FEATURE_LPAR))
hpte_init_pseries();
- else if (IS_ENABLED(CONFIG_PPC_NATIVE))
+ else if (IS_ENABLED(CONFIG_PPC_HASH_MMU_NATIVE))
hpte_init_native();
if (!mmu_hash_ops.hpte_insert)
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index 99d60acc20c8..b72ed2950ca8 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -34,7 +34,7 @@ config PPC_EFIKA
bool "bPlan Efika 5k2. MPC5200B based computer"
depends on PPC_MPC52xx
select PPC_RTAS
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
config PPC_LITE5200
bool "Freescale Lite5200 Eval Board"
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index e02d29a9d12f..d41dad227de8 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -40,9 +40,9 @@ config EPAPR_PARAVIRT
In case of doubt, say Y
-config PPC_NATIVE
+config PPC_HASH_MMU_NATIVE
bool
- depends on PPC_BOOK3S_32 || PPC64
+ depends on PPC_BOOK3S
help
Support for running natively on the hardware, i.e. without
a hypervisor. This option is not user-selectable but should
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index cb70c5f25bc6..db4465c51b56 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -8,7 +8,7 @@ config PPC_CELL_COMMON
select PPC_DCR_MMIO
select PPC_INDIRECT_PIO
select PPC_INDIRECT_MMIO
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
select PPC_RTAS
select IRQ_EDGE_EOI_HANDLER
diff --git a/arch/powerpc/platforms/chrp/Kconfig b/arch/powerpc/platforms/chrp/Kconfig
index 9b5c5505718a..ff30ed579a39 100644
--- a/arch/powerpc/platforms/chrp/Kconfig
+++ b/arch/powerpc/platforms/chrp/Kconfig
@@ -11,6 +11,6 @@ config PPC_CHRP
select RTAS_ERROR_LOGGING
select PPC_MPC106
select PPC_UDBG_16550
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
select FORCE_PCI
default y
diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig
index 4c6d703a4284..c54786f8461e 100644
--- a/arch/powerpc/platforms/embedded6xx/Kconfig
+++ b/arch/powerpc/platforms/embedded6xx/Kconfig
@@ -55,7 +55,7 @@ config MVME5100
select FORCE_PCI
select PPC_INDIRECT_PCI
select PPC_I8259
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
select PPC_UDBG_16550
help
This option enables support for the Motorola (now Emerson) MVME5100
diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig
index 86ae210bee9a..7fd84311ade5 100644
--- a/arch/powerpc/platforms/maple/Kconfig
+++ b/arch/powerpc/platforms/maple/Kconfig
@@ -9,7 +9,7 @@ config PPC_MAPLE
select GENERIC_TBSYNC
select PPC_UDBG_16550
select PPC_970_NAP
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
select PPC_RTAS
select MMIO_NVRAM
select ATA_NONSTANDARD if ATA
diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig
index 8f6a81978461..62b51e37fc05 100644
--- a/arch/powerpc/platforms/microwatt/Kconfig
+++ b/arch/powerpc/platforms/microwatt/Kconfig
@@ -5,7 +5,7 @@ config PPC_MICROWATT
select PPC_XICS
select PPC_ICS_NATIVE
select PPC_ICP_NATIVE
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
select PPC_UDBG_16550
select ARCH_RANDOM
help
diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig
index c52731a7773f..bc7137353a7f 100644
--- a/arch/powerpc/platforms/pasemi/Kconfig
+++ b/arch/powerpc/platforms/pasemi/Kconfig
@@ -5,7 +5,7 @@ config PPC_PASEMI
select MPIC
select FORCE_PCI
select PPC_UDBG_16550
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
select MPIC_BROKEN_REGREAD
help
This option enables support for PA Semi's PWRficient line
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index b97bf12801eb..2b56df145b82 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -6,7 +6,7 @@ config PPC_PMAC
select FORCE_PCI
select PPC_INDIRECT_PCI if PPC32
select PPC_MPC106 if PPC32
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
select ZONE_DMA if PPC32
default y
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 043eefbbdd28..cd754e116184 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -2,7 +2,7 @@
config PPC_POWERNV
depends on PPC64 && PPC_BOOK3S
bool "IBM PowerNV (Non-Virtualized) platform support"
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
select PPC_XICS
select PPC_ICP_NATIVE
select PPC_XIVE_NATIVE
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 5e037df2a3a1..69a1ff8c079b 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -17,7 +17,7 @@ config PPC_PSERIES
select PPC_RTAS_DAEMON
select RTAS_ERROR_LOGGING
select PPC_UDBG_16550
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
select PPC_DOORBELL
select HOTPLUG_CPU
select ARCH_RANDOM
--
2.23.0
^ permalink raw reply related
* [PATCH v1 03/11] powerpc/pseries: Stop selecting PPC_HASH_MMU_NATIVE
From: Nicholas Piggin @ 2021-10-15 15:46 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211015154624.922960-1-npiggin@gmail.com>
The pseries platform does not use the native hash code but the PAPR
virtualised hash interfaces, so remove PPC_HASH_MMU_NATIVE.
This requires moving tlbiel code from hash_native.c to hash_utils.c.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/book3s/64/tlbflush.h | 4 -
arch/powerpc/mm/book3s64/hash_native.c | 104 ------------------
arch/powerpc/mm/book3s64/hash_utils.c | 104 ++++++++++++++++++
arch/powerpc/platforms/pseries/Kconfig | 1 -
4 files changed, 104 insertions(+), 109 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 215973b4cb26..d2e80f178b6d 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -14,7 +14,6 @@ enum {
TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */
};
-#ifdef CONFIG_PPC_NATIVE
static inline void tlbiel_all(void)
{
/*
@@ -30,9 +29,6 @@ static inline void tlbiel_all(void)
else
hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
}
-#else
-static inline void tlbiel_all(void) { BUG(); }
-#endif
static inline void tlbiel_all_lpid(bool radix)
{
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index d8279bfe68ea..d2a320828c0b 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -43,110 +43,6 @@
static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
-static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
-{
- unsigned long rb;
-
- rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
-
- asm volatile("tlbiel %0" : : "r" (rb));
-}
-
-/*
- * tlbiel instruction for hash, set invalidation
- * i.e., r=1 and is=01 or is=10 or is=11
- */
-static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
- unsigned int pid,
- unsigned int ric, unsigned int prs)
-{
- unsigned long rb;
- unsigned long rs;
- unsigned int r = 0; /* hash format */
-
- rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
- rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
-
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
- : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r)
- : "memory");
-}
-
-
-static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
-{
- unsigned int set;
-
- asm volatile("ptesync": : :"memory");
-
- for (set = 0; set < num_sets; set++)
- tlbiel_hash_set_isa206(set, is);
-
- ppc_after_tlbiel_barrier();
-}
-
-static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
-{
- unsigned int set;
-
- asm volatile("ptesync": : :"memory");
-
- /*
- * Flush the partition table cache if this is HV mode.
- */
- if (early_cpu_has_feature(CPU_FTR_HVMODE))
- tlbiel_hash_set_isa300(0, is, 0, 2, 0);
-
- /*
- * Now invalidate the process table cache. UPRT=0 HPT modes (what
- * current hardware implements) do not use the process table, but
- * add the flushes anyway.
- *
- * From ISA v3.0B p. 1078:
- * The following forms are invalid.
- * * PRS=1, R=0, and RIC!=2 (The only process-scoped
- * HPT caching is of the Process Table.)
- */
- tlbiel_hash_set_isa300(0, is, 0, 2, 1);
-
- /*
- * Then flush the sets of the TLB proper. Hash mode uses
- * partition scoped TLB translations, which may be flushed
- * in !HV mode.
- */
- for (set = 0; set < num_sets; set++)
- tlbiel_hash_set_isa300(set, is, 0, 0, 0);
-
- ppc_after_tlbiel_barrier();
-
- asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
-}
-
-void hash__tlbiel_all(unsigned int action)
-{
- unsigned int is;
-
- switch (action) {
- case TLB_INVAL_SCOPE_GLOBAL:
- is = 3;
- break;
- case TLB_INVAL_SCOPE_LPID:
- is = 2;
- break;
- default:
- BUG();
- }
-
- if (early_cpu_has_feature(CPU_FTR_ARCH_300))
- tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
- else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
- tlbiel_all_isa206(POWER8_TLB_SETS, is);
- else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
- tlbiel_all_isa206(POWER7_TLB_SETS, is);
- else
- WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
-}
-
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
int apsize, int ssize)
{
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index ebe3044711ce..ffc52ff0b3f0 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -175,6 +175,110 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = {
},
};
+static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
+{
+ unsigned long rb;
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+
+ asm volatile("tlbiel %0" : : "r" (rb));
+}
+
+/*
+ * tlbiel instruction for hash, set invalidation
+ * i.e., r=1 and is=01 or is=10 or is=11
+ */
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+ unsigned int pid,
+ unsigned int ric, unsigned int prs)
+{
+ unsigned long rb;
+ unsigned long rs;
+ unsigned int r = 0; /* hash format */
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+ rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
+ : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r)
+ : "memory");
+}
+
+
+static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ for (set = 0; set < num_sets; set++)
+ tlbiel_hash_set_isa206(set, is);
+
+ ppc_after_tlbiel_barrier();
+}
+
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ /*
+ * Flush the partition table cache if this is HV mode.
+ */
+ if (early_cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_hash_set_isa300(0, is, 0, 2, 0);
+
+ /*
+ * Now invalidate the process table cache. UPRT=0 HPT modes (what
+ * current hardware implements) do not use the process table, but
+ * add the flushes anyway.
+ *
+ * From ISA v3.0B p. 1078:
+ * The following forms are invalid.
+ * * PRS=1, R=0, and RIC!=2 (The only process-scoped
+ * HPT caching is of the Process Table.)
+ */
+ tlbiel_hash_set_isa300(0, is, 0, 2, 1);
+
+ /*
+ * Then flush the sets of the TLB proper. Hash mode uses
+ * partition scoped TLB translations, which may be flushed
+ * in !HV mode.
+ */
+ for (set = 0; set < num_sets; set++)
+ tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+
+ ppc_after_tlbiel_barrier();
+
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
+void hash__tlbiel_all(unsigned int action)
+{
+ unsigned int is;
+
+ switch (action) {
+ case TLB_INVAL_SCOPE_GLOBAL:
+ is = 3;
+ break;
+ case TLB_INVAL_SCOPE_LPID:
+ is = 2;
+ break;
+ default:
+ BUG();
+ }
+
+ if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+ tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
+ else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+ tlbiel_all_isa206(POWER8_TLB_SETS, is);
+ else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
+ tlbiel_all_isa206(POWER7_TLB_SETS, is);
+ else
+ WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
+}
+
/*
* 'R' and 'C' update notes:
* - Under pHyp or KVM, the updatepp path will not set C, thus it *will*
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 69a1ff8c079b..98ab9697ab5e 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -17,7 +17,6 @@ config PPC_PSERIES
select PPC_RTAS_DAEMON
select RTAS_ERROR_LOGGING
select PPC_UDBG_16550
- select PPC_HASH_MMU_NATIVE
select PPC_DOORBELL
select HOTPLUG_CPU
select ARCH_RANDOM
--
2.23.0
^ permalink raw reply related
* [PATCH v1 04/11] powerpc/64s: Move and rename do_bad_slb_fault as it is not hash specific
From: Nicholas Piggin @ 2021-10-15 15:46 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211015154624.922960-1-npiggin@gmail.com>
slb.c is hash-specific SLB management, but do_bad_slb_fault deals with
segment interrupts that occur with radix MMU as well.
---
arch/powerpc/include/asm/interrupt.h | 2 +-
arch/powerpc/kernel/exceptions-64s.S | 4 ++--
arch/powerpc/mm/book3s64/slb.c | 16 ----------------
arch/powerpc/mm/fault.c | 17 +++++++++++++++++
4 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index a1d238255f07..3487aab12229 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -564,7 +564,7 @@ DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
/* slb.c */
DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault);
-DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt);
/* hash_utils.c */
DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index eaf1f72131a1..046c99e31d01 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1430,7 +1430,7 @@ MMU_FTR_SECTION_ELSE
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_bad_slb_fault
+ bl do_bad_segment_interrupt
b interrupt_return_srr
@@ -1510,7 +1510,7 @@ MMU_FTR_SECTION_ELSE
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_bad_slb_fault
+ bl do_bad_segment_interrupt
b interrupt_return_srr
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index f0037bcc47a0..31f4cef3adac 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -868,19 +868,3 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
return err;
}
}
-
-DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault)
-{
- int err = regs->result;
-
- if (err == -EFAULT) {
- if (user_mode(regs))
- _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
- else
- bad_page_fault(regs, SIGSEGV);
- } else if (err == -EINVAL) {
- unrecoverable_exception(regs);
- } else {
- BUG();
- }
-}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index a8d0ce85d39a..53ddcae0ac9e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -35,6 +35,7 @@
#include <linux/kfence.h>
#include <linux/pkeys.h>
+#include <asm/asm-prototypes.h>
#include <asm/firmware.h>
#include <asm/interrupt.h>
#include <asm/page.h>
@@ -620,4 +621,20 @@ DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv)
{
bad_page_fault(regs, SIGSEGV);
}
+
+DEFINE_INTERRUPT_HANDLER(do_bad_segment_interrupt)
+{
+ int err = regs->result;
+
+ if (err == -EFAULT) {
+ if (user_mode(regs))
+ _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
+ else
+ bad_page_fault(regs, SIGSEGV);
+ } else if (err == -EINVAL) {
+ unrecoverable_exception(regs);
+ } else {
+ BUG();
+ }
+}
#endif
--
2.23.0
^ permalink raw reply related
* [PATCH v1 05/11] powerpc/pseries: move pseries_lpar_register_process_table() out from hash specific code
From: Nicholas Piggin @ 2021-10-15 15:46 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211015154624.922960-1-npiggin@gmail.com>
This reduces ifdefs in a later change making hash support configurable.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/platforms/pseries/lpar.c | 56 +++++++++++++--------------
1 file changed, 28 insertions(+), 28 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 3df6bdfea475..06d6a824c0dc 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -712,6 +712,34 @@ void vpa_init(int cpu)
#ifdef CONFIG_PPC_BOOK3S_64
+static int pseries_lpar_register_process_table(unsigned long base,
+ unsigned long page_size, unsigned long table_size)
+{
+ long rc;
+ unsigned long flags = 0;
+
+ if (table_size)
+ flags |= PROC_TABLE_NEW;
+ if (radix_enabled()) {
+ flags |= PROC_TABLE_RADIX;
+ if (mmu_has_feature(MMU_FTR_GTSE))
+ flags |= PROC_TABLE_GTSE;
+ } else
+ flags |= PROC_TABLE_HPT_SLB;
+ for (;;) {
+ rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
+ page_size, table_size);
+ if (!H_IS_LONG_BUSY(rc))
+ break;
+ mdelay(get_longbusy_msecs(rc));
+ }
+ if (rc != H_SUCCESS) {
+ pr_err("Failed to register process table (rc=%ld)\n", rc);
+ BUG();
+ }
+ return rc;
+}
+
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long vpn, unsigned long pa,
unsigned long rflags, unsigned long vflags,
@@ -1680,34 +1708,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
return 0;
}
-static int pseries_lpar_register_process_table(unsigned long base,
- unsigned long page_size, unsigned long table_size)
-{
- long rc;
- unsigned long flags = 0;
-
- if (table_size)
- flags |= PROC_TABLE_NEW;
- if (radix_enabled()) {
- flags |= PROC_TABLE_RADIX;
- if (mmu_has_feature(MMU_FTR_GTSE))
- flags |= PROC_TABLE_GTSE;
- } else
- flags |= PROC_TABLE_HPT_SLB;
- for (;;) {
- rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
- page_size, table_size);
- if (!H_IS_LONG_BUSY(rc))
- break;
- mdelay(get_longbusy_msecs(rc));
- }
- if (rc != H_SUCCESS) {
- pr_err("Failed to register process table (rc=%ld)\n", rc);
- BUG();
- }
- return rc;
-}
-
void __init hpte_init_pseries(void)
{
mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate;
--
2.23.0
^ permalink raw reply related
* [PATCH v1 06/11] powerpc/pseries: lparcfg don't include slb_size line in radix mode
From: Nicholas Piggin @ 2021-10-15 15:46 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211015154624.922960-1-npiggin@gmail.com>
This avoids a change in behaviour in the later patch making hash
support configurable. This is possibly a user interface change, so
the alternative would be a hard-coded slb_size=0 here.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/platforms/pseries/lparcfg.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index f71eac74ea92..3354c00914fa 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -532,7 +532,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
lppaca_shared_proc(get_lppaca()));
#ifdef CONFIG_PPC_BOOK3S_64
- seq_printf(m, "slb_size=%d\n", mmu_slb_size);
+ if (!radix_enabled())
+ seq_printf(m, "slb_size=%d\n", mmu_slb_size);
#endif
parse_em_data(m);
maxmem_data(m);
--
2.23.0
^ permalink raw reply related
* [PATCH v1 07/11] powerpc/64s: move THP trace point creation out of hash specific file
From: Nicholas Piggin @ 2021-10-15 15:46 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211015154624.922960-1-npiggin@gmail.com>
In preparation for making hash MMU support configurable, move THP
trace point function definitions out of an otherwise hash specific
file.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/mm/book3s64/Makefile | 2 +-
arch/powerpc/mm/book3s64/hash_pgtable.c | 1 -
arch/powerpc/mm/book3s64/pgtable.c | 1 +
arch/powerpc/mm/book3s64/trace.c | 8 ++++++++
4 files changed, 10 insertions(+), 2 deletions(-)
create mode 100644 arch/powerpc/mm/book3s64/trace.c
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index 319f4b7f3357..1579e18e098d 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -5,7 +5,7 @@ ccflags-y := $(NO_MINIMAL_TOC)
CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
obj-y += hash_pgtable.o hash_utils.o slb.o \
- mmu_context.o pgtable.o hash_tlb.o
+ mmu_context.o pgtable.o hash_tlb.o trace.o
obj-$(CONFIG_PPC_HASH_MMU_NATIVE) += hash_native.o
obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index ad5eff097d31..7ce8914992e3 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -16,7 +16,6 @@
#include <mm/mmu_decl.h>
-#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>
#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 9e16c7b1a6c5..049843c8c875 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -28,6 +28,7 @@ unsigned long __pmd_frag_size_shift;
EXPORT_SYMBOL(__pmd_frag_size_shift);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
/*
* This is called when relaxing access to a hugepage. It's also called in the page
* fault path when we don't hit any of the major fault cases, ie, a minor
diff --git a/arch/powerpc/mm/book3s64/trace.c b/arch/powerpc/mm/book3s64/trace.c
new file mode 100644
index 000000000000..b86e7b906257
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/trace.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file is for defining trace points and trace related helpers.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define CREATE_TRACE_POINTS
+#include <trace/events/thp.h>
+#endif
--
2.23.0
^ permalink raw reply related
* [PATCH v1 08/11] powerpc/64s: Make flush_and_reload_slb a no-op when radix is enabled
From: Nicholas Piggin @ 2021-10-15 15:46 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211015154624.922960-1-npiggin@gmail.com>
The radix test can exclude slb_flush_all_realmode() from being called
because flush_and_reload_slb() is only expected to flush ERAT when
called by flush_erat(), which is only on pre-ISA v3.0 CPUs that do not
support radix.
This helps the later change to make hash support configurable to not
introduce runtime changes to radix mode behaviour.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kernel/mce_power.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index c2f55fe7092d..cf5263b648fc 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -80,12 +80,12 @@ static bool mce_in_guest(void)
#ifdef CONFIG_PPC_BOOK3S_64
void flush_and_reload_slb(void)
{
- /* Invalidate all SLBs */
- slb_flush_all_realmode();
-
if (early_radix_enabled())
return;
+ /* Invalidate all SLBs */
+ slb_flush_all_realmode();
+
/*
* This probably shouldn't happen, but it may be possible it's
* called in early boot before SLB shadows are allocated.
--
2.23.0
^ permalink raw reply related
* [PATCH v1 09/11] powerpc/64s: Make hash MMU code build configurable
From: Nicholas Piggin @ 2021-10-15 15:46 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211015154624.922960-1-npiggin@gmail.com>
Introduce a new option CONFIG_PPC_64S_HASH_MMU which allows the 64s hash
MMU code to be compiled out if radix is selected and the minimum
supported CPU type is POWER9 or higher, and KVM is not selected.
This saves 128kB kernel image size (90kB text) on powernv_defconfig
minus KVM, 350kB on pseries_defconfig minus KVM, 40kB on a tiny config.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/Kconfig | 1 +
arch/powerpc/include/asm/book3s/64/mmu.h | 22 ++++++++++++++++++-
.../include/asm/book3s/64/tlbflush-hash.h | 7 ++++++
arch/powerpc/include/asm/book3s/pgtable.h | 4 ++++
arch/powerpc/include/asm/mmu.h | 14 +++++++++---
arch/powerpc/include/asm/mmu_context.h | 2 ++
arch/powerpc/include/asm/paca.h | 8 +++++++
arch/powerpc/kernel/asm-offsets.c | 2 ++
arch/powerpc/kernel/dt_cpu_ftrs.c | 8 ++++++-
arch/powerpc/kernel/entry_64.S | 4 ++--
arch/powerpc/kernel/exceptions-64s.S | 16 ++++++++++++++
arch/powerpc/kernel/mce.c | 2 +-
arch/powerpc/kernel/mce_power.c | 10 ++++++---
arch/powerpc/kernel/paca.c | 18 ++++++---------
arch/powerpc/kernel/process.c | 13 ++++++-----
arch/powerpc/kernel/prom.c | 2 ++
arch/powerpc/kernel/setup_64.c | 4 ++++
arch/powerpc/kexec/core_64.c | 4 ++--
arch/powerpc/kexec/ranges.c | 4 ++++
arch/powerpc/kvm/Kconfig | 1 +
arch/powerpc/mm/book3s64/Makefile | 17 ++++++++------
arch/powerpc/mm/book3s64/hash_utils.c | 10 ---------
.../{hash_hugetlbpage.c => hugetlbpage.c} | 6 +++++
arch/powerpc/mm/book3s64/mmu_context.c | 16 ++++++++++++++
arch/powerpc/mm/book3s64/pgtable.c | 12 ++++++++++
arch/powerpc/mm/book3s64/radix_pgtable.c | 4 ++++
arch/powerpc/mm/copro_fault.c | 2 ++
arch/powerpc/mm/pgtable.c | 10 ++++++---
arch/powerpc/platforms/Kconfig.cputype | 21 +++++++++++++++++-
arch/powerpc/platforms/cell/Kconfig | 1 +
arch/powerpc/platforms/maple/Kconfig | 1 +
arch/powerpc/platforms/microwatt/Kconfig | 2 +-
arch/powerpc/platforms/pasemi/Kconfig | 1 +
arch/powerpc/platforms/powermac/Kconfig | 1 +
arch/powerpc/platforms/powernv/Kconfig | 2 +-
arch/powerpc/platforms/powernv/idle.c | 2 ++
arch/powerpc/platforms/powernv/setup.c | 2 ++
arch/powerpc/platforms/pseries/lpar.c | 11 ++++++++--
arch/powerpc/platforms/pseries/lparcfg.c | 2 +-
arch/powerpc/platforms/pseries/mobility.c | 6 +++++
arch/powerpc/platforms/pseries/ras.c | 2 ++
arch/powerpc/platforms/pseries/reconfig.c | 2 ++
arch/powerpc/platforms/pseries/setup.c | 6 +++--
arch/powerpc/xmon/xmon.c | 8 +++++--
44 files changed, 233 insertions(+), 60 deletions(-)
rename arch/powerpc/mm/book3s64/{hash_hugetlbpage.c => hugetlbpage.c} (95%)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ba5b66189358..3c36511e6133 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -936,6 +936,7 @@ config PPC_MEM_KEYS
prompt "PowerPC Memory Protection Keys"
def_bool y
depends on PPC_BOOK3S_64
+ depends on PPC_64S_HASH_MMU
select ARCH_USES_HIGH_VMA_FLAGS
select ARCH_HAS_PKEYS
help
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index c02f42d1031e..857dc88b0043 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -98,7 +98,9 @@ typedef struct {
* from EA and new context ids to build the new VAs.
*/
mm_context_id_t id;
+#ifdef CONFIG_PPC_64S_HASH_MMU
mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
+#endif
};
/* Number of bits in the mm_cpumask */
@@ -110,7 +112,9 @@ typedef struct {
/* Number of user space windows opened in process mm_context */
atomic_t vas_windows;
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct hash_mm_context *hash_context;
+#endif
void __user *vdso;
/*
@@ -133,6 +137,7 @@ typedef struct {
#endif
} mm_context_t;
+#ifdef CONFIG_PPC_64S_HASH_MMU
static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
{
return ctx->hash_context->user_psize;
@@ -187,11 +192,22 @@ static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
}
#endif
+#endif
+
/*
* The current system page and segment sizes
*/
-extern int mmu_linear_psize;
+#if defined(CONFIG_PPC_RADIX_MMU) && !defined(CONFIG_PPC_64S_HASH_MMU)
+#ifdef CONFIG_PPC_64K_PAGES
+#define mmu_virtual_psize MMU_PAGE_64K
+#else
+#define mmu_virtual_psize MMU_PAGE_4K
+#endif
+#else
extern int mmu_virtual_psize;
+#endif
+
+extern int mmu_linear_psize;
extern int mmu_vmalloc_psize;
extern int mmu_vmemmap_psize;
extern int mmu_io_psize;
@@ -228,6 +244,7 @@ extern void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
{
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* Hash has more strict restrictions. At this point we don't
* know which translations we will pick. Hence go with hash
@@ -235,6 +252,7 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
*/
return hash__setup_initial_memory_limit(first_memblock_base,
first_memblock_size);
+#endif
}
#ifdef CONFIG_PPC_PSERIES
@@ -255,6 +273,7 @@ static inline void radix_init_pseries(void) { }
void cleanup_cpu_mmu_context(void);
#endif
+#ifdef CONFIG_PPC_64S_HASH_MMU
static inline int get_user_context(mm_context_t *ctx, unsigned long ea)
{
int index = ea >> MAX_EA_BITS_PER_CONTEXT;
@@ -274,6 +293,7 @@ static inline unsigned long get_user_vsid(mm_context_t *ctx,
return get_vsid(context, ea, ssize);
}
+#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index 3b95769739c7..06f4bd09eecf 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -112,8 +112,15 @@ static inline void hash__flush_tlb_kernel_range(unsigned long start,
struct mmu_gather;
extern void hash__tlb_flush(struct mmu_gather *tlb);
+extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Private function for use by PCI IO mapping code */
extern void __flush_hash_table_range(unsigned long start, unsigned long end);
extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr);
+#else
+static inline void __flush_hash_table_range(unsigned long start, unsigned long end) { }
+#endif
#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index ad130e15a126..818d7c851d36 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -25,6 +25,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot);
#define __HAVE_PHYS_MEM_ACCESS_PROT
+#if defined(CONFIG_PPC_HASH_MMU_NATIVE) || defined(CONFIG_PPC_64S_HASH_MMU)
/*
* This gets called at the end of handling a page fault, when
* the kernel has put a new PTE into the page table for the process.
@@ -35,6 +36,9 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
* waiting for the inevitable extra hash-table miss exception.
*/
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
+#else
+static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {}
+#endif
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 8abe8e42e045..0f89fcab834d 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -157,7 +157,7 @@ DECLARE_PER_CPU(int, next_tlbcam_idx);
enum {
MMU_FTRS_POSSIBLE =
-#if defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_BOOK3S_604)
+#if defined(CONFIG_PPC_BOOK3S_604)
MMU_FTR_HPTE_TABLE |
#endif
#ifdef CONFIG_PPC_8xx
@@ -184,15 +184,18 @@ enum {
MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS |
#endif
#ifdef CONFIG_PPC_BOOK3S_64
+ MMU_FTR_KERNEL_RO |
+#ifdef CONFIG_PPC_64S_HASH_MMU
MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL |
MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA |
- MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA |
+ MMU_FTR_68_BIT_VA | MMU_FTR_HPTE_TABLE |
#endif
#ifdef CONFIG_PPC_RADIX_MMU
MMU_FTR_TYPE_RADIX |
MMU_FTR_GTSE |
#endif /* CONFIG_PPC_RADIX_MMU */
+#endif
#ifdef CONFIG_PPC_KUAP
MMU_FTR_BOOK3S_KUAP |
#endif /* CONFIG_PPC_KUAP */
@@ -223,6 +226,11 @@ enum {
#ifdef CONFIG_E500
#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+#if defined(CONFIG_PPC_RADIX_MMU) && !defined(CONFIG_PPC_64S_HASH_MMU)
+#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_RADIX
+#endif
+#endif
#ifndef MMU_FTRS_ALWAYS
#define MMU_FTRS_ALWAYS 0
@@ -329,7 +337,7 @@ static __always_inline bool radix_enabled(void)
return mmu_has_feature(MMU_FTR_TYPE_RADIX);
}
-static inline bool early_radix_enabled(void)
+static __always_inline bool early_radix_enabled(void)
{
return early_mmu_has_feature(MMU_FTR_TYPE_RADIX);
}
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 9ba6b585337f..e46394d27785 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -75,6 +75,7 @@ extern void hash__reserve_context_id(int id);
extern void __destroy_context(int context_id);
static inline void mmu_context_init(void) { }
+#ifdef CONFIG_PPC_64S_HASH_MMU
static inline int alloc_extended_context(struct mm_struct *mm,
unsigned long ea)
{
@@ -100,6 +101,7 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
return true;
return false;
}
+#endif
#else
extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index dc05a862e72a..295573a82c66 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -97,7 +97,9 @@ struct paca_struct {
/* this becomes non-zero. */
u8 kexec_state; /* set when kexec down has irqs off */
#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct slb_shadow *slb_shadow_ptr;
+#endif
struct dtl_entry *dispatch_log;
struct dtl_entry *dispatch_log_end;
#endif
@@ -110,6 +112,7 @@ struct paca_struct {
/* used for most interrupts/exceptions */
u64 exgen[EX_SIZE] __attribute__((aligned(0x80)));
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* SLB related definitions */
u16 vmalloc_sllp;
u8 slb_cache_ptr;
@@ -120,6 +123,7 @@ struct paca_struct {
u32 slb_used_bitmap; /* Bitmaps for first 32 SLB entries. */
u32 slb_kern_bitmap;
u32 slb_cache[SLB_CACHE_ENTRIES];
+#endif
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_BOOK3E
@@ -149,6 +153,7 @@ struct paca_struct {
#endif /* CONFIG_PPC_BOOK3E */
#ifdef CONFIG_PPC_BOOK3S
+#ifdef CONFIG_PPC_64S_HASH_MMU
#ifdef CONFIG_PPC_MM_SLICES
unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
@@ -156,6 +161,7 @@ struct paca_struct {
u16 mm_ctx_user_psize;
u16 mm_ctx_sllp;
#endif
+#endif
#endif
/*
@@ -268,9 +274,11 @@ struct paca_struct {
#endif /* CONFIG_PPC_PSERIES */
#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Capture SLB related old contents in MCE handler. */
struct slb_entry *mce_faulty_slbs;
u16 slb_save_cache_ptr;
+#endif
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_STACKPROTECTOR
unsigned long canary;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index e563d3222d69..c54fdfcfd72b 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -220,10 +220,12 @@ int main(void)
OFFSET(PACA_EXGEN, paca_struct, exgen);
OFFSET(PACA_EXMC, paca_struct, exmc);
OFFSET(PACA_EXNMI, paca_struct, exnmi);
+#ifdef CONFIG_PPC_64S_HASH_MMU
OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area);
+#endif
OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 358aee7c2d79..2335e5a4e4e3 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -269,6 +269,7 @@ static int __init feat_enable_idle_stop(struct dt_cpu_feature *f)
static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)
{
+#ifdef CONFIG_PPC_64S_HASH_MMU
u64 lpcr;
lpcr = mfspr(SPRN_LPCR);
@@ -284,10 +285,13 @@ static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
return 1;
+#endif
+ return 0;
}
static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
{
+#ifdef CONFIG_PPC_64S_HASH_MMU
u64 lpcr;
lpcr = mfspr(SPRN_LPCR);
@@ -298,14 +302,16 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
return 1;
+#endif
+ return 0;
}
static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
{
#ifdef CONFIG_PPC_RADIX_MMU
+ cur_cpu_spec->mmu_features |= MMU_FTR_KERNEL_RO;
cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
- cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 70cff7b49e17..9581906b5ee9 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -180,7 +180,7 @@ _GLOBAL(_switch)
#endif
ld r8,KSP(r4) /* new stack pointer */
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
b 2f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
@@ -232,7 +232,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
slbmte r7,r0
isync
2:
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
clrrdi r7, r8, THREAD_SHIFT /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 046c99e31d01..65b695e9401e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1369,11 +1369,15 @@ EXC_COMMON_BEGIN(data_access_common)
addi r3,r1,STACK_FRAME_OVERHEAD
andis. r0,r4,DSISR_DABRMATCH@h
bne- 1f
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
bl do_hash_fault
MMU_FTR_SECTION_ELSE
bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ bl do_page_fault
+#endif
b interrupt_return_srr
1: bl do_break
@@ -1416,6 +1420,7 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
EXC_COMMON_BEGIN(data_access_slb_common)
GEN_COMMON data_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -1428,6 +1433,9 @@ MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ li r3,-EFAULT
+#endif
std r3,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_segment_interrupt
@@ -1462,11 +1470,15 @@ EXC_VIRT_END(instruction_access, 0x4400, 0x80)
EXC_COMMON_BEGIN(instruction_access_common)
GEN_COMMON instruction_access
addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
bl do_hash_fault
MMU_FTR_SECTION_ELSE
bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ bl do_page_fault
+#endif
b interrupt_return_srr
@@ -1496,6 +1508,7 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
EXC_COMMON_BEGIN(instruction_access_slb_common)
GEN_COMMON instruction_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -1508,6 +1521,9 @@ MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ li r3,-EFAULT
+#endif
std r3,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_segment_interrupt
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index fd829f7f25a4..2503dd4713b9 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -586,7 +586,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
mc_error_class[evt->error_class] : "Unknown";
printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Display faulty slb contents for SLB errors. */
if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
slb_dump_contents(local_paca->mce_faulty_slbs);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index cf5263b648fc..a48ff18d6d65 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -77,7 +77,7 @@ static bool mce_in_guest(void)
}
/* flush SLBs and reload */
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
void flush_and_reload_slb(void)
{
if (early_radix_enabled())
@@ -99,7 +99,7 @@ void flush_and_reload_slb(void)
void flush_erat(void)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
flush_and_reload_slb();
return;
@@ -114,7 +114,7 @@ void flush_erat(void)
static int mce_flush(int what)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (what == MCE_FLUSH_SLB) {
flush_and_reload_slb();
return 1;
@@ -499,8 +499,10 @@ static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
handled = mce_flush(MCE_FLUSH_SLB);
break;
case MCE_ERROR_TYPE_ERAT:
@@ -588,8 +590,10 @@ static int mce_handle_derror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
if (mce_flush(MCE_FLUSH_SLB))
handled = 1;
break;
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 9bd30cac852b..813930374d24 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -139,8 +139,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
}
#endif /* CONFIG_PPC_PSERIES */
-#ifdef CONFIG_PPC_BOOK3S_64
-
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* 3 persistent SLBs are allocated here. The buffer will be zero
* initially, hence will all be invaild until we actually write them.
@@ -169,8 +168,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
return s;
}
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
#ifdef CONFIG_PPC_PSERIES
/**
@@ -226,7 +224,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->kexec_state = KEXEC_STATE_NONE;
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
new_paca->slb_shadow_ptr = NULL;
#endif
@@ -307,7 +305,7 @@ void __init allocate_paca(int cpu)
#ifdef CONFIG_PPC_PSERIES
paca->lppaca_ptr = new_lppaca(cpu, limit);
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
#endif
#ifdef CONFIG_PPC_PSERIES
@@ -328,7 +326,7 @@ void __init free_unused_pacas(void)
paca_nr_cpu_ids = nr_cpu_ids;
paca_ptrs_size = new_ptrs_size;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (early_radix_enabled()) {
/* Ugly fixup, see new_slb_shadow() */
memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
@@ -341,9 +339,9 @@ void __init free_unused_pacas(void)
paca_ptrs_size + paca_struct_size, nr_cpu_ids);
}
+#ifdef CONFIG_PPC_64S_HASH_MMU
void copy_mm_to_paca(struct mm_struct *mm)
{
-#ifdef CONFIG_PPC_BOOK3S
mm_context_t *context = &mm->context;
#ifdef CONFIG_PPC_MM_SLICES
@@ -356,7 +354,5 @@ void copy_mm_to_paca(struct mm_struct *mm)
get_paca()->mm_ctx_user_psize = context->user_psize;
get_paca()->mm_ctx_sllp = context->sllp;
#endif
-#else /* !CONFIG_PPC_BOOK3S */
- return;
-#endif
}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 50436b52c213..48482aaa9388 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1206,7 +1206,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
{
struct thread_struct *new_thread, *old_thread;
struct task_struct *last;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct ppc64_tlb_batch *batch;
#endif
@@ -1215,7 +1215,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
WARN_ON(!irqs_disabled());
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
batch = this_cpu_ptr(&ppc64_tlb_batch);
if (batch->active) {
current_thread_info()->local_flags |= _TLF_LAZY_MMU;
@@ -1294,6 +1294,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
*/
#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* This applies to a process that was context switched while inside
* arch_enter_lazy_mmu_mode(), to re-activate the batch that was
@@ -1305,6 +1306,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
+#endif
/*
* Math facilities are masked out of the child MSR in copy_thread.
@@ -1655,7 +1657,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
unsigned long sp_vsid;
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
@@ -2302,10 +2304,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
* the heap, we can put it above 1TB so it is backed by a 1TB
* segment. Otherwise the heap will be in the bottom 1TB
* which always uses 256MB segments and this may result in a
- * performance penalty. We don't need to worry about radix. For
- * radix, mmu_highuser_ssize remains unchanged from 256MB.
+ * performance penalty.
*/
- if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
+ if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
#endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 2e67588f6f6e..2197404cdcc4 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -234,6 +234,7 @@ static void __init check_cpu_pa_features(unsigned long node)
#ifdef CONFIG_PPC_BOOK3S_64
static void __init init_mmu_slb_size(unsigned long node)
{
+#ifdef CONFIG_PPC_64S_HASH_MMU
const __be32 *slb_size_ptr;
slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL) ? :
@@ -241,6 +242,7 @@ static void __init init_mmu_slb_size(unsigned long node)
if (slb_size_ptr)
mmu_slb_size = be32_to_cpup(slb_size_ptr);
+#endif
}
#else
#define init_mmu_slb_size(node) do { } while(0)
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index eaa79a0996d1..4f67b26bf597 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -879,6 +879,7 @@ void __init setup_per_cpu_areas(void)
unsigned int cpu;
int rc = -EINVAL;
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* Linear mapping is one of 4K, 1M and 16M. For 4K, no need
* to group units. For larger mappings, use 1M atom which
@@ -888,6 +889,9 @@ void __init setup_per_cpu_areas(void)
atom_size = PAGE_SIZE;
else
atom_size = 1 << 20;
+#else
+ atom_size = PAGE_SIZE;
+#endif
if (pcpu_chosen_fc != PCPU_FC_PAGE) {
rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 89c069d664a5..90b45613b194 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -378,7 +378,7 @@ void default_machine_kexec(struct kimage *image)
/* NOTREACHED */
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Values we need to export to the second kernel via the device tree. */
static unsigned long htab_base;
static unsigned long htab_size;
@@ -420,4 +420,4 @@ static int __init export_htab_values(void)
return 0;
}
late_initcall(export_htab_values);
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
index 6b81c852feab..92d831621fa0 100644
--- a/arch/powerpc/kexec/ranges.c
+++ b/arch/powerpc/kexec/ranges.c
@@ -306,10 +306,14 @@ int add_initrd_mem_range(struct crash_mem **mem_ranges)
*/
int add_htab_mem_range(struct crash_mem **mem_ranges)
{
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!htab_address)
return 0;
return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes);
+#else
+ return 0;
+#endif
}
#endif
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index ff581d70f20c..4801424614be 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -69,6 +69,7 @@ config KVM_BOOK3S_64
select KVM_BOOK3S_64_HANDLER
select KVM
select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
+ select PPC_64S_HASH_MMU
select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV)
help
Support running unmodified book3s_64 and book3s_32 guest kernels
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index 1579e18e098d..2d50cac499c5 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -2,20 +2,23 @@
ccflags-y := $(NO_MINIMAL_TOC)
+obj-y += mmu_context.o pgtable.o trace.o
+ifdef CONFIG_PPC_64S_HASH_MMU
CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
-
-obj-y += hash_pgtable.o hash_utils.o slb.o \
- mmu_context.o pgtable.o hash_tlb.o trace.o
+obj-y += hash_pgtable.o hash_utils.o hash_tlb.o slb.o
obj-$(CONFIG_PPC_HASH_MMU_NATIVE) += hash_native.o
-obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o
-obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o
+obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o
+endif
+
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+
+obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
ifdef CONFIG_HUGETLB_PAGE
obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o
endif
-obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o
-obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o
obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o
obj-$(CONFIG_PPC_PKEY) += pkeys.o
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index ffc52ff0b3f0..2bf0c112b16c 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -99,8 +99,6 @@
*/
static unsigned long _SDR1;
-struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
-EXPORT_SYMBOL_GPL(mmu_psize_defs);
u8 hpte_page_sizes[1 << LP_BITS];
EXPORT_SYMBOL_GPL(hpte_page_sizes);
@@ -109,15 +107,7 @@ struct hash_pte *htab_address;
unsigned long htab_size_bytes;
unsigned long htab_hash_mask;
EXPORT_SYMBOL_GPL(htab_hash_mask);
-int mmu_linear_psize = MMU_PAGE_4K;
-EXPORT_SYMBOL_GPL(mmu_linear_psize);
int mmu_virtual_psize = MMU_PAGE_4K;
-int mmu_vmalloc_psize = MMU_PAGE_4K;
-EXPORT_SYMBOL_GPL(mmu_vmalloc_psize);
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-int mmu_vmemmap_psize = MMU_PAGE_4K;
-#endif
-int mmu_io_psize = MMU_PAGE_4K;
int mmu_kernel_ssize = MMU_SEGSIZE_256M;
EXPORT_SYMBOL_GPL(mmu_kernel_ssize);
int mmu_highuser_ssize = MMU_SEGSIZE_256M;
diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c
similarity index 95%
rename from arch/powerpc/mm/book3s64/hash_hugetlbpage.c
rename to arch/powerpc/mm/book3s64/hugetlbpage.c
index a688e1324ae5..d185c14802aa 100644
--- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/hugetlbpage.c
@@ -16,6 +16,11 @@
unsigned int hpage_shift;
EXPORT_SYMBOL(hpage_shift);
+#ifdef CONFIG_PPC_64S_HASH_MMU
+extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
+ unsigned long pa, unsigned long rlags,
+ unsigned long vflags, int psize, int ssize);
+
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, unsigned long flags,
int ssize, unsigned int shift, unsigned int mmu_psize)
@@ -122,6 +127,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
+#endif
pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index c10fc8a72fb3..642cabc25e99 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -31,6 +31,7 @@ static int alloc_context_id(int min_id, int max_id)
return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
}
+#ifdef CONFIG_PPC_64S_HASH_MMU
void hash__reserve_context_id(int id)
{
int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);
@@ -50,7 +51,9 @@ int hash__alloc_context_id(void)
return alloc_context_id(MIN_USER_CONTEXT, max);
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);
+#endif
+#ifdef CONFIG_PPC_64S_HASH_MMU
static int realloc_context_ids(mm_context_t *ctx)
{
int i, id;
@@ -144,12 +147,15 @@ static int hash__init_new_context(struct mm_struct *mm)
return index;
}
+void slb_setup_new_exec(void);
+
void hash__setup_new_exec(void)
{
slice_setup_new_exec();
slb_setup_new_exec();
}
+#endif
static int radix__init_new_context(struct mm_struct *mm)
{
@@ -175,7 +181,9 @@ static int radix__init_new_context(struct mm_struct *mm)
*/
asm volatile("ptesync;isync" : : : "memory");
+#ifdef CONFIG_PPC_64S_HASH_MMU
mm->context.hash_context = NULL;
+#endif
return index;
}
@@ -186,8 +194,10 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
if (radix_enabled())
index = radix__init_new_context(mm);
+#ifdef CONFIG_PPC_64S_HASH_MMU
else
index = hash__init_new_context(mm);
+#endif
if (index < 0)
return index;
@@ -211,6 +221,7 @@ void __destroy_context(int context_id)
}
EXPORT_SYMBOL_GPL(__destroy_context);
+#ifdef CONFIG_PPC_64S_HASH_MMU
static void destroy_contexts(mm_context_t *ctx)
{
int index, context_id;
@@ -222,6 +233,7 @@ static void destroy_contexts(mm_context_t *ctx)
}
kfree(ctx->hash_context);
}
+#endif
static void pmd_frag_destroy(void *pmd_frag)
{
@@ -274,7 +286,11 @@ void destroy_context(struct mm_struct *mm)
process_tb[mm->context.id].prtb0 = 0;
else
subpage_prot_free(mm);
+#ifdef CONFIG_PPC_64S_HASH_MMU
destroy_contexts(&mm->context);
+#else
+ ida_free(&mmu_context_ida, mm->context.id);
+#endif
mm->context.id = MMU_NO_CONTEXT;
}
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 049843c8c875..6e68513bcda3 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -22,6 +22,18 @@
#include "internal.h"
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+EXPORT_SYMBOL_GPL(mmu_psize_defs);
+
+int mmu_linear_psize = MMU_PAGE_4K;
+EXPORT_SYMBOL_GPL(mmu_linear_psize);
+int mmu_vmalloc_psize = MMU_PAGE_4K;
+EXPORT_SYMBOL_GPL(mmu_vmalloc_psize);
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int mmu_vmemmap_psize = MMU_PAGE_4K;
+#endif
+int mmu_io_psize = MMU_PAGE_4K;
+
unsigned long __pmd_frag_nr;
EXPORT_SYMBOL(__pmd_frag_nr);
unsigned long __pmd_frag_size_shift;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index ae20add7954a..123146d1500f 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -334,8 +334,10 @@ static void __init radix_init_pgtable(void)
phys_addr_t start, end;
u64 i;
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* We don't support slb for radix */
mmu_slb_size = 0;
+#endif
/*
* Create the linear mapping
@@ -588,6 +590,7 @@ void __init radix__early_init_mmu(void)
{
unsigned long lpcr;
+#ifdef CONFIG_PPC_64S_HASH_MMU
#ifdef CONFIG_PPC_64K_PAGES
/* PAGE_SIZE mappings */
mmu_virtual_psize = MMU_PAGE_64K;
@@ -604,6 +607,7 @@ void __init radix__early_init_mmu(void)
mmu_vmemmap_psize = MMU_PAGE_2M;
} else
mmu_vmemmap_psize = mmu_virtual_psize;
+#endif
#endif
/*
* initialize page table size
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 8acd00178956..c1cb21a00884 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -82,6 +82,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
}
EXPORT_SYMBOL_GPL(copro_handle_mm_fault);
+#ifdef CONFIG_PPC_64S_HASH_MMU
int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
{
u64 vsid, vsidkey;
@@ -146,3 +147,4 @@ void copro_flush_all_slbs(struct mm_struct *mm)
cxl_slbia(mm);
}
EXPORT_SYMBOL_GPL(copro_flush_all_slbs);
+#endif
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index cd16b407f47e..ab105d33e0b0 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -81,9 +81,6 @@ static struct page *maybe_pte_to_page(pte_t pte)
static pte_t set_pte_filter_hash(pte_t pte)
{
- if (radix_enabled())
- return pte;
-
pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
cpu_has_feature(CPU_FTR_NOEXECUTE))) {
@@ -112,6 +109,9 @@ static inline pte_t set_pte_filter(pte_t pte)
{
struct page *pg;
+ if (radix_enabled())
+ return pte;
+
if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
return set_pte_filter_hash(pte);
@@ -144,6 +144,10 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
{
struct page *pg;
+#ifdef CONFIG_PPC_BOOK3S_64
+ return pte;
+#endif
+
if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
return pte;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index a208997ade88..fa68c29778ca 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -105,9 +105,9 @@ config PPC_BOOK3S_64
select HAVE_MOVE_PMD
select HAVE_MOVE_PUD
select IRQ_WORK
- select PPC_MM_SLICES
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
+ select PPC_64S_HASH_MMU if !PPC_RADIX_MMU
config PPC_BOOK3E_64
bool "Embedded processors"
@@ -130,11 +130,13 @@ choice
config GENERIC_CPU
bool "Generic (POWER4 and above)"
depends on PPC64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU if PPC_BOOK3S_64
config GENERIC_CPU
bool "Generic (POWER8 and above)"
depends on PPC64 && CPU_LITTLE_ENDIAN
select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_64S_HASH_MMU
config GENERIC_CPU
bool "Generic 32 bits powerpc"
@@ -143,24 +145,29 @@ config GENERIC_CPU
config CELL_CPU
bool "Cell Broadband Engine"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU
config POWER5_CPU
bool "POWER5"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU
config POWER6_CPU
bool "POWER6"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU
config POWER7_CPU
bool "POWER7"
depends on PPC_BOOK3S_64
select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_64S_HASH_MMU
config POWER8_CPU
bool "POWER8"
depends on PPC_BOOK3S_64
select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_64S_HASH_MMU
config POWER9_CPU
bool "POWER9"
@@ -364,6 +371,17 @@ config SPE
If in doubt, say Y here.
+config PPC_64S_HASH_MMU
+ bool "Hash MMU Support"
+ depends on PPC_BOOK3S_64
+ default y
+ select PPC_MM_SLICES
+ help
+ Enable support for the Power ISA Hash style MMU. This is implemented
+ by all IBM Power and other Book3S CPUs.
+
+ If you're unsure, say Y.
+
config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64
@@ -377,6 +395,7 @@ config PPC_RADIX_MMU
config PPC_RADIX_MMU_DEFAULT
bool "Default to using the Radix MMU when possible"
depends on PPC_RADIX_MMU
+ depends on PPC_64S_HASH_MMU
default y
help
When the hardware supports the Radix MMU, default to using it unless
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index db4465c51b56..faa894714a2a 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -8,6 +8,7 @@ config PPC_CELL_COMMON
select PPC_DCR_MMIO
select PPC_INDIRECT_PIO
select PPC_INDIRECT_MMIO
+ select PPC_64S_HASH_MMU
select PPC_HASH_MMU_NATIVE
select PPC_RTAS
select IRQ_EDGE_EOI_HANDLER
diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig
index 7fd84311ade5..4c058cc57c90 100644
--- a/arch/powerpc/platforms/maple/Kconfig
+++ b/arch/powerpc/platforms/maple/Kconfig
@@ -9,6 +9,7 @@ config PPC_MAPLE
select GENERIC_TBSYNC
select PPC_UDBG_16550
select PPC_970_NAP
+ select PPC_64S_HASH_MMU
select PPC_HASH_MMU_NATIVE
select PPC_RTAS
select MMIO_NVRAM
diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig
index 62b51e37fc05..823192e9d38a 100644
--- a/arch/powerpc/platforms/microwatt/Kconfig
+++ b/arch/powerpc/platforms/microwatt/Kconfig
@@ -5,7 +5,7 @@ config PPC_MICROWATT
select PPC_XICS
select PPC_ICS_NATIVE
select PPC_ICP_NATIVE
- select PPC_HASH_MMU_NATIVE
+ select PPC_HASH_MMU_NATIVE if PPC_64S_HASH_MMU
select PPC_UDBG_16550
select ARCH_RANDOM
help
diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig
index bc7137353a7f..85ae18ddd911 100644
--- a/arch/powerpc/platforms/pasemi/Kconfig
+++ b/arch/powerpc/platforms/pasemi/Kconfig
@@ -5,6 +5,7 @@ config PPC_PASEMI
select MPIC
select FORCE_PCI
select PPC_UDBG_16550
+ select PPC_64S_HASH_MMU
select PPC_HASH_MMU_NATIVE
select MPIC_BROKEN_REGREAD
help
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index 2b56df145b82..130707ec9f99 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -6,6 +6,7 @@ config PPC_PMAC
select FORCE_PCI
select PPC_INDIRECT_PCI if PPC32
select PPC_MPC106 if PPC32
+ select PPC_64S_HASH_MMU if PPC64
select PPC_HASH_MMU_NATIVE
select ZONE_DMA if PPC32
default y
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index cd754e116184..161dfe024085 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -2,7 +2,7 @@
config PPC_POWERNV
depends on PPC64 && PPC_BOOK3S
bool "IBM PowerNV (Non-Virtualized) platform support"
- select PPC_HASH_MMU_NATIVE
+ select PPC_HASH_MMU_NATIVE if PPC_64S_HASH_MMU
select PPC_XICS
select PPC_ICP_NATIVE
select PPC_XIVE_NATIVE
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index e3ffdc8e8567..fa1915d29462 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -492,12 +492,14 @@ static unsigned long power7_idle_insn(unsigned long type)
mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* The SLB has to be restored here, but it sometimes still
* contains entries, so the __ variant must be used to prevent
* multi hits.
*/
__slb_restore_bolted_realmode();
+#endif
return srr1;
}
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index a8db3f153063..c6dbfa2e075a 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -207,6 +207,7 @@ static void __init pnv_init(void)
#endif
add_preferred_console("hvc", 0, NULL);
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!radix_enabled()) {
size_t size = sizeof(struct slb_entry) * mmu_slb_size;
int i;
@@ -219,6 +220,7 @@ static void __init pnv_init(void)
cpu_to_node(i));
}
}
+#endif
}
static void __init pnv_init_IRQ(void)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 06d6a824c0dc..fac5d86777db 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -58,6 +58,7 @@ EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* H_BLOCK_REMOVE supported block size for this page size in segment who's base
* page size is that page size.
@@ -66,6 +67,7 @@ EXPORT_SYMBOL(plpar_hcall_norets);
* page size.
*/
static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
+#endif
/*
* Due to the involved complexity, and that the current hypervisor is only
@@ -689,7 +691,7 @@ void vpa_init(int cpu)
return;
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* PAPR says this feature is SLB-Buffer but firmware never
* reports that. All SPLPAR support SLB shadow buffer.
@@ -702,7 +704,7 @@ void vpa_init(int cpu)
"cpu %d (hw %d) of area %lx failed with %ld\n",
cpu, hwcpu, addr, ret);
}
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
/*
* Register dispatch trace log, if one has been allocated.
@@ -740,6 +742,8 @@ static int pseries_lpar_register_process_table(unsigned long base,
return rc;
}
+#ifdef CONFIG_PPC_64S_HASH_MMU
+
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long vpn, unsigned long pa,
unsigned long rflags, unsigned long vflags,
@@ -1730,6 +1734,7 @@ void __init hpte_init_pseries(void)
if (cpu_has_feature(CPU_FTR_ARCH_300))
pseries_lpar_register_process_table(0, 0, 0);
}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
#ifdef CONFIG_PPC_RADIX_MMU
void radix_init_pseries(void)
@@ -1932,6 +1937,7 @@ int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
return rc;
}
+#ifdef CONFIG_PPC_64S_HASH_MMU
static unsigned long vsid_unscramble(unsigned long vsid, int ssize)
{
unsigned long protovsid;
@@ -1992,6 +1998,7 @@ static int __init reserve_vrma_context_id(void)
return 0;
}
machine_device_initcall(pseries, reserve_vrma_context_id);
+#endif
#ifdef CONFIG_DEBUG_FS
/* debugfs file interface for vpa data */
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 3354c00914fa..c7940fcfc911 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -531,7 +531,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
seq_printf(m, "shared_processor_mode=%d\n",
lppaca_shared_proc(get_lppaca()));
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!radix_enabled())
seq_printf(m, "slb_size=%d\n", mmu_slb_size);
#endif
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index e83e0891272d..aec1971e16a1 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -417,11 +417,15 @@ static void prod_others(void)
static u16 clamp_slb_size(void)
{
+#ifdef CONFIG_PPC_64S_HASH_MMU
u16 prev = mmu_slb_size;
slb_set_size(SLB_MIN_SIZE);
return prev;
+#else
+ return 0;
+#endif
}
static int do_suspend(void)
@@ -446,7 +450,9 @@ static int do_suspend(void)
ret = rtas_ibm_suspend_me(&status);
if (ret != 0) {
pr_err("ibm,suspend-me error: %d\n", status);
+#ifdef CONFIG_PPC_64S_HASH_MMU
slb_set_size(saved_slb_size);
+#endif
}
return ret;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 56092dccfdb8..74c9b1b5bc66 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -526,6 +526,7 @@ static int mce_handle_err_realmode(int disposition, u8 error_type)
disposition = RTAS_DISP_FULLY_RECOVERED;
break;
case MC_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* Store the old slb content in paca before flushing.
* Print this when we go to virtual mode.
@@ -538,6 +539,7 @@ static int mce_handle_err_realmode(int disposition, u8 error_type)
slb_save_contents(local_paca->mce_faulty_slbs);
flush_and_reload_slb();
disposition = RTAS_DISP_FULLY_RECOVERED;
+#endif
break;
default:
break;
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 7f7369fec46b..80dae18d6621 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -337,8 +337,10 @@ static int do_update_property(char *buf, size_t bufsize)
if (!newprop)
return -ENOMEM;
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size"))
slb_set_size(*(int *)value);
+#endif
return of_update_property(np, newprop);
}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f79126f16258..a7f3c0d50fc9 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -113,7 +113,7 @@ static void __init fwnmi_init(void)
u8 *mce_data_buf;
unsigned int i;
int nr_cpus = num_possible_cpus();
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct slb_entry *slb_ptr;
size_t size;
#endif
@@ -153,7 +153,7 @@ static void __init fwnmi_init(void)
(RTAS_ERROR_LOG_MAX * i);
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!radix_enabled()) {
/* Allocate per cpu area to save old slb contents during MCE */
size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
@@ -802,7 +802,9 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
pseries_setup_security_mitigations();
+#ifdef CONFIG_PPC_64S_HASH_MMU
pseries_lpar_read_hblkrm_characteristics();
+#endif
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index dd8241c009e5..33de8d798c95 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1160,9 +1160,11 @@ cmds(struct pt_regs *excp)
show_tasks();
break;
#ifdef CONFIG_PPC_BOOK3S
+#ifdef CONFIG_PPC_64S_HASH_MMU
case 'u':
dump_segments();
break;
+#endif
#elif defined(CONFIG_44x)
case 'u':
dump_tlb_44x();
@@ -2608,7 +2610,7 @@ static void dump_tracing(void)
static void dump_one_paca(int cpu)
{
struct paca_struct *p;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
int i = 0;
#endif
@@ -2650,6 +2652,7 @@ static void dump_one_paca(int cpu)
DUMP(p, cpu_start, "%#-*x");
DUMP(p, kexec_state, "%#-*x");
#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!early_radix_enabled()) {
for (i = 0; i < SLB_NUM_BOLTED; i++) {
u64 esid, vsid;
@@ -2677,6 +2680,7 @@ static void dump_one_paca(int cpu)
22, "slb_cache", i, p->slb_cache[i]);
}
}
+#endif
DUMP(p, rfi_flush_fallback_area, "%-*px");
#endif
@@ -3741,7 +3745,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid,
printf("%s", after);
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
void dump_segments(void)
{
int i;
--
2.23.0
^ permalink raw reply related
* [PATCH v1 10/11] powerpc/configs/microwatt: add POWER9_CPU
From: Nicholas Piggin @ 2021-10-15 15:46 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211015154624.922960-1-npiggin@gmail.com>
Microwatt implements a subset of ISA v3.0 (which is equivalent to
the POWER9_CPU option).
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/configs/microwatt_defconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig
index 9465209b8c5b..6e62966730d3 100644
--- a/arch/powerpc/configs/microwatt_defconfig
+++ b/arch/powerpc/configs/microwatt_defconfig
@@ -15,6 +15,7 @@ CONFIG_EMBEDDED=y
# CONFIG_COMPAT_BRK is not set
# CONFIG_SLAB_MERGE_DEFAULT is not set
CONFIG_PPC64=y
+CONFIG_POWER9_CPU=y
# CONFIG_PPC_KUEP is not set
# CONFIG_PPC_KUAP is not set
CONFIG_CPU_LITTLE_ENDIAN=y
--
2.23.0
^ permalink raw reply related
* [PATCH v1 11/11] powerpc/microwatt: Don't select the hash MMU code
From: Nicholas Piggin @ 2021-10-15 15:46 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211015154624.922960-1-npiggin@gmail.com>
Microwatt is radix-only, so it does not require hash MMU support.
This saves 20kB compressed dtbImage and 56kB vmlinux size.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/configs/microwatt_defconfig | 1 -
arch/powerpc/platforms/microwatt/Kconfig | 1 -
2 files changed, 2 deletions(-)
diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig
index 6e62966730d3..7c8eb29d8afe 100644
--- a/arch/powerpc/configs/microwatt_defconfig
+++ b/arch/powerpc/configs/microwatt_defconfig
@@ -27,7 +27,6 @@ CONFIG_PPC_MICROWATT=y
# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
CONFIG_CPU_FREQ=y
CONFIG_HZ_100=y
-# CONFIG_PPC_MEM_KEYS is not set
# CONFIG_SECCOMP is not set
# CONFIG_MQ_IOSCHED_KYBER is not set
# CONFIG_COREDUMP is not set
diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig
index 823192e9d38a..5e320f49583a 100644
--- a/arch/powerpc/platforms/microwatt/Kconfig
+++ b/arch/powerpc/platforms/microwatt/Kconfig
@@ -5,7 +5,6 @@ config PPC_MICROWATT
select PPC_XICS
select PPC_ICS_NATIVE
select PPC_ICP_NATIVE
- select PPC_HASH_MMU_NATIVE if PPC_64S_HASH_MMU
select PPC_UDBG_16550
select ARCH_RANDOM
help
--
2.23.0
^ permalink raw reply related
* Re: [PATCH v2] powerpc/mpc512x: dts: fix PSC node warnings
From: Rob Herring @ 2021-10-15 16:15 UTC (permalink / raw)
To: Anatolij Gustschin; +Cc: devicetree, linuxppc-dev, linux-kernel@vger.kernel.org
In-Reply-To: <20211014224201.24027-1-agust@denx.de>
On Thu, Oct 14, 2021 at 5:42 PM Anatolij Gustschin <agust@denx.de> wrote:
>
> Rework PSC node description to fix build warnings like:
> mpc5121.dtsi:397.13-406.5: Warning (spi_bus_bridge): /soc@80000000/psc@11400: node name for SPI buses should be 'spi'
> mpc5121.dtsi:409.13-418.5: Warning (spi_bus_bridge): /soc@80000000/psc@11500: node name for SPI buses should be 'spi'
> mpc5121.dtsi:457.13-466.5: Warning (spi_bus_bridge): /soc@80000000/psc@11900: node name for SPI buses should be 'spi'
Okay, I now see the block supports either spi or serial modes. I would
handle this a bit differently that doesn't create a bunch of new .dtsi
files.
>
> Signed-off-by: Anatolij Gustschin <agust@denx.de>
> ---
> Changes in v2:
> - extract PSC nodes to files which can be included
> separately and extended as needed
>
> arch/powerpc/boot/dts/ac14xx.dts | 118 ++++++++--------
> arch/powerpc/boot/dts/mpc5121-psc0.dtsi | 16 +++
> arch/powerpc/boot/dts/mpc5121-psc1.dtsi | 15 ++
> arch/powerpc/boot/dts/mpc5121-psc10.dtsi | 15 ++
> arch/powerpc/boot/dts/mpc5121-psc11.dtsi | 15 ++
> arch/powerpc/boot/dts/mpc5121-psc2.dtsi | 15 ++
> arch/powerpc/boot/dts/mpc5121-psc3.dtsi | 15 ++
> arch/powerpc/boot/dts/mpc5121-psc4-spi.dtsi | 17 +++
> arch/powerpc/boot/dts/mpc5121-psc4.dtsi | 15 ++
> arch/powerpc/boot/dts/mpc5121-psc5-spi.dtsi | 17 +++
> arch/powerpc/boot/dts/mpc5121-psc5.dtsi | 15 ++
> arch/powerpc/boot/dts/mpc5121-psc6.dtsi | 15 ++
> arch/powerpc/boot/dts/mpc5121-psc7.dtsi | 15 ++
> arch/powerpc/boot/dts/mpc5121-psc8.dtsi | 15 ++
> arch/powerpc/boot/dts/mpc5121-psc9-spi.dtsi | 17 +++
> arch/powerpc/boot/dts/mpc5121-psc9.dtsi | 15 ++
> arch/powerpc/boot/dts/mpc5121.dtsi | 148 +-------------------
> arch/powerpc/boot/dts/mpc5121ads.dts | 42 +++---
> arch/powerpc/boot/dts/pdm360ng.dts | 104 +++++++-------
> 19 files changed, 371 insertions(+), 273 deletions(-)
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc0.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc1.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc10.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc11.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc2.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc3.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc4-spi.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc4.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc5-spi.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc5.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc6.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc7.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc8.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc9-spi.dtsi
> create mode 100644 arch/powerpc/boot/dts/mpc5121-psc9.dtsi
[...]
> diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi
> index 3f66b91a8e3c..21674da8beb1 100644
> --- a/arch/powerpc/boot/dts/mpc5121.dtsi
> +++ b/arch/powerpc/boot/dts/mpc5121.dtsi
> @@ -87,7 +87,7 @@
> };
> };
>
> - soc@80000000 {
> + soc: soc@80000000 {
> compatible = "fsl,mpc5121-immr";
> #address-cells = <1>;
> #size-cells = <1>;
> @@ -343,152 +343,6 @@
> clock-names = "ipg";
> };
>
> - /* 512x PSCs are not 52xx PSC compatible */
> -
> - /* PSC0 */
> - psc@11000 {
I would just put here 'serial@11000' and 'spi@11000' nodes with both
nodes set to disabled. Then the board dts just has to change status of
the the nodes it wants to enable (and add child nodes for spi).
Overlapping addresses are okay if nodes are disabled.
> - compatible = "fsl,mpc5121-psc";
> - reg = <0x11000 0x100>;
> - interrupts = <40 0x8>;
> - fsl,rx-fifo-size = <16>;
> - fsl,tx-fifo-size = <16>;
> - clocks = <&clks MPC512x_CLK_PSC0>,
> - <&clks MPC512x_CLK_PSC0_MCLK>;
> - clock-names = "ipg", "mclk";
> - };
^ permalink raw reply
* Re: [PATCH] tracing: Have all levels of checks prevent recursion
From: Peter Zijlstra @ 2021-10-15 16:17 UTC (permalink / raw)
To: Steven Rostedt
Cc: 王贇, Paul Walmsley, James E.J. Bottomley,
Paul Mackerras, Jisheng Zhang, H. Peter Anvin, live-patching,
linux-riscv, Miroslav Benes, Joe Lawrence, Helge Deller, x86,
linux-csky, Ingo Molnar, Petr Mladek, Albert Ou, Jiri Kosina,
Nicholas Piggin, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
linux-parisc, LKML, Palmer Dabbelt, Masami Hiramatsu, Guo Ren,
Colin Ian King, linuxppc-dev
In-Reply-To: <20211015110035.14813389@gandalf.local.home>
On Fri, Oct 15, 2021 at 11:00:35AM -0400, Steven Rostedt wrote:
> From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
>
> While writing an email explaining the "bit = 0" logic for a discussion on
> bit = trace_get_context_bit() + start;
While there, you were also going to update that function to match/use
get_recursion_context(). Because your version is still branch hell.
^ permalink raw reply
* Re: [PATCH 4/4] powerpc/5200: dts: fix localbus node warnings
From: Rob Herring @ 2021-10-15 16:31 UTC (permalink / raw)
To: Anatolij Gustschin
Cc: devicetree, Stephen Rothwell, linux-kernel@vger.kernel.org,
Paul Mackerras, linuxppc-dev
In-Reply-To: <20211013220532.24759-5-agust@denx.de>
On Wed, Oct 13, 2021 at 5:05 PM Anatolij Gustschin <agust@denx.de> wrote:
>
> Fix build warnings like:
> localbus:ranges: 'oneOf' conditional failed, one must be fixed
> ...
> Warning (unit_address_vs_reg): /localbus: node has a reg or ranges property, but no unit name
> Warning (simple_bus_reg): /localbus/flash@0,0: simple-bus unit address format error, expected "0"
>
> Signed-off-by: Anatolij Gustschin <agust@denx.de>
> ---
> arch/powerpc/boot/dts/a3m071.dts | 12 +++++-----
> arch/powerpc/boot/dts/a4m072.dts | 20 ++++++++---------
> arch/powerpc/boot/dts/charon.dts | 14 ++++++------
> arch/powerpc/boot/dts/cm5200.dts | 7 ++++--
> arch/powerpc/boot/dts/digsy_mtc.dts | 16 ++++++++------
> arch/powerpc/boot/dts/lite5200.dts | 4 ++--
> arch/powerpc/boot/dts/lite5200b.dts | 6 +++--
> arch/powerpc/boot/dts/media5200.dts | 20 +++++++++--------
> arch/powerpc/boot/dts/motionpro.dts | 32 +++++++++++++++------------
> arch/powerpc/boot/dts/mpc5200b.dtsi | 2 +-
> arch/powerpc/boot/dts/mucmc52.dts | 34 +++++++++++++++--------------
> arch/powerpc/boot/dts/o2d.dts | 10 +++++----
> arch/powerpc/boot/dts/o2d.dtsi | 12 +++++-----
> arch/powerpc/boot/dts/o2d300.dts | 10 +++++----
> arch/powerpc/boot/dts/o2dnt2.dts | 10 +++++----
> arch/powerpc/boot/dts/o2i.dts | 4 ++--
> arch/powerpc/boot/dts/o2mnt.dts | 4 ++--
> arch/powerpc/boot/dts/o3dnt.dts | 10 +++++----
> arch/powerpc/boot/dts/pcm030.dts | 2 +-
> arch/powerpc/boot/dts/pcm032.dts | 26 ++++++++++++----------
> arch/powerpc/boot/dts/tqm5200.dts | 4 ++--
> arch/powerpc/boot/dts/uc101.dts | 14 +++++++-----
> 22 files changed, 151 insertions(+), 122 deletions(-)
>
> diff --git a/arch/powerpc/boot/dts/a3m071.dts b/arch/powerpc/boot/dts/a3m071.dts
> index 034cfd8aa95b..14e59aaa0ba7 100644
> --- a/arch/powerpc/boot/dts/a3m071.dts
> +++ b/arch/powerpc/boot/dts/a3m071.dts
> @@ -87,15 +87,15 @@
> };
> };
>
> - localbus {
> + localbus@80000000 {
That's not right, as 0x80000000 doesn't exist in 'reg' or 'ranges'.
Without 'reg', the correct unit-address is 'fc000000'. Perhaps there
should be a 'reg' entry though.
> compatible = "fsl,mpc5200b-lpb","simple-bus";
> #address-cells = <2>;
> #size-cells = <1>;
> - ranges = <0 0 0xfc000000 0x02000000
> - 3 0 0xe9000000 0x00080000
> - 5 0 0xe8000000 0x00010000>;
> + ranges = <0 0 0xfc000000 0x02000000>,
> + <3 0 0xe9000000 0x00080000>,
> + <5 0 0xe8000000 0x00010000>;
>
> - flash@0,0 {
> + flash@0 {
This change is debatable. Normally, if we have chipselects, then
that's a separate field and a comma is appropriate. However, h/w with
chip selects require setup and aren't a simple-bus. I guess the
bootloader does the setup, so kind of a gray area.
The warning for this is off by default, so I'd leave it alone.
Rob
^ permalink raw reply
* Re: [PATCH 0/4] Update mpc5200 dts files to fix warnings
From: Rob Herring @ 2021-10-15 16:34 UTC (permalink / raw)
To: Anatolij Gustschin
Cc: devicetree, Stephen Rothwell, linux-kernel@vger.kernel.org,
Paul Mackerras, linuxppc-dev
In-Reply-To: <20211013220532.24759-1-agust@denx.de>
On Wed, Oct 13, 2021 at 5:05 PM Anatolij Gustschin <agust@denx.de> wrote:
>
> This series fixes localbus, memory and pci node build warnings.
> It was tested with current linux-next on digsy_mtc and tqm5200
> boards.
>
> Anatolij Gustschin (4):
> powerpc/5200: dts: add missing pci ranges
> powerpc/5200: dts: fix pci ranges warnings
> powerpc/5200: dts: fix memory node unit name
> powerpc/5200: dts: fix localbus node warnings
For patches 1-3:
Reviewed-by: Rob Herring <robh@kernel.org>
^ permalink raw reply
* Re: [PATCH] powerpc/smp: do not decrement idle task preempt count in CPU offline
From: Nathan Lynch @ 2021-10-15 17:02 UTC (permalink / raw)
To: Valentin Schneider; +Cc: srikar, peterz, linux-kernel, clg, linuxppc-dev, mingo
In-Reply-To: <87h7dijo4m.mognet@arm.com>
Valentin Schneider <valentin.schneider@arm.com> writes:
> On 15/10/21 09:55, Nathan Lynch wrote:
>> With PREEMPT_COUNT=y, when a CPU is offlined and then onlined again, we
>> get:
>>
>> BUG: scheduling while atomic: swapper/1/0/0x00000000
>> no locks held by swapper/1/0.
>> CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.15.0-rc2+ #100
>> Call Trace:
>> dump_stack_lvl+0xac/0x108
>> __schedule_bug+0xac/0xe0
>> __schedule+0xcf8/0x10d0
>> schedule_idle+0x3c/0x70
>> do_idle+0x2d8/0x4a0
>> cpu_startup_entry+0x38/0x40
>> start_secondary+0x2ec/0x3a0
>> start_secondary_prolog+0x10/0x14
>>
>> This is because powerpc's arch_cpu_idle_dead() decrements the idle task's
>> preempt count, for reasons explained in commit a7c2bb8279d2 ("powerpc:
>> Re-enable preemption before cpu_die()"), specifically "start_secondary()
>> expects a preempt_count() of 0."
>>
>> However, since commit 2c669ef6979c ("powerpc/preempt: Don't touch the idle
>> task's preempt_count during hotplug") and commit f1a0a376ca0c ("sched/core:
>> Initialize the idle task with preemption disabled"), that justification no
>> longer holds.
>>
>> The idle task isn't supposed to re-enable preemption, so remove the
>> vestigial preempt_enable() from the CPU offline path.
>>
>
> Humph, I got confused because 2c669ef6979c explicitly mentions hotplug,
> but that's the hotplug machinery which is already involved for bringing up
> the secondaries at boot time.
>
> IIUC your issue here is the preempt_count being messed up when
> hot-unplugging a CPU, which leads to fireworks during hotplug
That's right.
> (IOW I didn't
> test my last patch against hotplug - my bad!)
>
> Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
No worries and thank you for reviewing.
>> Tested with pseries and powernv in qemu, and pseries on PowerVM.
>>
>> Fixes: 2c669ef6979c ("powerpc/preempt: Don't touch the idle task's preempt_count during hotplug")
>> Fixes: f1a0a376ca0c ("sched/core: Initialize the idle task with preemption disabled")
>
> I think only the first Fixes: is needed.
OK, I'll re-send with that changed as well as your r-b. Thanks.
^ permalink raw reply
* Re: [PATCH] tracing: Have all levels of checks prevent recursion
From: Steven Rostedt @ 2021-10-15 17:35 UTC (permalink / raw)
To: Peter Zijlstra
Cc: 王贇, Paul Walmsley, James E.J. Bottomley,
Paul Mackerras, Jisheng Zhang, H. Peter Anvin, live-patching,
linux-riscv, Miroslav Benes, Joe Lawrence, Helge Deller, x86,
linux-csky, Ingo Molnar, Petr Mladek, Albert Ou, Jiri Kosina,
Nicholas Piggin, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
linux-parisc, LKML, Palmer Dabbelt, Masami Hiramatsu, Guo Ren,
Colin Ian King, linuxppc-dev
In-Reply-To: <20211015161702.GF174703@worktop.programming.kicks-ass.net>
On Fri, 15 Oct 2021 18:17:02 +0200
Peter Zijlstra <peterz@infradead.org> wrote:
> On Fri, Oct 15, 2021 at 11:00:35AM -0400, Steven Rostedt wrote:
> > From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
> >
> > While writing an email explaining the "bit = 0" logic for a discussion on
>
> > bit = trace_get_context_bit() + start;
>
> While there, you were also going to update that function to match/use
> get_recursion_context(). Because your version is still branch hell.
That would probably be a separate patch. This is just a fix to a design
flaw, changing the context tests would be performance enhancement.
Thanks for the reminder (as it was on my todo list to update that code).
-- Steve
^ permalink raw reply
* [PATCH v2] powerpc/smp: do not decrement idle task preempt count in CPU offline
From: Nathan Lynch @ 2021-10-15 17:39 UTC (permalink / raw)
To: linuxppc-dev; +Cc: srikar, peterz, linux-kernel, valentin.schneider, clg, mingo
With PREEMPT_COUNT=y, when a CPU is offlined and then onlined again, we
get:
BUG: scheduling while atomic: swapper/1/0/0x00000000
no locks held by swapper/1/0.
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.15.0-rc2+ #100
Call Trace:
dump_stack_lvl+0xac/0x108
__schedule_bug+0xac/0xe0
__schedule+0xcf8/0x10d0
schedule_idle+0x3c/0x70
do_idle+0x2d8/0x4a0
cpu_startup_entry+0x38/0x40
start_secondary+0x2ec/0x3a0
start_secondary_prolog+0x10/0x14
This is because powerpc's arch_cpu_idle_dead() decrements the idle task's
preempt count, for reasons explained in commit a7c2bb8279d2 ("powerpc:
Re-enable preemption before cpu_die()"), specifically "start_secondary()
expects a preempt_count() of 0."
However, since commit 2c669ef6979c ("powerpc/preempt: Don't touch the idle
task's preempt_count during hotplug") and commit f1a0a376ca0c ("sched/core:
Initialize the idle task with preemption disabled"), that justification no
longer holds.
The idle task isn't supposed to re-enable preemption, so remove the
vestigial preempt_enable() from the CPU offline path.
Tested with pseries and powernv in qemu, and pseries on PowerVM.
Fixes: 2c669ef6979c ("powerpc/preempt: Don't touch the idle task's preempt_count during hotplug")
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
---
Notes:
Changes since v1:
- remove incorrect Fixes: tag, add Valentin's r-b.
arch/powerpc/kernel/smp.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9cc7d3dbf439..605bab448f84 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1730,8 +1730,6 @@ void __cpu_die(unsigned int cpu)
void arch_cpu_idle_dead(void)
{
- sched_preempt_enable_no_resched();
-
/*
* Disable on the down path. This will be re-enabled by
* start_secondary() via start_secondary_resume() below
--
2.31.1
^ permalink raw reply related
* Re: [PATCH] tracing: Have all levels of checks prevent recursion
From: Steven Rostedt @ 2021-10-15 17:58 UTC (permalink / raw)
To: Peter Zijlstra
Cc: 王贇, Paul Walmsley, James E.J. Bottomley,
Paul Mackerras, Jisheng Zhang, H. Peter Anvin, live-patching,
linux-riscv, Miroslav Benes, Joe Lawrence, Helge Deller, x86,
linux-csky, Ingo Molnar, Petr Mladek, Albert Ou, Jiri Kosina,
Nicholas Piggin, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
linux-parisc, LKML, Palmer Dabbelt, Masami Hiramatsu, Guo Ren,
Colin Ian King, linuxppc-dev
In-Reply-To: <20211015133504.6c0a9fcc@gandalf.local.home>
On Fri, 15 Oct 2021 13:35:04 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:
> On Fri, 15 Oct 2021 18:17:02 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
>
> > On Fri, Oct 15, 2021 at 11:00:35AM -0400, Steven Rostedt wrote:
> > > From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
> > >
> > > While writing an email explaining the "bit = 0" logic for a discussion on
> >
> > > bit = trace_get_context_bit() + start;
> >
> > While there, you were also going to update that function to match/use
> > get_recursion_context(). Because your version is still branch hell.
>
> That would probably be a separate patch. This is just a fix to a design
> flaw, changing the context tests would be performance enhancement.
>
> Thanks for the reminder (as it was on my todo list to update that code).
Something like this:
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Subject: [PATCH] tracing: Reuse logic from perf's get_recursion_context()
Instead of having branches that adds noise to the branch prediction, use
the addition logic to set the bit for the level of interrupt context that
the state is currently in. This copies the logic from perf's
get_recursion_context() function.
Link: https://lore.kernel.org/all/20211015161702.GF174703@worktop.programming.kicks-ass.net/
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
include/linux/trace_recursion.h | 11 ++++++-----
kernel/trace/ring_buffer.c | 12 ++++++------
2 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index 168fdf07419a..41f5bfd9e93f 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -119,12 +119,13 @@ enum {
static __always_inline int trace_get_context_bit(void)
{
unsigned long pc = preempt_count();
+ unsigned char bit = 0;
- if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
- return TRACE_CTX_NORMAL;
- else
- return pc & NMI_MASK ? TRACE_CTX_NMI :
- pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ;
+ bit += !!(pc & (NMI_MASK));
+ bit += !!(pc & (NMI_MASK | HARDIRQ_MASK));
+ bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+
+ return TRACE_CTX_NORMAL - bit;
}
#ifdef CONFIG_FTRACE_RECORD_RECURSION
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c5a3fbf19617..15d4380006e3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3168,13 +3168,13 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned int val = cpu_buffer->current_context;
unsigned long pc = preempt_count();
- int bit;
+ int bit = 0;
- if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
- bit = RB_CTX_NORMAL;
- else
- bit = pc & NMI_MASK ? RB_CTX_NMI :
- pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
+ bit += !!(pc & (NMI_MASK));
+ bit += !!(pc & (NMI_MASK | HARDIRQ_MASK));
+ bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+
+ bit = RB_CTX_NORMAL - bit;
if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) {
/*
--
2.31.1
^ permalink raw reply related
* Re: [PATCH] tracing: Have all levels of checks prevent recursion
From: Peter Zijlstra @ 2021-10-15 18:04 UTC (permalink / raw)
To: Steven Rostedt
Cc: 王贇, Paul Walmsley, James E.J. Bottomley,
Paul Mackerras, Jisheng Zhang, H. Peter Anvin, live-patching,
linux-riscv, Miroslav Benes, Joe Lawrence, Helge Deller, x86,
linux-csky, Ingo Molnar, Petr Mladek, Albert Ou, Jiri Kosina,
Nicholas Piggin, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
linux-parisc, LKML, Palmer Dabbelt, Masami Hiramatsu, Guo Ren,
Colin Ian King, linuxppc-dev
In-Reply-To: <20211015135806.72d1af23@gandalf.local.home>
On Fri, Oct 15, 2021 at 01:58:06PM -0400, Steven Rostedt wrote:
> Something like this:
I think having one copy of that in a header is better than having 3
copies. But yes, something along them lines.
^ permalink raw reply
* Re: [PATCH] tracing: Have all levels of checks prevent recursion
From: Steven Rostedt @ 2021-10-15 18:20 UTC (permalink / raw)
To: Peter Zijlstra
Cc: 王贇, Paul Walmsley, James E.J. Bottomley,
Paul Mackerras, Jisheng Zhang, H. Peter Anvin, live-patching,
linux-riscv, Miroslav Benes, Joe Lawrence, Helge Deller, x86,
linux-csky, Ingo Molnar, Petr Mladek, Albert Ou, Jiri Kosina,
Nicholas Piggin, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
linux-parisc, LKML, Palmer Dabbelt, Masami Hiramatsu, Guo Ren,
Colin Ian King, linuxppc-dev
In-Reply-To: <20211015180429.GK174703@worktop.programming.kicks-ass.net>
On Fri, 15 Oct 2021 20:04:29 +0200
Peter Zijlstra <peterz@infradead.org> wrote:
> On Fri, Oct 15, 2021 at 01:58:06PM -0400, Steven Rostedt wrote:
> > Something like this:
>
> I think having one copy of that in a header is better than having 3
> copies. But yes, something along them lines.
I was just about to ask you about this patch ;-)
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 4d244e295e85..a6ae329aa654 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -74,6 +74,27 @@
*/
#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
+/**
+ * interrupt_context_level - return interrupt context level
+ *
+ * Returns the current interrupt context level.
+ * 0 - normal context
+ * 1 - softirq context
+ * 2 - hardirq context
+ * 3 - NMI context
+ */
+static __always_inline unsigned char interrupt_context_level(void)
+{
+ unsigned long pc = preempt_count();
+ unsigned char level = 0;
+
+ level += !!(pc & (NMI_MASK));
+ level += !!(pc & (NMI_MASK | HARDIRQ_MASK));
+ level += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+
+ return level;
+}
+
/* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */
#include <asm/preempt.h>
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index 41f5bfd9e93f..018a04381556 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -118,12 +118,7 @@ enum {
static __always_inline int trace_get_context_bit(void)
{
- unsigned long pc = preempt_count();
- unsigned char bit = 0;
-
- bit += !!(pc & (NMI_MASK));
- bit += !!(pc & (NMI_MASK | HARDIRQ_MASK));
- bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+ unsigned char bit = interrupt_context_level();
return TRACE_CTX_NORMAL - bit;
}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 228801e20788..c91711f20cf8 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -206,11 +206,7 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
static inline int get_recursion_context(int *recursion)
{
unsigned int pc = preempt_count();
- unsigned char rctx = 0;
-
- rctx += !!(pc & (NMI_MASK));
- rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK));
- rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+ unsigned char rctx = interrupt_context_level();
if (recursion[rctx])
return -1;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 15d4380006e3..f6520d0a4c8c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3167,12 +3167,7 @@ static __always_inline int
trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned int val = cpu_buffer->current_context;
- unsigned long pc = preempt_count();
- int bit = 0;
-
- bit += !!(pc & (NMI_MASK));
- bit += !!(pc & (NMI_MASK | HARDIRQ_MASK));
- bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+ int bit = interrupt_context_level();
bit = RB_CTX_NORMAL - bit;
^ permalink raw reply related
* Re: [PATCH] tracing: Have all levels of checks prevent recursion
From: Steven Rostedt @ 2021-10-15 18:25 UTC (permalink / raw)
To: Peter Zijlstra
Cc: 王贇, Paul Walmsley, James E.J. Bottomley,
Paul Mackerras, Jisheng Zhang, H. Peter Anvin, live-patching,
linux-riscv, Miroslav Benes, Joe Lawrence, Helge Deller, x86,
linux-csky, Ingo Molnar, Petr Mladek, Albert Ou, Jiri Kosina,
Nicholas Piggin, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
linux-parisc, LKML, Palmer Dabbelt, Masami Hiramatsu, Guo Ren,
Colin Ian King, linuxppc-dev
In-Reply-To: <20211015142033.72605b47@gandalf.local.home>
On Fri, 15 Oct 2021 14:20:33 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:
> > I think having one copy of that in a header is better than having 3
> > copies. But yes, something along them lines.
>
> I was just about to ask you about this patch ;-)
Except it doesn't build :-p (need to move the inlined function down a bit)
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 4d244e295e85..b32e3dabe28b 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -77,6 +77,27 @@
/* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */
#include <asm/preempt.h>
+/**
+ * interrupt_context_level - return interrupt context level
+ *
+ * Returns the current interrupt context level.
+ * 0 - normal context
+ * 1 - softirq context
+ * 2 - hardirq context
+ * 3 - NMI context
+ */
+static __always_inline unsigned char interrupt_context_level(void)
+{
+ unsigned long pc = preempt_count();
+ unsigned char level = 0;
+
+ level += !!(pc & (NMI_MASK));
+ level += !!(pc & (NMI_MASK | HARDIRQ_MASK));
+ level += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+
+ return level;
+}
+
#define nmi_count() (preempt_count() & NMI_MASK)
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
#ifdef CONFIG_PREEMPT_RT
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index 41f5bfd9e93f..018a04381556 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -118,12 +118,7 @@ enum {
static __always_inline int trace_get_context_bit(void)
{
- unsigned long pc = preempt_count();
- unsigned char bit = 0;
-
- bit += !!(pc & (NMI_MASK));
- bit += !!(pc & (NMI_MASK | HARDIRQ_MASK));
- bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+ unsigned char bit = interrupt_context_level();
return TRACE_CTX_NORMAL - bit;
}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 228801e20788..c91711f20cf8 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -206,11 +206,7 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
static inline int get_recursion_context(int *recursion)
{
unsigned int pc = preempt_count();
- unsigned char rctx = 0;
-
- rctx += !!(pc & (NMI_MASK));
- rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK));
- rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+ unsigned char rctx = interrupt_context_level();
if (recursion[rctx])
return -1;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 15d4380006e3..f6520d0a4c8c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3167,12 +3167,7 @@ static __always_inline int
trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned int val = cpu_buffer->current_context;
- unsigned long pc = preempt_count();
- int bit = 0;
-
- bit += !!(pc & (NMI_MASK));
- bit += !!(pc & (NMI_MASK | HARDIRQ_MASK));
- bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+ int bit = interrupt_context_level();
bit = RB_CTX_NORMAL - bit;
^ permalink raw reply related
* Re: [PATCH] tracing: Have all levels of checks prevent recursion
From: Peter Zijlstra @ 2021-10-15 18:24 UTC (permalink / raw)
To: Steven Rostedt
Cc: 王贇, Paul Walmsley, James E.J. Bottomley,
Paul Mackerras, Jisheng Zhang, H. Peter Anvin, live-patching,
linux-riscv, Miroslav Benes, Joe Lawrence, Helge Deller, x86,
linux-csky, Ingo Molnar, Petr Mladek, Albert Ou, Jiri Kosina,
Nicholas Piggin, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
linux-parisc, LKML, Palmer Dabbelt, Masami Hiramatsu, Guo Ren,
Colin Ian King, linuxppc-dev
In-Reply-To: <20211015142033.72605b47@gandalf.local.home>
On Fri, Oct 15, 2021 at 02:20:33PM -0400, Steven Rostedt wrote:
> On Fri, 15 Oct 2021 20:04:29 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
>
> > On Fri, Oct 15, 2021 at 01:58:06PM -0400, Steven Rostedt wrote:
> > > Something like this:
> >
> > I think having one copy of that in a header is better than having 3
> > copies. But yes, something along them lines.
>
> I was just about to ask you about this patch ;-)
Much better :-)
> diff --git a/kernel/events/internal.h b/kernel/events/internal.h
> index 228801e20788..c91711f20cf8 100644
> --- a/kernel/events/internal.h
> +++ b/kernel/events/internal.h
> @@ -206,11 +206,7 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
> static inline int get_recursion_context(int *recursion)
> {
> unsigned int pc = preempt_count();
Although I think we can do without that ^ line as well :-)
> - unsigned char rctx = 0;
> -
> - rctx += !!(pc & (NMI_MASK));
> - rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK));
> - rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
> + unsigned char rctx = interrupt_context_level();
>
> if (recursion[rctx])
> return -1;
^ permalink raw reply
* Re: [PATCH] tracing: Have all levels of checks prevent recursion
From: Steven Rostedt @ 2021-10-15 19:00 UTC (permalink / raw)
To: Peter Zijlstra
Cc: 王贇, Paul Walmsley, James E.J. Bottomley,
Paul Mackerras, Jisheng Zhang, H. Peter Anvin, live-patching,
linux-riscv, Miroslav Benes, Joe Lawrence, Helge Deller, x86,
linux-csky, Ingo Molnar, Petr Mladek, Albert Ou, Jiri Kosina,
Nicholas Piggin, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
linux-parisc, LKML, Palmer Dabbelt, Masami Hiramatsu, Guo Ren,
Colin Ian King, linuxppc-dev
In-Reply-To: <20211015182459.GL174703@worktop.programming.kicks-ass.net>
On Fri, 15 Oct 2021 20:24:59 +0200
Peter Zijlstra <peterz@infradead.org> wrote:
> > @@ -206,11 +206,7 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
> > static inline int get_recursion_context(int *recursion)
> > {
> > unsigned int pc = preempt_count();
>
> Although I think we can do without that ^ line as well :-)
Ah, I could have sworn I deleted it. Oh well, will make a proper patch set.
Thanks!
-- Steve
>
> > - unsigned char rctx = 0;
^ permalink raw reply
* Re: [PATCH] ibmvscsi: use GFP_KERNEL with dma_alloc_coherent in initialize_event_pool
From: Tyrel Datwyler @ 2021-10-15 19:37 UTC (permalink / raw)
To: Michael Ellerman, tyreld
Cc: brking, james.bottomley, linuxppc-dev, linux-scsi,
martin.petersen
In-Reply-To: <878ryuykd3.fsf@mpe.ellerman.id.au>
On 10/14/21 9:36 PM, Michael Ellerman wrote:
> Tyrel Datwyler <tyreld@linux.ibm.com> writes:
>> Just stumbled upon this trivial little patch that looks to have gotten lost in
>> the shuffle. Seems it even got a reviewed-by from Brian [1].
>>
>> So, uh I guess after almost 3 years...ping?
>
> It's marked "Changes Requested" here:
>
> https://patchwork.kernel.org/project/linux-scsi/patch/1547089149-20577-1-git-send-email-tyreld@linux.vnet.ibm.com/
Interesting
>
>
> If it isn't picked up in a few days you should probably do a resend so
> that it reappears in patchwork.
Fair enough
-Tyrel
>
> cheers
>
^ permalink raw reply
* Re: [PATCH v2 12/13] lkdtm: Fix execute_[user]_location()
From: Kees Cook @ 2021-10-15 21:31 UTC (permalink / raw)
To: Christophe Leroy
Cc: linux-arch, linux-ia64, linux-parisc, Arnd Bergmann,
Greg Kroah-Hartman, Helge Deller, linux-kernel,
James E.J. Bottomley, linux-mm, Paul Mackerras, Andrew Morton,
linuxppc-dev
In-Reply-To: <cbee30c66890994e116a8eae8094fa8c5336f90a.1634190022.git.christophe.leroy@csgroup.eu>
On Thu, Oct 14, 2021 at 07:50:01AM +0200, Christophe Leroy wrote:
> execute_location() and execute_user_location() intent
> to copy do_nothing() text and execute it at a new location.
> However, at the time being it doesn't copy do_nothing() function
> but do_nothing() function descriptor which still points to the
> original text. So at the end it still executes do_nothing() at
> its original location allthough using a copied function descriptor.
>
> So, fix that by really copying do_nothing() text and build a new
> function descriptor by copying do_nothing() function descriptor and
> updating the target address with the new location.
>
> Also fix the displayed addresses by dereferencing do_nothing()
> function descriptor.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
> drivers/misc/lkdtm/perms.c | 25 +++++++++++++++++++++----
> include/asm-generic/sections.h | 5 +++++
> 2 files changed, 26 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
> index 5266dc28df6e..96b3ebfcb8ed 100644
> --- a/drivers/misc/lkdtm/perms.c
> +++ b/drivers/misc/lkdtm/perms.c
> @@ -44,19 +44,32 @@ static noinline void do_overwritten(void)
> return;
> }
>
> +static void *setup_function_descriptor(func_desc_t *fdesc, void *dst)
> +{
> + memcpy(fdesc, do_nothing, sizeof(*fdesc));
> + fdesc->addr = (unsigned long)dst;
> + barrier();
> +
> + return fdesc;
> +}
How about collapsing the "have_function_descriptors()" check into
setup_function_descriptor()?
static void *setup_function_descriptor(func_desc_t *fdesc, void *dst)
{
if (__is_defined(HAVE_FUNCTION_DESCRIPTORS)) {
memcpy(fdesc, do_nothing, sizeof(*fdesc));
fdesc->addr = (unsigned long)dst;
barrier();
return fdesc;
} else {
return dst;
}
}
> +
> static noinline void execute_location(void *dst, bool write)
> {
> void (*func)(void) = dst;
> + func_desc_t fdesc;
> + void *do_nothing_text = dereference_function_descriptor(do_nothing);
>
> - pr_info("attempting ok execution at %px\n", do_nothing);
> + pr_info("attempting ok execution at %px\n", do_nothing_text);
> do_nothing();
>
> if (write == CODE_WRITE) {
> - memcpy(dst, do_nothing, EXEC_SIZE);
> + memcpy(dst, do_nothing_text, EXEC_SIZE);
> flush_icache_range((unsigned long)dst,
> (unsigned long)dst + EXEC_SIZE);
> }
> pr_info("attempting bad execution at %px\n", func);
> + if (have_function_descriptors())
> + func = setup_function_descriptor(&fdesc, dst);
> func();
> pr_err("FAIL: func returned\n");
> }
> @@ -67,15 +80,19 @@ static void execute_user_location(void *dst)
>
> /* Intentionally crossing kernel/user memory boundary. */
> void (*func)(void) = dst;
> + func_desc_t fdesc;
> + void *do_nothing_text = dereference_function_descriptor(do_nothing);
>
> - pr_info("attempting ok execution at %px\n", do_nothing);
> + pr_info("attempting ok execution at %px\n", do_nothing_text);
> do_nothing();
>
> - copied = access_process_vm(current, (unsigned long)dst, do_nothing,
> + copied = access_process_vm(current, (unsigned long)dst, do_nothing_text,
> EXEC_SIZE, FOLL_WRITE);
> if (copied < EXEC_SIZE)
> return;
> pr_info("attempting bad execution at %px\n", func);
> + if (have_function_descriptors())
> + func = setup_function_descriptor(&fdesc, dst);
> func();
> pr_err("FAIL: func returned\n");
> }
> diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
> index 76163883c6ff..d225318538bd 100644
> --- a/include/asm-generic/sections.h
> +++ b/include/asm-generic/sections.h
> @@ -70,6 +70,11 @@ typedef struct {
> } func_desc_t;
> #endif
>
> +static inline bool have_function_descriptors(void)
> +{
> + return __is_defined(HAVE_FUNCTION_DESCRIPTORS);
> +}
> +
> /* random extra sections (if any). Override
> * in asm/sections.h */
> #ifndef arch_is_kernel_text
This hunk seems like it should live in a separate patch.
--
Kees Cook
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox