* [PATCH v4 09/21] powerpc/64: context tracking remove _TIF_NOHZ
From: Nicholas Piggin @ 2021-01-02 12:24 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
Add context tracking to the system call handler explicitly, and remove
_TIF_NOHZ.
This saves 35 cycles on gettid system call cost on POWER9 with a
CONFIG_NOHZ_FULL kernel.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/Kconfig | 1 -
arch/powerpc/include/asm/thread_info.h | 4 +---
arch/powerpc/kernel/ptrace/ptrace.c | 4 ----
arch/powerpc/kernel/signal.c | 4 ----
arch/powerpc/kernel/syscall_64.c | 10 ++++++++++
5 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 107bb4319e0e..28d5a1b1510f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -196,7 +196,6 @@ config PPC
select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_CONTEXT_TRACKING if PPC64
- select HAVE_TIF_NOHZ if PPC64
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 3d8a47af7a25..386d576673a1 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -94,7 +94,6 @@ void arch_setup_new_exec(void);
#define TIF_PATCH_PENDING 6 /* pending live patching update */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SINGLESTEP 8 /* singlestepping active */
-#define TIF_NOHZ 9 /* in adaptive nohz mode */
#define TIF_SECCOMP 10 /* secure computing */
#define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
#define TIF_NOERROR 12 /* Force successful syscall return */
@@ -128,11 +127,10 @@ void arch_setup_new_exec(void);
#define _TIF_UPROBE (1<<TIF_UPROBE)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
-#define _TIF_NOHZ (1<<TIF_NOHZ)
#define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
- _TIF_NOHZ | _TIF_SYSCALL_EMU)
+ _TIF_SYSCALL_EMU)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index 3d44b73adb83..4f3d4ff3728c 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -262,8 +262,6 @@ long do_syscall_trace_enter(struct pt_regs *regs)
{
u32 flags;
- user_exit();
-
flags = READ_ONCE(current_thread_info()->flags) &
(_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
@@ -340,8 +338,6 @@ void do_syscall_trace_leave(struct pt_regs *regs)
step = test_thread_flag(TIF_SINGLESTEP);
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);
-
- user_enter();
}
void __init pt_regs_check(void);
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 53782aa60ade..9ded046edb0e 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -282,8 +282,6 @@ static void do_signal(struct task_struct *tsk)
void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
{
- user_exit();
-
if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
@@ -299,8 +297,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
tracehook_notify_resume(regs);
rseq_handle_notify_resume(NULL, regs);
}
-
- user_enter();
}
static unsigned long get_tm_stackpointer(struct task_struct *tsk)
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index dd87b2118620..d7d256a7a41f 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/context_tracking.h>
#include <linux/err.h>
#include <asm/asm-prototypes.h>
#include <asm/kup.h>
#include <asm/cputime.h>
+#include <asm/interrupt.h>
#include <asm/hw_irq.h>
#include <asm/interrupt.h>
#include <asm/kprobes.h>
@@ -28,6 +30,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+ CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
+ user_exit_irqoff();
+
trace_hardirqs_off(); /* finish reconciling */
if (IS_ENABLED(CONFIG_PPC_BOOK3S))
@@ -182,6 +187,8 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
unsigned long ti_flags;
unsigned long ret = 0;
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
+
kuap_check_amr();
regs->result = r3;
@@ -258,8 +265,11 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
}
}
+ user_enter_irqoff();
+
/* scv need not set RI=0 because SRRs are not used */
if (unlikely(!prep_irq_for_enabled_exit(!scv))) {
+ user_exit_irqoff();
local_irq_enable();
goto again;
}
--
2.23.0
^ permalink raw reply related
* [PATCH v4 10/21] powerpc/64s/hash: improve context tracking of hash faults
From: Nicholas Piggin @ 2021-01-02 12:24 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
This moves the 64s/hash context tracking from hash_page_mm() to
__do_hash_fault(), so it's no longer called by OCXL / SPU
accelerators, which was certainly the wrong thing to be doing,
because those callers are not low-level interrupt handlers, so they
should already have entered kernel context tracking.
Then remain in kernel context for the duration of the fault,
rather than enter/exit for the hash fault then enter/exit for
the page fault, which is pointless.
Even still, calling exception_enter/exit in __do_hash_fault seems
questionable because that's touching per-cpu variables, tracing,
etc., which might have been interrupted by this hash fault or
themselves cause hash faults. But maybe I am missing something because
hash_page_mm very deliberately calls trace_hash_fault too, for
example. So for now go with it, it's no worse than before, in this
regard.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/bug.h | 1 +
arch/powerpc/mm/book3s64/hash_utils.c | 7 ++++---
arch/powerpc/mm/fault.c | 9 +++++++++
3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 4220789b9a97..e048c820ca02 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -112,6 +112,7 @@
struct pt_regs;
long do_page_fault(struct pt_regs *);
+long hash__do_page_fault(struct pt_regs *);
void bad_page_fault(struct pt_regs *, int);
void __bad_page_fault(struct pt_regs *regs, int sig);
extern void _exception(int, struct pt_regs *, int, unsigned long);
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 453afb9ae9b4..801d5e94cd2b 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1289,7 +1289,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
unsigned long flags)
{
bool is_thp;
- enum ctx_state prev_state = exception_enter();
pgd_t *pgdir;
unsigned long vsid;
pte_t *ptep;
@@ -1491,7 +1490,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
DBG_LOW(" -> rc=%d\n", rc);
bail:
- exception_exit(prev_state);
return rc;
}
EXPORT_SYMBOL_GPL(hash_page_mm);
@@ -1515,6 +1513,7 @@ EXPORT_SYMBOL_GPL(hash_page);
DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
{
+ enum ctx_state prev_state = exception_enter();
unsigned long ea = regs->dar;
unsigned long dsisr = regs->dsisr;
unsigned long access = _PAGE_PRESENT | _PAGE_READ;
@@ -1563,9 +1562,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
err = 0;
} else if (err) {
- err = do_page_fault(regs);
+ err = hash__do_page_fault(regs);
}
+ exception_exit(prev_state);
+
return err;
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 81dbce473726..cc71c93cceaf 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -589,6 +589,15 @@ DEFINE_INTERRUPT_HANDLER_RET(do_page_fault)
}
NOKPROBE_SYMBOL(do_page_fault);
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Same as do_page_fault but interrupt entry has already run in do_hash_fault */
+long hash__do_page_fault(struct pt_regs *regs)
+{
+ return __do_page_fault(regs);
+}
+NOKPROBE_SYMBOL(hash__do_page_fault);
+#endif
+
/*
* bad_page_fault is called when we have a bad access from the kernel.
* It is called from the DSI and ISI handlers in head.S and from some
--
2.23.0
^ permalink raw reply related
* [PATCH v4 11/21] powerpc/64: context tracking move to interrupt wrappers
From: Nicholas Piggin @ 2021-01-02 12:24 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
This moves exception_enter/exit calls to wrapper functions for
synchronous interrupts. More interrupt handlers are covered by
this than previously.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 9 ++++
arch/powerpc/kernel/traps.c | 74 ++++++---------------------
arch/powerpc/mm/book3s64/hash_utils.c | 3 --
3 files changed, 26 insertions(+), 60 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index dfa846ebae43..7fab54a14152 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -7,10 +7,16 @@
#include <asm/ftrace.h>
struct interrupt_state {
+#ifdef CONFIG_PPC64
+ enum ctx_state ctx_state;
+#endif
};
static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+#ifdef CONFIG_PPC64
+ state->ctx_state = exception_enter();
+#endif
}
/*
@@ -29,6 +35,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
*/
static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+#ifdef CONFIG_PPC64
+ exception_exit(state->ctx_state);
+#endif
}
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 0b712c40272b..b2c53883580b 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1077,41 +1077,28 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
DEFINE_INTERRUPT_HANDLER(unknown_exception)
{
- enum ctx_state prev_state = exception_enter();
-
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
regs->nip, regs->msr, regs->trap);
_exception(SIGTRAP, regs, TRAP_UNK, 0);
-
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
{
- enum ctx_state prev_state = exception_enter();
-
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
regs->nip, regs->msr, regs->trap);
_exception(SIGTRAP, regs, TRAP_UNK, 0);
-
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
{
- enum ctx_state prev_state = exception_enter();
-
if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
- goto bail;
+ return;
if (debugger_iabr_match(regs))
- goto bail;
+ return;
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
-
-bail:
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER(RunModeException)
@@ -1121,8 +1108,6 @@ DEFINE_INTERRUPT_HANDLER(RunModeException)
DEFINE_INTERRUPT_HANDLER(single_step_exception)
{
- enum ctx_state prev_state = exception_enter();
-
clear_single_step(regs);
clear_br_trace(regs);
@@ -1131,14 +1116,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception)
if (notify_die(DIE_SSTEP, "single_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
- goto bail;
+ return;
if (debugger_sstep(regs))
- goto bail;
+ return;
_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
-
-bail:
- exception_exit(prev_state);
}
NOKPROBE_SYMBOL(single_step_exception);
@@ -1466,7 +1448,6 @@ static inline int emulate_math(struct pt_regs *regs) { return -1; }
DEFINE_INTERRUPT_HANDLER(program_check_exception)
{
- enum ctx_state prev_state = exception_enter();
unsigned int reason = get_reason(regs);
/* We can now get here via a FP Unavailable exception if the core
@@ -1475,22 +1456,22 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
if (reason & REASON_FP) {
/* IEEE FP exception */
parse_fpe(regs);
- goto bail;
+ return;
}
if (reason & REASON_TRAP) {
unsigned long bugaddr;
/* Debugger is first in line to stop recursive faults in
* rcu_lock, notify_die, or atomic_notifier_call_chain */
if (debugger_bpt(regs))
- goto bail;
+ return;
if (kprobe_handler(regs))
- goto bail;
+ return;
/* trap exception */
if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
== NOTIFY_STOP)
- goto bail;
+ return;
bugaddr = regs->nip;
/*
@@ -1502,10 +1483,10 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
if (!(regs->msr & MSR_PR) && /* not user-mode */
report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
regs->nip += 4;
- goto bail;
+ return;
}
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
- goto bail;
+ return;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (reason & REASON_TM) {
@@ -1526,7 +1507,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
*/
if (user_mode(regs)) {
_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
- goto bail;
+ return;
} else {
printk(KERN_EMERG "Unexpected TM Bad Thing exception "
"at %lx (msr 0x%lx) tm_scratch=%llx\n",
@@ -1557,7 +1538,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
* pattern to occurrences etc. -dgibson 31/Mar/2003
*/
if (!emulate_math(regs))
- goto bail;
+ return;
/* Try to emulate it if we should. */
if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
@@ -1565,10 +1546,10 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
case 0:
regs->nip += 4;
emulate_single_step(regs);
- goto bail;
+ return;
case -EFAULT:
_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
- goto bail;
+ return;
}
}
@@ -1577,9 +1558,6 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
else
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-
-bail:
- exception_exit(prev_state);
}
NOKPROBE_SYMBOL(program_check_exception);
@@ -1596,14 +1574,12 @@ NOKPROBE_SYMBOL(emulation_assist_interrupt);
DEFINE_INTERRUPT_HANDLER(alignment_exception)
{
- enum ctx_state prev_state = exception_enter();
int sig, code, fixed = 0;
unsigned long reason;
interrupt_cond_local_irq_enable(regs);
reason = get_reason(regs);
-
if (reason & REASON_BOUNDARY) {
sig = SIGBUS;
code = BUS_ADRALN;
@@ -1611,7 +1587,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
}
if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
- goto bail;
+ return;
/* we don't implement logging of alignment exceptions */
if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
@@ -1621,7 +1597,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
/* skip over emulated instruction */
regs->nip += inst_length(reason);
emulate_single_step(regs);
- goto bail;
+ return;
}
/* Operand address was bad */
@@ -1637,9 +1613,6 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
_exception(sig, regs, code, regs->dar);
else
bad_page_fault(regs, sig);
-
-bail:
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER(StackOverflow)
@@ -1653,41 +1626,28 @@ DEFINE_INTERRUPT_HANDLER(StackOverflow)
DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
{
- enum ctx_state prev_state = exception_enter();
-
die("Kernel stack overflow", regs, SIGSEGV);
-
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception)
{
- enum ctx_state prev_state = exception_enter();
-
printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
"%lx at %lx\n", regs->trap, regs->nip);
die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
-
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception)
{
- enum ctx_state prev_state = exception_enter();
-
if (user_mode(regs)) {
/* A user program has executed an altivec instruction,
but this kernel doesn't support altivec. */
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
- goto bail;
+ return;
}
printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
"%lx at %lx\n", regs->trap, regs->nip);
die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
-
-bail:
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception)
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 801d5e94cd2b..662adafc92e0 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1513,7 +1513,6 @@ EXPORT_SYMBOL_GPL(hash_page);
DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
{
- enum ctx_state prev_state = exception_enter();
unsigned long ea = regs->dar;
unsigned long dsisr = regs->dsisr;
unsigned long access = _PAGE_PRESENT | _PAGE_READ;
@@ -1565,8 +1564,6 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
err = hash__do_page_fault(regs);
}
- exception_exit(prev_state);
-
return err;
}
--
2.23.0
^ permalink raw reply related
* [PATCH v4 12/21] powerpc/64: add context tracking to asynchronous interrupts
From: Nicholas Piggin @ 2021-01-02 12:24 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
Previously context tracking was not done for asynchronous interrupts
(those that run in interrupt context), and if those would cause a
reschedule when they exit, then scheduling functions (schedule_user,
preempt_schedule_irq) call exception_enter/exit to fix this up and
exit user context.
This is a hack we would like to get away from, so do context tracking
for asynchronous interrupts too.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 7fab54a14152..7c40ce78c4bb 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -42,10 +42,12 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+ interrupt_enter_prepare(regs, state);
}
static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+ interrupt_exit_prepare(regs, state);
}
struct interrupt_nmi_state {
--
2.23.0
^ permalink raw reply related
* [PATCH v4 13/21] powerpc: handle irq_enter/irq_exit in interrupt handler wrappers
From: Nicholas Piggin @ 2021-01-02 12:25 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
Move irq_enter/irq_exit into asynchronous interrupt handler wrappers.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 2 ++
arch/powerpc/kernel/dbell.c | 3 +--
arch/powerpc/kernel/irq.c | 4 ----
arch/powerpc/kernel/tau_6xx.c | 3 ---
arch/powerpc/kernel/time.c | 4 ++--
arch/powerpc/kernel/traps.c | 6 ------
6 files changed, 5 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 7c40ce78c4bb..bee393c72fe5 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -43,10 +43,12 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
interrupt_enter_prepare(regs, state);
+ irq_enter();
}
static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+ irq_exit();
interrupt_exit_prepare(regs, state);
}
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index c0f99f8ffa7d..84ee9c511459 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -22,7 +22,6 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
#ifdef CONFIG_SMP
struct pt_regs *old_regs = set_irq_regs(regs);
- irq_enter();
trace_doorbell_entry(regs);
ppc_msgsync();
@@ -35,7 +34,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
smp_ipi_demux_relaxed(); /* already performed the barrier */
trace_doorbell_exit(regs);
- irq_exit();
+
set_irq_regs(old_regs);
#else /* CONFIG_SMP */
printk(KERN_WARNING "Received doorbell on non-smp system\n");
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 2055d204d08e..681abb7c0507 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -641,8 +641,6 @@ void __do_irq(struct pt_regs *regs)
{
unsigned int irq;
- irq_enter();
-
trace_irq_entry(regs);
/*
@@ -662,8 +660,6 @@ void __do_irq(struct pt_regs *regs)
generic_handle_irq(irq);
trace_irq_exit(regs);
-
- irq_exit();
}
DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index 46b2e5de4ef5..d864f07bab74 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -104,12 +104,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(TAUException)
{
int cpu = smp_processor_id();
- irq_enter();
tau[cpu].interrupts++;
TAUupdate(cpu);
-
- irq_exit();
}
#endif /* CONFIG_TAU_INT */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 435a251247ed..2177defb7884 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -610,7 +610,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
#endif
old_regs = set_irq_regs(regs);
- irq_enter();
+
trace_timer_interrupt_entry(regs);
if (test_irq_work_pending()) {
@@ -635,7 +635,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
}
trace_timer_interrupt_exit(regs);
- irq_exit();
+
set_irq_regs(old_regs);
}
EXPORT_SYMBOL(timer_interrupt);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index b2c53883580b..b4f23e871a68 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1051,7 +1051,6 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
struct pt_regs *old_regs;
old_regs = set_irq_regs(regs);
- irq_enter();
#ifdef CONFIG_VSX
/* Real mode flagged P9 special emu is needed */
@@ -1071,7 +1070,6 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
if (ppc_md.handle_hmi_exception)
ppc_md.handle_hmi_exception(regs);
- irq_exit();
set_irq_regs(old_regs);
}
@@ -1889,13 +1887,9 @@ DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi)
DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async)
{
- irq_enter();
-
__this_cpu_inc(irq_stat.pmu_irqs);
perf_irq(regs);
-
- irq_exit();
}
DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception)
--
2.23.0
^ permalink raw reply related
* [PATCH v4 14/21] powerpc/64s: move context tracking exit to interrupt exit path
From: Nicholas Piggin @ 2021-01-02 12:25 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
The interrupt handler wrapper functions are not the ideal place to
maintain context tracking because after they return, the low level exit
code must then determine if there are interrupts to replay, or if the
task should be preempted, etc. Those paths (e.g., schedule_user) include
their own exception_enter/exit pairs to fix this up but it's a bit hacky
(see schedule_user() comments).
Ideally context tracking will go to user mode only when there are no
more interrupts or context switches or other exit processing work to
handle.
64e cannot do this because it does not use the C interrupt exit code.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 34 +++++++++++++++++++++++++---
arch/powerpc/kernel/syscall_64.c | 9 ++++++++
2 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index bee393c72fe5..34d7cca2cb2e 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -7,16 +7,30 @@
#include <asm/ftrace.h>
struct interrupt_state {
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3E_64
enum ctx_state ctx_state;
#endif
};
static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3E_64
state->ctx_state = exception_enter();
#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (user_mode(regs)) {
+ CT_WARN_ON(ct_state() != CONTEXT_USER);
+ user_exit_irqoff();
+ } else {
+ /*
+ * CT_WARN_ON comes here via program_check_exception,
+ * so avoid recursion.
+ */
+ if (TRAP(regs) != 0x700)
+ CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
+ }
+#endif
}
/*
@@ -35,9 +49,23 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
*/
static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3E_64
exception_exit(state->ctx_state);
#endif
+
+ /*
+ * Book3S exits to user via interrupt_exit_user_prepare(), which does
+ * context tracking, which is a cleaner way to handle PREEMPT=y
+ * and avoid context entry/exit in e.g., preempt_schedule_irq()),
+ * which is likely to be where the core code wants to end up.
+ *
+ * The above comment explains why we can't do the
+ *
+ * if (user_mode(regs))
+ * user_exit_irqoff();
+ *
+ * sequence here.
+ */
}
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index d7d256a7a41f..42f0ad4b2fbb 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -305,6 +305,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
BUG_ON(!(regs->msr & MSR_PR));
BUG_ON(!FULL_REGS(regs));
BUG_ON(regs->softe != IRQS_ENABLED);
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
/*
* We don't need to restore AMR on the way back to userspace for KUAP.
@@ -347,7 +348,9 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
}
}
+ user_enter_irqoff();
if (unlikely(!prep_irq_for_enabled_exit(true))) {
+ user_exit_irqoff();
local_irq_enable();
local_irq_disable();
goto again;
@@ -392,6 +395,12 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
unrecoverable_exception(regs);
BUG_ON(regs->msr & MSR_PR);
BUG_ON(!FULL_REGS(regs));
+ /*
+ * CT_WARN_ON comes here via program_check_exception,
+ * so avoid recursion.
+ */
+ if (TRAP(regs) != 0x700)
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
amr = kuap_get_and_check_amr();
--
2.23.0
^ permalink raw reply related
* [PATCH v4 15/21] powerpc/64s: reconcile interrupts in C
From: Nicholas Piggin @ 2021-01-02 12:25 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
There is no need for this to be in asm; use the new interrupt entry wrapper.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 15 +++++++++++----
arch/powerpc/kernel/exceptions-64s.S | 26 --------------------------
2 files changed, 11 insertions(+), 30 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 34d7cca2cb2e..6eba7c489753 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -14,11 +14,14 @@ struct interrupt_state {
static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
-#ifdef CONFIG_PPC_BOOK3E_64
- state->ctx_state = exception_enter();
-#endif
-
+ /*
+ * Book3E reconciles irq soft mask in asm
+ */
#ifdef CONFIG_PPC_BOOK3S_64
+ if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
+ trace_hardirqs_off();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
if (user_mode(regs)) {
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff();
@@ -31,6 +34,10 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
}
#endif
+
+#ifdef CONFIG_PPC_BOOK3E_64
+ state->ctx_state = exception_enter();
+#endif
}
/*
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index fe2a24cd399f..398194711713 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -139,7 +139,6 @@ name:
#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */
#define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */
#define __ISTACK(name) .L_ISTACK_ ## name
-#define IRECONCILE .L_IRECONCILE_\name\() /* Do RECONCILE_IRQ_STATE */
#define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */
#define INT_DEFINE_BEGIN(n) \
@@ -203,9 +202,6 @@ do_define_int n
.ifndef ISTACK
ISTACK=1
.endif
- .ifndef IRECONCILE
- IRECONCILE=1
- .endif
.ifndef IKUAP
IKUAP=1
.endif
@@ -653,10 +649,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.if ISTACK
ACCOUNT_STOLEN_TIME
.endif
-
- .if IRECONCILE
- RECONCILE_IRQ_STATE(r10, r11)
- .endif
.endm
/*
@@ -935,7 +927,6 @@ INT_DEFINE_BEGIN(system_reset)
*/
ISET_RI=0
ISTACK=0
- IRECONCILE=0
IKVM_REAL=1
INT_DEFINE_END(system_reset)
@@ -1123,7 +1114,6 @@ INT_DEFINE_BEGIN(machine_check_early)
ISTACK=0
IDAR=1
IDSISR=1
- IRECONCILE=0
IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
INT_DEFINE_END(machine_check_early)
@@ -1476,7 +1466,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
INT_DEFINE_BEGIN(data_access_slb)
IVEC=0x380
IAREA=PACA_EXSLB
- IRECONCILE=0
IDAR=1
IKVM_SKIP=1
IKVM_REAL=1
@@ -1503,7 +1492,6 @@ MMU_FTR_SECTION_ELSE
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
b interrupt_return
@@ -1565,7 +1553,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
INT_DEFINE_BEGIN(instruction_access_slb)
IVEC=0x480
IAREA=PACA_EXSLB
- IRECONCILE=0
IISIDE=1
IDAR=1
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -1594,7 +1581,6 @@ MMU_FTR_SECTION_ELSE
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
b interrupt_return
@@ -1754,7 +1740,6 @@ EXC_COMMON_BEGIN(program_check_common)
*/
INT_DEFINE_BEGIN(fp_unavailable)
IVEC=0x800
- IRECONCILE=0
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
@@ -1769,7 +1754,6 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
EXC_COMMON_BEGIN(fp_unavailable_common)
GEN_COMMON fp_unavailable
bne 1f /* if from user, just load it up */
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_fp_unavailable_exception
0: trap
@@ -1788,7 +1772,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
b fast_interrupt_return
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl fp_unavailable_tm
b interrupt_return
@@ -1853,7 +1836,6 @@ INT_DEFINE_BEGIN(hdecrementer)
IVEC=0x980
IHSRR=1
ISTACK=0
- IRECONCILE=0
IKVM_REAL=1
IKVM_VIRT=1
INT_DEFINE_END(hdecrementer)
@@ -2227,7 +2209,6 @@ INT_DEFINE_BEGIN(hmi_exception_early)
IHSRR=1
IREALMODE_COMMON=1
ISTACK=0
- IRECONCILE=0
IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
IKVM_REAL=1
INT_DEFINE_END(hmi_exception_early)
@@ -2401,7 +2382,6 @@ EXC_COMMON_BEGIN(performance_monitor_common)
*/
INT_DEFINE_BEGIN(altivec_unavailable)
IVEC=0xf20
- IRECONCILE=0
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
@@ -2431,7 +2411,6 @@ BEGIN_FTR_SECTION
b fast_interrupt_return
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_tm
b interrupt_return
@@ -2439,7 +2418,6 @@ BEGIN_FTR_SECTION
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_exception
b interrupt_return
@@ -2455,7 +2433,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
*/
INT_DEFINE_BEGIN(vsx_unavailable)
IVEC=0xf40
- IRECONCILE=0
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
@@ -2484,7 +2461,6 @@ BEGIN_FTR_SECTION
b load_up_vsx
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_tm
b interrupt_return
@@ -2492,7 +2468,6 @@ BEGIN_FTR_SECTION
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_exception
b interrupt_return
@@ -2827,7 +2802,6 @@ EXC_VIRT_NONE(0x5800, 0x100)
INT_DEFINE_BEGIN(soft_nmi)
IVEC=0x900
ISTACK=0
- IRECONCILE=0 /* Soft-NMI may fire under local_irq_disable */
INT_DEFINE_END(soft_nmi)
/*
--
2.23.0
^ permalink raw reply related
* [PATCH v4 16/21] powerpc/64: move account_stolen_time into its own function
From: Nicholas Piggin @ 2021-01-02 12:25 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
This will be used by interrupt entry as well.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/cputime.h | 15 +++++++++++++++
arch/powerpc/kernel/syscall_64.c | 10 +---------
2 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index ed75d1c318e3..3f61604e1fcf 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -87,6 +87,18 @@ static notrace inline void account_cpu_user_exit(void)
acct->starttime_user = tb;
}
+static notrace inline void account_stolen_time(void)
+{
+#ifdef CONFIG_PPC_SPLPAR
+ if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
+ firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct lppaca *lp = local_paca->lppaca_ptr;
+
+ if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
+ accumulate_stolen_time();
+ }
+#endif
+}
#endif /* __KERNEL__ */
#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
@@ -96,5 +108,8 @@ static inline void account_cpu_user_entry(void)
static inline void account_cpu_user_exit(void)
{
}
+static notrace inline void account_stolen_time(void)
+{
+}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index 42f0ad4b2fbb..32f72965da26 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -69,15 +69,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
account_cpu_user_entry();
-#ifdef CONFIG_PPC_SPLPAR
- if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
- firmware_has_feature(FW_FEATURE_SPLPAR)) {
- struct lppaca *lp = local_paca->lppaca_ptr;
-
- if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
- accumulate_stolen_time();
- }
-#endif
+ account_stolen_time();
/*
* This is not required for the syscall exit path, but makes the
--
2.23.0
^ permalink raw reply related
* [PATCH v4 17/21] powerpc/64: entry cpu time accounting in C
From: Nicholas Piggin @ 2021-01-02 12:25 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
There is no need for this to be in asm; use the new interrupt entry wrapper.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 7 +++++++
arch/powerpc/include/asm/ppc_asm.h | 24 ------------------------
arch/powerpc/kernel/exceptions-64e.S | 1 -
arch/powerpc/kernel/exceptions-64s.S | 5 -----
4 files changed, 7 insertions(+), 30 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 6eba7c489753..e278dffe7657 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -4,6 +4,7 @@
#include <linux/context_tracking.h>
#include <linux/hardirq.h>
+#include <asm/cputime.h>
#include <asm/ftrace.h>
struct interrupt_state {
@@ -25,6 +26,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
if (user_mode(regs)) {
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff();
+
+ account_cpu_user_entry();
+ account_stolen_time();
} else {
/*
* CT_WARN_ON comes here via program_check_exception,
@@ -38,6 +42,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
#ifdef CONFIG_PPC_BOOK3E_64
state->ctx_state = exception_enter();
#endif
+
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) && user_mode(regs))
+ account_cpu_user_entry();
}
/*
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index cc1bca571332..3dceb64fc9af 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -25,7 +25,6 @@
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)
#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb)
-#define ACCOUNT_STOLEN_TIME
#else
#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) \
MFTB(ra); /* get timebase */ \
@@ -44,29 +43,6 @@
PPC_LL ra, ACCOUNT_SYSTEM_TIME(ptr); \
add ra,ra,rb; /* add on to system time */ \
PPC_STL ra, ACCOUNT_SYSTEM_TIME(ptr)
-
-#ifdef CONFIG_PPC_SPLPAR
-#define ACCOUNT_STOLEN_TIME \
-BEGIN_FW_FTR_SECTION; \
- beq 33f; \
- /* from user - see if there are any DTL entries to process */ \
- ld r10,PACALPPACAPTR(r13); /* get ptr to VPA */ \
- ld r11,PACA_DTL_RIDX(r13); /* get log read index */ \
- addi r10,r10,LPPACA_DTLIDX; \
- LDX_BE r10,0,r10; /* get log write index */ \
- cmpd cr1,r11,r10; \
- beq+ cr1,33f; \
- bl accumulate_stolen_time; \
- ld r12,_MSR(r1); \
- andi. r10,r12,MSR_PR; /* Restore cr0 (coming from user) */ \
-33: \
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-
-#else /* CONFIG_PPC_SPLPAR */
-#define ACCOUNT_STOLEN_TIME
-
-#endif /* CONFIG_PPC_SPLPAR */
-
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 52421042a020..87b3e74ded41 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -398,7 +398,6 @@ exc_##n##_common: \
std r10,_NIP(r1); /* save SRR0 to stackframe */ \
std r11,_MSR(r1); /* save SRR1 to stackframe */ \
beq 2f; /* if from kernel mode */ \
- ACCOUNT_CPU_USER_ENTRY(r13,r10,r11);/* accounting (uses cr0+eq) */ \
2: ld r3,excf+EX_R10(r13); /* get back r10 */ \
ld r4,excf+EX_R11(r13); /* get back r11 */ \
mfspr r5,scratch; /* get back r13 */ \
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 398194711713..e6e61c6f7298 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -577,7 +577,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
kuap_save_amr_and_lock r9, r10, cr1, cr0
.endif
beq 101f /* if from kernel mode */
- ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
BEGIN_FTR_SECTION
ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */
std r9,_PPR(r1)
@@ -645,10 +644,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r11,exception_marker@toc(r2)
std r10,RESULT(r1) /* clear regs->result */
std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */
-
- .if ISTACK
- ACCOUNT_STOLEN_TIME
- .endif
.endm
/*
--
2.23.0
^ permalink raw reply related
* [PATCH v4 18/21] powerpc: move NMI entry/exit code into wrapper
From: Nicholas Piggin @ 2021-01-02 12:25 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
This moves the common NMI entry and exit code into the interrupt handler
wrappers.
This changes the behaviour of soft-NMI (watchdog) and HMI interrupts, and
also MCE interrupts on 64e, by adding missing parts of the NMI entry to
them.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 24 ++++++++++++++++
arch/powerpc/kernel/mce.c | 11 --------
arch/powerpc/kernel/traps.c | 42 +++++-----------------------
arch/powerpc/kernel/watchdog.c | 10 +++----
4 files changed, 35 insertions(+), 52 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index e278dffe7657..01192e213f9a 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -95,14 +95,38 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int
}
struct interrupt_nmi_state {
+#ifdef CONFIG_PPC64
+ u8 ftrace_enabled;
+#endif
};
static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
{
+#ifdef CONFIG_PPC64
+ state->ftrace_enabled = this_cpu_get_ftrace_enabled();
+ this_cpu_set_ftrace_enabled(0);
+#endif
+
+ /*
+ * Do not use nmi_enter() for pseries hash guest taking a real-mode
+ * NMI because not everything it touches is within the RMA limit.
+ */
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) ||
+ !firmware_has_feature(FW_FEATURE_LPAR) ||
+ radix_enabled() || (mfmsr() & MSR_DR))
+ nmi_enter();
}
static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
{
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) ||
+ !firmware_has_feature(FW_FEATURE_LPAR) ||
+ radix_enabled() || (mfmsr() & MSR_DR))
+ nmi_exit();
+
+#ifdef CONFIG_PPC64
+ this_cpu_set_ftrace_enabled(state->ftrace_enabled);
+#endif
}
/**
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 54269947113d..51456217ec40 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -592,12 +592,6 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
{
long handled = 0;
- u8 ftrace_enabled = this_cpu_get_ftrace_enabled();
-
- this_cpu_set_ftrace_enabled(0);
- /* Do not use nmi_enter/exit for pseries hpte guest */
- if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
- nmi_enter();
hv_nmi_check_nonrecoverable(regs);
@@ -607,11 +601,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
if (ppc_md.machine_check_early)
handled = ppc_md.machine_check_early(regs);
- if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
- nmi_exit();
-
- this_cpu_set_ftrace_enabled(ftrace_enabled);
-
return handled;
}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index b4f23e871a68..43d23232ef5c 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -435,11 +435,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
{
unsigned long hsrr0, hsrr1;
bool saved_hsrrs = false;
- u8 ftrace_enabled = this_cpu_get_ftrace_enabled();
-
- this_cpu_set_ftrace_enabled(0);
-
- nmi_enter();
/*
* System reset can interrupt code where HSRRs are live and MSR[RI]=1.
@@ -511,10 +506,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
mtspr(SPRN_HSRR1, hsrr1);
}
- nmi_exit();
-
- this_cpu_set_ftrace_enabled(ftrace_enabled);
-
/* What should we do here? We could issue a shutdown or hard reset. */
return 0;
@@ -792,6 +783,12 @@ int machine_check_generic(struct pt_regs *regs)
#endif /* everything else */
+/*
+ * BOOK3S_64 does not call this handler as a non-maskable interrupt
+ * (it uses its own early real-mode handler to handle the MCE proper
+ * and then raises irq_work to call this handler when interrupts are
+ * enabled).
+ */
#ifdef CONFIG_PPC_BOOK3S_64
DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception)
#else
@@ -800,20 +797,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
{
int recover = 0;
- /*
- * BOOK3S_64 does not call this handler as a non-maskable interrupt
- * (it uses its own early real-mode handler to handle the MCE proper
- * and then raises irq_work to call this handler when interrupts are
- * enabled).
- *
- * This is silly. The BOOK3S_64 should just call a different function
- * rather than expecting semantics to magically change. Something
- * like 'non_nmi_machine_check_exception()', perhaps?
- */
- const bool nmi = !IS_ENABLED(CONFIG_PPC_BOOK3S_64);
-
- if (nmi) nmi_enter();
-
__this_cpu_inc(irq_stat.mce_exceptions);
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -838,24 +821,17 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
if (check_io_access(regs))
goto bail;
- if (nmi) nmi_exit();
-
die("Machine check", regs, SIGBUS);
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
die("Unrecoverable Machine check", regs, SIGBUS);
-#ifdef CONFIG_PPC_BOOK3S_64
bail:
+#ifdef CONFIG_PPC_BOOK3S_64
return;
#else
return 0;
-
-bail:
- if (nmi) nmi_exit();
-
- return 0;
#endif
}
NOKPROBE_SYMBOL(machine_check_exception);
@@ -1873,14 +1849,10 @@ DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm)
#ifdef CONFIG_PPC64
DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi)
{
- nmi_enter();
-
__this_cpu_inc(irq_stat.pmu_irqs);
perf_irq(regs);
- nmi_exit();
-
return 0;
}
#endif
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 824b9376ac35..dc39534836a3 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -254,11 +254,12 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
int cpu = raw_smp_processor_id();
u64 tb;
+ /* should only arrive from kernel, with irqs disabled */
+ WARN_ON_ONCE(!arch_irq_disabled_regs(regs));
+
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
return 0;
- nmi_enter();
-
__this_cpu_inc(irq_stat.soft_nmi_irqs);
tb = get_tb();
@@ -266,7 +267,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
wd_smp_lock(&flags);
if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
wd_smp_unlock(&flags);
- goto out;
+ return 0;
}
set_cpu_stuck(cpu, tb);
@@ -290,9 +291,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
if (wd_panic_timeout_tb < 0x7fffffff)
mtspr(SPRN_DEC, wd_panic_timeout_tb);
-out:
- nmi_exit();
-
return 0;
}
--
2.23.0
^ permalink raw reply related
* [PATCH v4 19/21] powerpc/64s: move NMI soft-mask handling to C
From: Nicholas Piggin @ 2021-01-02 12:25 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
Saving and restoring soft-mask state can now be done in C using the
interrupt handler wrapper functions.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 26 ++++++++++++
arch/powerpc/kernel/exceptions-64s.S | 60 ----------------------------
2 files changed, 26 insertions(+), 60 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 01192e213f9a..db89ecfef762 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -96,6 +96,10 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int
struct interrupt_nmi_state {
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
+ u8 irq_soft_mask;
+ u8 irq_happened;
+#endif
u8 ftrace_enabled;
#endif
};
@@ -103,6 +107,21 @@ struct interrupt_nmi_state {
static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
{
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
+ state->irq_soft_mask = local_paca->irq_soft_mask;
+ state->irq_happened = local_paca->irq_happened;
+
+ /*
+ * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
+ * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile
+ * because that goes through irq tracing which we don't want in NMI.
+ */
+ local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ /* Don't do any per-CPU operations until interrupt state is fixed */
+ state->ftrace_enabled = this_cpu_get_ftrace_enabled();
+#endif
state->ftrace_enabled = this_cpu_get_ftrace_enabled();
this_cpu_set_ftrace_enabled(0);
#endif
@@ -126,6 +145,13 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
#ifdef CONFIG_PPC64
this_cpu_set_ftrace_enabled(state->ftrace_enabled);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Check we didn't change the pending interrupt mask. */
+ WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
+ local_paca->irq_happened = state->irq_happened;
+ local_paca->irq_soft_mask = state->irq_soft_mask;
+#endif
#endif
}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e6e61c6f7298..f9a2751570ef 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1008,20 +1008,6 @@ EXC_COMMON_BEGIN(system_reset_common)
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
__GEN_COMMON_BODY system_reset
- /*
- * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
- * the right thing. We do not want to reconcile because that goes
- * through irq tracing which we don't want in NMI.
- *
- * Save PACAIRQHAPPENED to RESULT (otherwise unused), and set HARD_DIS
- * as we are running with MSR[EE]=0.
- */
- li r10,IRQS_ALL_DISABLED
- stb r10,PACAIRQSOFTMASK(r13)
- lbz r10,PACAIRQHAPPENED(r13)
- std r10,RESULT(r1)
- ori r10,r10,PACA_IRQ_HARD_DIS
- stb r10,PACAIRQHAPPENED(r13)
addi r3,r1,STACK_FRAME_OVERHEAD
bl system_reset_exception
@@ -1037,14 +1023,6 @@ EXC_COMMON_BEGIN(system_reset_common)
subi r10,r10,1
sth r10,PACA_IN_NMI(r13)
- /*
- * Restore soft mask settings.
- */
- ld r10,RESULT(r1)
- stb r10,PACAIRQHAPPENED(r13)
- ld r10,SOFTE(r1)
- stb r10,PACAIRQSOFTMASK(r13)
-
kuap_kernel_restore r9, r10
EXCEPTION_RESTORE_REGS
RFI_TO_USER_OR_KERNEL
@@ -1190,30 +1168,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
li r10,MSR_RI
mtmsrd r10,1
- /*
- * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see
- * system_reset_common)
- */
- li r10,IRQS_ALL_DISABLED
- stb r10,PACAIRQSOFTMASK(r13)
- lbz r10,PACAIRQHAPPENED(r13)
- std r10,RESULT(r1)
- ori r10,r10,PACA_IRQ_HARD_DIS
- stb r10,PACAIRQHAPPENED(r13)
-
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_early
std r3,RESULT(r1) /* Save result */
ld r12,_MSR(r1)
- /*
- * Restore soft mask settings.
- */
- ld r10,RESULT(r1)
- stb r10,PACAIRQHAPPENED(r13)
- ld r10,SOFTE(r1)
- stb r10,PACAIRQSOFTMASK(r13)
-
#ifdef CONFIG_PPC_P7_NAP
/*
* Check if thread was in power saving mode. We come here when any
@@ -2815,17 +2774,6 @@ EXC_COMMON_BEGIN(soft_nmi_common)
subi r1,r1,INT_FRAME_SIZE
__GEN_COMMON_BODY soft_nmi
- /*
- * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see
- * system_reset_common)
- */
- li r10,IRQS_ALL_DISABLED
- stb r10,PACAIRQSOFTMASK(r13)
- lbz r10,PACAIRQHAPPENED(r13)
- std r10,RESULT(r1)
- ori r10,r10,PACA_IRQ_HARD_DIS
- stb r10,PACAIRQHAPPENED(r13)
-
addi r3,r1,STACK_FRAME_OVERHEAD
bl soft_nmi_interrupt
@@ -2833,14 +2781,6 @@ EXC_COMMON_BEGIN(soft_nmi_common)
li r9,0
mtmsrd r9,1
- /*
- * Restore soft mask settings.
- */
- ld r10,RESULT(r1)
- stb r10,PACAIRQHAPPENED(r13)
- ld r10,SOFTE(r1)
- stb r10,PACAIRQSOFTMASK(r13)
-
kuap_kernel_restore r9, r10
EXCEPTION_RESTORE_REGS hsrr=0
RFI_TO_KERNEL
--
2.23.0
^ permalink raw reply related
* [PATCH v4 20/21] powerpc/64s: runlatch interrupt handling in C
From: Nicholas Piggin @ 2021-01-02 12:25 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
There is no need for this to be in asm; use the new interrupt entry wrapper.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 7 +++++++
arch/powerpc/kernel/exceptions-64s.S | 18 ------------------
2 files changed, 7 insertions(+), 18 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index db89ecfef762..9c16e9a48df6 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -6,6 +6,7 @@
#include <linux/hardirq.h>
#include <asm/cputime.h>
#include <asm/ftrace.h>
+#include <asm/runlatch.h>
struct interrupt_state {
#ifdef CONFIG_PPC_BOOK3E_64
@@ -84,6 +85,12 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (cpu_has_feature(CPU_FTR_CTRL) &&
+ !test_thread_local_flags(_TLF_RUNLATCH))
+ __ppc64_runlatch_on();
+#endif
+
interrupt_enter_prepare(regs, state);
irq_enter();
}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f9a2751570ef..05a358559274 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -692,14 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r1,GPR1(r1)
.endm
-#define RUNLATCH_ON \
-BEGIN_FTR_SECTION \
- ld r3, PACA_THREAD_INFO(r13); \
- ld r4,TI_LOCAL_FLAGS(r3); \
- andi. r0,r4,_TLF_RUNLATCH; \
- beql ppc64_runlatch_on_trampoline; \
-END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
-
/*
* When the idle code in power4_idle puts the CPU into NAP mode,
* it has to do so in a loop, and relies on the external interrupt
@@ -1582,7 +1574,6 @@ EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
EXC_COMMON_BEGIN(hardware_interrupt_common)
GEN_COMMON hardware_interrupt
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_IRQ
b interrupt_return
@@ -1768,7 +1759,6 @@ EXC_VIRT_END(decrementer, 0x4900, 0x80)
EXC_COMMON_BEGIN(decrementer_common)
GEN_COMMON decrementer
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl timer_interrupt
b interrupt_return
@@ -1854,7 +1844,6 @@ EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
EXC_COMMON_BEGIN(doorbell_super_common)
GEN_COMMON doorbell_super
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
@@ -2209,7 +2198,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
EXC_COMMON_BEGIN(hmi_exception_common)
GEN_COMMON hmi_exception
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
b interrupt_return
@@ -2239,7 +2227,6 @@ EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
EXC_COMMON_BEGIN(h_doorbell_common)
GEN_COMMON h_doorbell
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
@@ -2273,7 +2260,6 @@ EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
EXC_COMMON_BEGIN(h_virt_irq_common)
GEN_COMMON h_virt_irq
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_IRQ
b interrupt_return
@@ -2320,7 +2306,6 @@ EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
EXC_COMMON_BEGIN(performance_monitor_common)
GEN_COMMON performance_monitor
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl performance_monitor_exception
b interrupt_return
@@ -3035,9 +3020,6 @@ kvmppc_skip_Hinterrupt:
* come here.
*/
-EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline)
- b __ppc64_runlatch_on
-
USE_FIXED_SECTION(virt_trampolines)
/*
* All code below __end_interrupts is treated as soft-masked. If
--
2.23.0
^ permalink raw reply related
* [PATCH v4 21/21] powerpc/64s: power4 nap fixup in C
From: Nicholas Piggin @ 2021-01-02 12:25 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210102122508.1950592-1-npiggin@gmail.com>
There is no need for this to be in asm; use the new interrupt entry wrapper.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 15 +++++++++
arch/powerpc/include/asm/processor.h | 1 +
arch/powerpc/include/asm/thread_info.h | 6 ++++
arch/powerpc/kernel/exceptions-64s.S | 45 --------------------------
arch/powerpc/kernel/idle_book3s.S | 4 +++
5 files changed, 26 insertions(+), 45 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 9c16e9a48df6..4e290680f461 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -8,6 +8,16 @@
#include <asm/ftrace.h>
#include <asm/runlatch.h>
+static inline void nap_adjust_return(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_970_NAP
+ if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
+ clear_thread_local_flags(_TLF_NAPPING);
+ regs->nip = (unsigned long)power4_idle_nap_return;
+ }
+#endif
+}
+
struct interrupt_state {
#ifdef CONFIG_PPC_BOOK3E_64
enum ctx_state ctx_state;
@@ -99,6 +109,9 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int
{
irq_exit();
interrupt_exit_prepare(regs, state);
+
+ /* Adjust at exit so the main handler sees the true NIA */
+ nap_adjust_return(regs);
}
struct interrupt_nmi_state {
@@ -150,6 +163,8 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
radix_enabled() || (mfmsr() & MSR_DR))
nmi_exit();
+ nap_adjust_return(regs);
+
#ifdef CONFIG_PPC64
this_cpu_set_ftrace_enabled(state->ftrace_enabled);
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 8acc3590c971..eedc3c775141 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -393,6 +393,7 @@ extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
#ifdef CONFIG_PPC_970_NAP
extern void power4_idle_nap(void);
+void power4_idle_nap_return(void);
#endif
extern unsigned long cpuidle_disable;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 386d576673a1..bf137151100b 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -152,6 +152,12 @@ void arch_setup_new_exec(void);
#ifndef __ASSEMBLY__
+static inline void clear_thread_local_flags(unsigned int flags)
+{
+ struct thread_info *ti = current_thread_info();
+ ti->local_flags &= ~flags;
+}
+
static inline bool test_thread_local_flags(unsigned int flags)
{
struct thread_info *ti = current_thread_info();
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 05a358559274..c3351c9a0a4c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -692,25 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r1,GPR1(r1)
.endm
-/*
- * When the idle code in power4_idle puts the CPU into NAP mode,
- * it has to do so in a loop, and relies on the external interrupt
- * and decrementer interrupt entry code to get it out of the loop.
- * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
- * to signal that it is in the loop and needs help to get out.
- */
-#ifdef CONFIG_PPC_970_NAP
-#define FINISH_NAP \
-BEGIN_FTR_SECTION \
- ld r11, PACA_THREAD_INFO(r13); \
- ld r9,TI_LOCAL_FLAGS(r11); \
- andi. r10,r9,_TLF_NAPPING; \
- bnel power4_fixup_nap; \
-END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-#else
-#define FINISH_NAP
-#endif
-
/*
* There are a few constraints to be concerned with.
* - Real mode exceptions code/data must be located at their physical location.
@@ -1248,7 +1229,6 @@ EXC_COMMON_BEGIN(machine_check_common)
*/
GEN_COMMON machine_check
- FINISH_NAP
/* Enable MSR_RI when finished with PACA_EXMC */
li r10,MSR_RI
mtmsrd r10,1
@@ -1573,7 +1553,6 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
EXC_COMMON_BEGIN(hardware_interrupt_common)
GEN_COMMON hardware_interrupt
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_IRQ
b interrupt_return
@@ -1758,7 +1737,6 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
EXC_VIRT_END(decrementer, 0x4900, 0x80)
EXC_COMMON_BEGIN(decrementer_common)
GEN_COMMON decrementer
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
bl timer_interrupt
b interrupt_return
@@ -1843,7 +1821,6 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
EXC_COMMON_BEGIN(doorbell_super_common)
GEN_COMMON doorbell_super
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
@@ -2197,7 +2174,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
EXC_COMMON_BEGIN(hmi_exception_common)
GEN_COMMON hmi_exception
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
b interrupt_return
@@ -2226,7 +2202,6 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
EXC_COMMON_BEGIN(h_doorbell_common)
GEN_COMMON h_doorbell
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
@@ -2259,7 +2234,6 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
EXC_COMMON_BEGIN(h_virt_irq_common)
GEN_COMMON h_virt_irq
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_IRQ
b interrupt_return
@@ -2305,7 +2279,6 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
EXC_COMMON_BEGIN(performance_monitor_common)
GEN_COMMON performance_monitor
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
bl performance_monitor_exception
b interrupt_return
@@ -3037,24 +3010,6 @@ USE_FIXED_SECTION(virt_trampolines)
__end_interrupts:
DEFINE_FIXED_SYMBOL(__end_interrupts)
-#ifdef CONFIG_PPC_970_NAP
- /*
- * Called by exception entry code if _TLF_NAPPING was set, this clears
- * the NAPPING flag, and redirects the exception exit to
- * power4_fixup_nap_return.
- */
- .globl power4_fixup_nap
-EXC_COMMON_BEGIN(power4_fixup_nap)
- andc r9,r9,r10
- std r9,TI_LOCAL_FLAGS(r11)
- LOAD_REG_ADDR(r10, power4_idle_nap_return)
- std r10,_NIP(r1)
- blr
-
-power4_idle_nap_return:
- blr
-#endif
-
CLOSE_FIXED_SECTION(real_vectors);
CLOSE_FIXED_SECTION(real_trampolines);
CLOSE_FIXED_SECTION(virt_vectors);
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 22f249b6f58d..27d2e6a72ec9 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -201,4 +201,8 @@ _GLOBAL(power4_idle_nap)
mtmsrd r7
isync
b 1b
+
+ .globl power4_idle_nap_return
+power4_idle_nap_return:
+ blr
#endif
--
2.23.0
^ permalink raw reply related
* Re: [PATCH v4 02/21] powerpc/64s: move the last of the page fault handling logic to C
From: Christophe Leroy @ 2021-01-02 17:56 UTC (permalink / raw)
To: Nicholas Piggin; +Cc: linuxppc-dev
In-Reply-To: <20210102122508.1950592-3-npiggin@gmail.com>
Nicholas Piggin <npiggin@gmail.com> a écrit :
> The page fault handling still has some complex logic particularly around
> hash table handling, in asm. Implement this in C instead.
Hi,
I'm afk at the moment and unable to look at this in detail for about a
week, but this looks pretty complex, especially the churn around
___do_page_fault
Do we really need 3 layers of do_page_fault() ?
I think it would likely be more straightforward to just move
handle_page_fault() to C.
There also seem to be some unrelated changes, like (msr & MSR_PR)
being changed to user_mode(regs).
Christophe
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 +
> arch/powerpc/kernel/exceptions-64s.S | 131 +++---------------
> arch/powerpc/mm/book3s64/hash_utils.c | 77 ++++++----
> arch/powerpc/mm/fault.c | 59 ++++++--
> 4 files changed, 119 insertions(+), 149 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> index 066b1d34c7bc..60a669379aa0 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> @@ -454,6 +454,7 @@ static inline unsigned long hpt_hash(unsigned long vpn,
> #define HPTE_NOHPTE_UPDATE 0x2
> #define HPTE_USE_KERNEL_KEY 0x4
>
> +int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned
> long dsisr);
> extern int __hash_page_4K(unsigned long ea, unsigned long access,
> unsigned long vsid, pte_t *ptep, unsigned long trap,
> unsigned long flags, int ssize, int subpage_prot);
> diff --git a/arch/powerpc/kernel/exceptions-64s.S
> b/arch/powerpc/kernel/exceptions-64s.S
> index e02ad6fefa46..bda91c79b261 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -1401,14 +1401,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> *
> * Handling:
> * - Hash MMU
> - * Go to do_hash_page first to see if the HPT can be filled from
> an entry in
> - * the Linux page table. Hash faults can hit in kernel mode in a fairly
> + * Go to do_hash_fault, which attempts to fill the HPT from an
> entry in the
> + * Linux page table. Hash faults can hit in kernel mode in a fairly
> * arbitrary state (e.g., interrupts disabled, locks held) when accessing
> * "non-bolted" regions, e.g., vmalloc space. However these
> should always be
> - * backed by Linux page tables.
> + * backed by Linux page table entries.
> *
> - * If none is found, do a Linux page fault. Linux page faults can
> happen in
> - * kernel mode due to user copy operations of course.
> + * If no entry is found the Linux page fault handler is invoked (by
> + * do_hash_fault). Linux page faults can happen in kernel mode due to user
> + * copy operations of course.
> *
> * KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest
> * MMU context, which may cause a DSI in the host, which must go to the
> @@ -1439,13 +1440,17 @@ EXC_COMMON_BEGIN(data_access_common)
> GEN_COMMON data_access
> ld r4,_DAR(r1)
> ld r5,_DSISR(r1)
> + addi r3,r1,STACK_FRAME_OVERHEAD
> BEGIN_MMU_FTR_SECTION
> - ld r6,_MSR(r1)
> - li r3,0x300
> - b do_hash_page /* Try to handle as hpte fault */
> + bl do_hash_fault
> MMU_FTR_SECTION_ELSE
> - b handle_page_fault
> + bl do_page_fault
> ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
> + cmpdi r3,0
> + beq+ interrupt_return
> + /* We need to restore NVGPRS */
> + REST_NVGPRS(r1)
> + b interrupt_return
>
> GEN_KVM data_access
>
> @@ -1540,13 +1545,17 @@ EXC_COMMON_BEGIN(instruction_access_common)
> GEN_COMMON instruction_access
> ld r4,_DAR(r1)
> ld r5,_DSISR(r1)
> + addi r3,r1,STACK_FRAME_OVERHEAD
> BEGIN_MMU_FTR_SECTION
> - ld r6,_MSR(r1)
> - li r3,0x400
> - b do_hash_page /* Try to handle as hpte fault */
> + bl do_hash_fault
> MMU_FTR_SECTION_ELSE
> - b handle_page_fault
> + bl do_page_fault
> ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
> + cmpdi r3,0
> + beq+ interrupt_return
> + /* We need to restore NVGPRS */
> + REST_NVGPRS(r1)
> + b interrupt_return
>
> GEN_KVM instruction_access
>
> @@ -3202,99 +3211,3 @@ disable_machine_check:
> RFI_TO_KERNEL
> 1: mtlr r0
> blr
> -
> -/*
> - * Hash table stuff
> - */
> - .balign IFETCH_ALIGN_BYTES
> -do_hash_page:
> -#ifdef CONFIG_PPC_BOOK3S_64
> - lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
> - ori r0,r0,DSISR_BAD_FAULT_64S@l
> - and. r0,r5,r0 /* weird error? */
> - bne- handle_page_fault /* if not, try to insert a HPTE */
> -
> - /*
> - * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
> - * don't call hash_page, just fail the fault. This is required to
> - * prevent re-entrancy problems in the hash code, namely perf
> - * interrupts hitting while something holds H_PAGE_BUSY, and taking a
> - * hash fault. See the comment in hash_preload().
> - */
> - ld r11, PACA_THREAD_INFO(r13)
> - lwz r0,TI_PREEMPT(r11)
> - andis. r0,r0,NMI_MASK@h
> - bne 77f
> -
> - /*
> - * r3 contains the trap number
> - * r4 contains the faulting address
> - * r5 contains dsisr
> - * r6 msr
> - *
> - * at return r3 = 0 for success, 1 for page fault, negative for error
> - */
> - bl __hash_page /* build HPTE if possible */
> - cmpdi r3,0 /* see if __hash_page succeeded */
> -
> - /* Success */
> - beq interrupt_return /* Return from exception on success */
> -
> - /* Error */
> - blt- 13f
> -
> - /* Reload DAR/DSISR into r4/r5 for the DABR check below */
> - ld r4,_DAR(r1)
> - ld r5,_DSISR(r1)
> -#endif /* CONFIG_PPC_BOOK3S_64 */
> -
> -/* Here we have a page fault that hash_page can't handle. */
> -handle_page_fault:
> -11: andis. r0,r5,DSISR_DABRMATCH@h
> - bne- handle_dabr_fault
> - addi r3,r1,STACK_FRAME_OVERHEAD
> - bl do_page_fault
> - cmpdi r3,0
> - beq+ interrupt_return
> - mr r5,r3
> - addi r3,r1,STACK_FRAME_OVERHEAD
> - ld r4,_DAR(r1)
> - bl __bad_page_fault
> - b interrupt_return
> -
> -/* We have a data breakpoint exception - handle it */
> -handle_dabr_fault:
> - ld r4,_DAR(r1)
> - ld r5,_DSISR(r1)
> - addi r3,r1,STACK_FRAME_OVERHEAD
> - bl do_break
> - /*
> - * do_break() may have changed the NV GPRS while handling a breakpoint.
> - * If so, we need to restore them with their updated values.
> - */
> - REST_NVGPRS(r1)
> - b interrupt_return
> -
> -
> -#ifdef CONFIG_PPC_BOOK3S_64
> -/* We have a page fault that hash_page could handle but HV refused
> - * the PTE insertion
> - */
> -13: mr r5,r3
> - addi r3,r1,STACK_FRAME_OVERHEAD
> - ld r4,_DAR(r1)
> - bl low_hash_fault
> - b interrupt_return
> -#endif
> -
> -/*
> - * We come here as a result of a DSI at a point where we don't want
> - * to call hash_page, such as when we are accessing memory (possibly
> - * user memory) inside a PMU interrupt that occurred while interrupts
> - * were soft-disabled. We want to invoke the exception handler for
> - * the access, or panic if there isn't a handler.
> - */
> -77: addi r3,r1,STACK_FRAME_OVERHEAD
> - li r5,SIGSEGV
> - bl bad_page_fault
> - b interrupt_return
> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c
> b/arch/powerpc/mm/book3s64/hash_utils.c
> index 73b06adb6eeb..5a61182ddf75 100644
> --- a/arch/powerpc/mm/book3s64/hash_utils.c
> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
> @@ -1512,16 +1512,40 @@ int hash_page(unsigned long ea, unsigned
> long access, unsigned long trap,
> }
> EXPORT_SYMBOL_GPL(hash_page);
>
> -int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr,
> - unsigned long msr)
> +int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned
> long dsisr)
> {
> unsigned long access = _PAGE_PRESENT | _PAGE_READ;
> unsigned long flags = 0;
> - struct mm_struct *mm = current->mm;
> - unsigned int region_id = get_region_id(ea);
> + struct mm_struct *mm;
> + unsigned int region_id;
> + int err;
> +
> + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_DABRMATCH |
> DSISR_KEYFAULT)))
> + goto page_fault;
> +
> + /*
> + * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
> + * don't call hash_page, just fail the fault. This is required to
> + * prevent re-entrancy problems in the hash code, namely perf
> + * interrupts hitting while something holds H_PAGE_BUSY, and taking a
> + * hash fault. See the comment in hash_preload().
> + *
> + * We come here as a result of a DSI at a point where we don't want
> + * to call hash_page, such as when we are accessing memory (possibly
> + * user memory) inside a PMU interrupt that occurred while interrupts
> + * were soft-disabled. We want to invoke the exception handler for
> + * the access, or panic if there isn't a handler.
> + */
> + if (unlikely(in_nmi())) {
> + bad_page_fault(regs, ea, SIGSEGV);
> + return 0;
> + }
>
> + region_id = get_region_id(ea);
> if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
> mm = &init_mm;
> + else
> + mm = current->mm;
>
> if (dsisr & DSISR_NOHPTE)
> flags |= HPTE_NOHPTE_UPDATE;
> @@ -1537,13 +1561,31 @@ int __hash_page(unsigned long trap, unsigned
> long ea, unsigned long dsisr,
> * 2) user space access kernel space.
> */
> access |= _PAGE_PRIVILEGED;
> - if ((msr & MSR_PR) || (region_id == USER_REGION_ID))
> + if (user_mode(regs) || (region_id == USER_REGION_ID))
> access &= ~_PAGE_PRIVILEGED;
>
> - if (trap == 0x400)
> + if (regs->trap == 0x400)
> access |= _PAGE_EXEC;
>
> - return hash_page_mm(mm, ea, access, trap, flags);
> + err = hash_page_mm(mm, ea, access, regs->trap, flags);
> + if (unlikely(err < 0)) {
> + // failed to instert a hash PTE due to an hypervisor error
> + if (user_mode(regs)) {
> + if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2)
> + _exception(SIGSEGV, regs, SEGV_ACCERR, ea);
> + else
> + _exception(SIGBUS, regs, BUS_ADRERR, ea);
> + } else {
> + bad_page_fault(regs, ea, SIGBUS);
> + }
> + err = 0;
> +
> + } else if (err) {
> +page_fault:
> + err = do_page_fault(regs, ea, dsisr);
> + }
> +
> + return err;
> }
>
> #ifdef CONFIG_PPC_MM_SLICES
> @@ -1843,27 +1885,6 @@ void flush_hash_range(unsigned long number, int local)
> }
> }
>
> -/*
> - * low_hash_fault is called when we the low level hash code failed
> - * to instert a PTE due to an hypervisor error
> - */
> -void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
> -{
> - enum ctx_state prev_state = exception_enter();
> -
> - if (user_mode(regs)) {
> -#ifdef CONFIG_PPC_SUBPAGE_PROT
> - if (rc == -2)
> - _exception(SIGSEGV, regs, SEGV_ACCERR, address);
> - else
> -#endif
> - _exception(SIGBUS, regs, BUS_ADRERR, address);
> - } else
> - bad_page_fault(regs, address, SIGBUS);
> -
> - exception_exit(prev_state);
> -}
> -
> long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
> unsigned long pa, unsigned long rflags,
> unsigned long vflags, int psize, int ssize)
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 8961b44f350c..6af4516d1c50 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -369,7 +369,9 @@ static void sanity_check_fault(bool is_write,
> bool is_user,
> #define page_fault_is_bad(__err) (0)
> #elif defined(CONFIG_PPC_8xx)
> #define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G)
> -#elif defined(CONFIG_PPC64)
> +#elif defined(CONFIG_PPC_BOOK3S_64)
> +#define page_fault_is_bad(__err) ((__err) & (DSISR_BAD_FAULT_64S |
> DSISR_DABRMATCH))
> +#elif defined(CONFIG_PPC_BOOK3E_64)
> #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_64S)
> #else
> #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S)
> @@ -388,7 +390,7 @@ static void sanity_check_fault(bool is_write,
> bool is_user,
> * The return value is 0 if the fault was handled, or the signal
> * number if this is a kernel fault that can't be handled here.
> */
> -static int __do_page_fault(struct pt_regs *regs, unsigned long address,
> +static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
> unsigned long error_code)
> {
> struct vm_area_struct * vma;
> @@ -404,6 +406,9 @@ static int __do_page_fault(struct pt_regs *regs,
> unsigned long address,
> return 0;
>
> if (unlikely(page_fault_is_bad(error_code))) {
> + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && (error_code & DSISR_DABRMATCH))
> + return -1;
> +
> if (is_user) {
> _exception(SIGBUS, regs, BUS_OBJERR, address);
> return 0;
> @@ -540,25 +545,55 @@ static int __do_page_fault(struct pt_regs
> *regs, unsigned long address,
>
> return 0;
> }
> +NOKPROBE_SYMBOL(___do_page_fault);
> +
> +static int __do_page_fault(struct pt_regs *regs, unsigned long address,
> + unsigned long error_code)
> +{
> + int err;
> +
> + err = ___do_page_fault(regs, address, error_code);
> + if (unlikely(err)) {
> + const struct exception_table_entry *entry;
> +
> + entry = search_exception_tables(regs->nip);
> + if (likely(entry)) {
> + instruction_pointer_set(regs, extable_fixup(entry));
> + err = 0;
> + }
> + }
> +
> +#ifdef CONFIG_PPC_BOOK3S_64
> + /* 32 and 64e handle these errors in asm */
> + if (unlikely(err)) {
> + if (err > 0) {
> + __bad_page_fault(regs, address, err);
> + err = 0;
> + } else {
> + /*
> + * do_break() may change NV GPRS while handling the
> + * breakpoint. Return -ve to caller to do that.
> + */
> + do_break(regs, address, error_code);
> + }
> + }
> +#endif
> +
> + return err;
> +}
> NOKPROBE_SYMBOL(__do_page_fault);
>
> int do_page_fault(struct pt_regs *regs, unsigned long address,
> unsigned long error_code)
> {
> - const struct exception_table_entry *entry;
> enum ctx_state prev_state = exception_enter();
> - int rc = __do_page_fault(regs, address, error_code);
> - exception_exit(prev_state);
> - if (likely(!rc))
> - return 0;
> + int err;
>
> - entry = search_exception_tables(regs->nip);
> - if (unlikely(!entry))
> - return rc;
> + err = __do_page_fault(regs, address, error_code);
>
> - instruction_pointer_set(regs, extable_fixup(entry));
> + exception_exit(prev_state);
>
> - return 0;
> + return err;
> }
> NOKPROBE_SYMBOL(do_page_fault);
>
> --
> 2.23.0
^ permalink raw reply
* [PATCH v2] powerpc: fix alignment bug within the init sections
From: Ariel Marcovitch @ 2021-01-02 20:11 UTC (permalink / raw)
To: mpe
Cc: keescook, maskray, linux-kernel, npiggin, oss, paulus,
ariel.marcovitch, naveen.n.rao, linuxppc-dev, dja
This is a bug that causes early crashes in builds with a
.exit.text section smaller than a page and a .init.text section that
ends at the beginning of a physical page (this is kinda random, which
might explain why this wasn't really encountered before).
The init sections are ordered like this:
.init.text
.exit.text
.init.data
Currently, these sections aren't page aligned.
Because the init code might become read-only at runtime and because the
.init.text section can potentially reside on the same physical page as
.init.data, the beginning of .init.data might be mapped read-only along
with .init.text.
Then when the kernel tries to modify a variable in .init.data (like
kthreadd_done, used in kernel_init()) the kernel panics.
To avoid this, make _einittext page aligned and also align .exit.text
to make sure .init.data is always separated from the text segments.
Fixes: 060ef9d89d18 ("powerpc32: PAGE_EXEC required for inittext")
Signed-off-by: Ariel Marcovitch <ariel.marcovitch@gmail.com>
---
arch/powerpc/kernel/vmlinux.lds.S | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 6db90cdf11da..b6c765d8e7ee 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -187,6 +187,11 @@ SECTIONS
.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
_sinittext = .;
INIT_TEXT
+
+ /* .init.text might be RO so we must
+ * ensure this section ends in a page boundary.
+ */
+ . = ALIGN(PAGE_SIZE);
_einittext = .;
#ifdef CONFIG_PPC64
*(.tramp.ftrace.init);
@@ -200,6 +205,8 @@ SECTIONS
EXIT_TEXT
}
+ . = ALIGN(PAGE_SIZE);
+
.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
INIT_DATA
}
base-commit: 2c85ebc57b3e1817b6ce1a6b703928e113a90442
--
2.17.1
^ permalink raw reply related
* [PATCH 0/2] powerpc: fixes for 32-bit little-endian processes
From: Will Springer @ 2021-01-03 1:32 UTC (permalink / raw)
To: linuxppc-dev; +Cc: eerykitty, daniel
These are a couple small fixes that enable 32-bit little endian ("ppcle")
processes to run on a ppc64le kernel. Currently this is of interest for
the purposes of emulating ia32 programs with native userland assistance
via box86[1] (see PR#279 for initial ppc support), but a standalone
userland is functional, and may be used to complement a future ppcle
kernel port. We (those of us working on the userland effort in the
void-ppc project[2]) hope to come up with an ABI proposal to submit to
the libc projects as a new port.
Cheers to Christophe Leroy and Michael Ellerman for converting the ppc
vDSO to C, and Michael in particular for tracking down a small issue
with it on ppcle, meaning the 32-bit LE vDSO gets to be functional
instead of half-broken with the old asm. (Sorry it took a minute to push
these patches, protonmail would not cooperate with git-send-email and then
I took off for the holidays.)
Cheers,
Will Springer [she/her]
[1]: https://github.com/ptitSeb/box86
[2]: https://voidlinux-ppc.org/
Joseph J Allen (1):
powerpc: use kernel endianness in MSR in 32-bit signal handler
Will Springer (1):
powerpc/compat_sys: swap hi/lo parts of 64-bit syscall args in LE mode
arch/powerpc/kernel/signal_32.c | 3 +-
arch/powerpc/kernel/sys_ppc32.c | 49 +++++++++++++++++++--------------
2 files changed, 30 insertions(+), 22 deletions(-)
--
2.29.2
^ permalink raw reply
* [PATCH 1/2] powerpc: use kernel endianness in MSR in 32-bit signal handler
From: Will Springer @ 2021-01-03 1:34 UTC (permalink / raw)
To: linuxppc-dev; +Cc: eerykitty, daniel
From: Joseph J Allen <eerykitty@gmail.com>
This mirrors the behavior in handle_rt_signal32, to obey kernel endianness
rather than assume a 32-bit process is big-endian. Without this change,
any 32-bit little-endian process will SIGILL immediately upon handling a
signal.
Signed-off-by: Joseph J Allen <eerykitty@gmail.com>
Signed-off-by: Will Springer <skirmisher@protonmail.com>
---
arch/powerpc/kernel/signal_32.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 934cbdf6dd10..75ee918a120a 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -929,8 +929,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
regs->gpr[3] = ksig->sig;
regs->gpr[4] = (unsigned long) sc;
regs->nip = (unsigned long)ksig->ka.sa.sa_handler;
- /* enter the signal handler in big-endian mode */
+ /* enter the signal handler in native-endian mode */
regs->msr &= ~MSR_LE;
+ regs->msr |= (MSR_KERNEL & MSR_LE);
return 0;
failed:
--
2.29.2
^ permalink raw reply related
* [PATCH 2/2] powerpc/compat_sys: swap hi/lo parts of 64-bit syscall args in LE mode
From: Will Springer @ 2021-01-03 1:35 UTC (permalink / raw)
To: linuxppc-dev; +Cc: eerykitty, daniel
Swap upper/lower 32 bits for 64-bit compat syscalls, conditioned on
endianness. This is modeled after the same functionality in
arch/mips/kernel/linux32.c.
This fixes compat_sys on ppc64le, when called by 32-bit little-endian
processes.
Tested with `file /bin/bash` (pread64) and `truncate -s 5G test`
(ftruncate64).
Signed-off-by: Will Springer <skirmisher@protonmail.com>
---
arch/powerpc/kernel/sys_ppc32.c | 49 +++++++++++++++++++--------------
1 file changed, 28 insertions(+), 21 deletions(-)
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index d36c6391eaf5..16ff0399a257 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -59,57 +59,64 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
/*
* long long munging:
* The 32 bit ABI passes long longs in an odd even register pair.
+ * High and low parts are swapped depending on endian mode,
+ * so define a macro (similar to mips linux32) to handle that.
*/
+#ifdef __LITTLE_ENDIAN__
+#define merge_64(low, high) ((u64)high << 32) | low
+#else
+#define merge_64(high, low) ((u64)high << 32) | low
+#endif
compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
- u32 reg6, u32 poshi, u32 poslo)
+ u32 reg6, u32 pos1, u32 pos2)
{
- return ksys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo);
+ return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2));
}
compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
- u32 reg6, u32 poshi, u32 poslo)
+ u32 reg6, u32 pos1, u32 pos2)
{
- return ksys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo);
+ return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2));
}
-compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count)
+compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count)
{
- return ksys_readahead(fd, ((loff_t)offhi << 32) | offlo, count);
+ return ksys_readahead(fd, merge_64(offset1, offset2), count);
}
asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
- unsigned long high, unsigned long low)
+ unsigned long len1, unsigned long len2)
{
- return ksys_truncate(path, (high << 32) | low);
+ return ksys_truncate(path, merge_64(len1, len2));
}
-asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
- u32 lenhi, u32 lenlo)
+asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2,
+ u32 len1, u32 len2)
{
- return ksys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
- ((loff_t)lenhi << 32) | lenlo);
+ return ksys_fallocate(fd, mode, ((loff_t)offset1 << 32) | offset2,
+ merge_64(len1, len2));
}
-asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high,
- unsigned long low)
+asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
+ unsigned long len2)
{
- return ksys_ftruncate(fd, (high << 32) | low);
+ return ksys_ftruncate(fd, merge_64(len1, len2));
}
-long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low,
+long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
size_t len, int advice)
{
- return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low, len,
+ return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len,
advice);
}
asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
- unsigned offset_hi, unsigned offset_lo,
- unsigned nbytes_hi, unsigned nbytes_lo)
+ unsigned offset1, unsigned offset2,
+ unsigned nbytes1, unsigned nbytes2)
{
- loff_t offset = ((loff_t)offset_hi << 32) | offset_lo;
- loff_t nbytes = ((loff_t)nbytes_hi << 32) | nbytes_lo;
+ loff_t offset = merge_64(offset1, offset2);
+ loff_t nbytes = merge_64(nbytes1, nbytes2);
return ksys_sync_file_range(fd, offset, nbytes, flags);
}
--
2.29.2
^ permalink raw reply related
* Re: [PATCH v4 02/21] powerpc/64s: move the last of the page fault handling logic to C
From: Nicholas Piggin @ 2021-01-03 9:17 UTC (permalink / raw)
To: Christophe Leroy; +Cc: linuxppc-dev
In-Reply-To: <20210102185630.Horde.GwG0xTTuKDzS6PsMZTUftw1@messagerie.c-s.fr>
Excerpts from Christophe Leroy's message of January 3, 2021 3:56 am:
> Nicholas Piggin <npiggin@gmail.com> a écrit :
>
>> The page fault handling still has some complex logic particularly around
>> hash table handling, in asm. Implement this in C instead.
>
> Hi,
>
> I'm afk at the moment and unable to look at this in details before one
> week but this looks pretty complexe, especially the churn around
> ___do_page_fault
> Do we really need 3 layers of do_page_fault() ?
Actually this patch doesn't need it; patch 10 wants it. I can at least move
it there, which should make this a bit less churn.
It's convenient because there are lots of return paths in __do_page_fault,
but I could try to convert that to a `goto out` style.
> I think it would likely be more straight forward to just move
> handle_page_fault() to C.
The hash fault stuff makes things work better this way. Perhaps if I can
get to the bottom of the context tracking in the hash fault (I think we
perhaps should avoid it), then it could go back to a common code path.
> There also seems to be some unrelated changes, like the (msr & MSR_PR)
> changed to user_mode(regs).
That's part of making it callable from asm and making the radix and hash
prototypes the same, so that there is no added complexity in the asm:
>> @@ -1439,13 +1440,17 @@ EXC_COMMON_BEGIN(data_access_common)
>> GEN_COMMON data_access
>> ld r4,_DAR(r1)
>> ld r5,_DSISR(r1)
>> + addi r3,r1,STACK_FRAME_OVERHEAD
>> BEGIN_MMU_FTR_SECTION
>> - ld r6,_MSR(r1)
>> - li r3,0x300
>> - b do_hash_page /* Try to handle as hpte fault */
>> + bl do_hash_fault
>> MMU_FTR_SECTION_ELSE
>> - b handle_page_fault
>> + bl do_page_fault
>> ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
I'll see if anything can be done to move some such changes ahead.
Thanks,
Nick
^ permalink raw reply
* Re: [PATCH v2 -next] misc: ocxl: use DEFINE_MUTEX() for mutex lock
From: Frederic Barrat @ 2021-01-04 8:50 UTC (permalink / raw)
To: Zheng Yongjun, linuxppc-dev, linux-kernel; +Cc: gregkh, ajd, arnd
In-Reply-To: <20201224132446.31286-1-zhengyongjun3@huawei.com>
On 24/12/2020 14:24, Zheng Yongjun wrote:
> mutex lock can be initialized automatically with DEFINE_MUTEX()
> rather than explicitly calling mutex_init().
>
> Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
> ---
Thanks!
Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
> drivers/misc/ocxl/file.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
> index 4d1b44de1492..e70525eedaae 100644
> --- a/drivers/misc/ocxl/file.c
> +++ b/drivers/misc/ocxl/file.c
> @@ -15,7 +15,7 @@
>
> static dev_t ocxl_dev;
> static struct class *ocxl_class;
> -static struct mutex minors_idr_lock;
> +static DEFINE_MUTEX(minors_idr_lock);
> static struct idr minors_idr;
>
> static struct ocxl_file_info *find_and_get_file_info(dev_t devno)
> @@ -588,7 +588,6 @@ int ocxl_file_init(void)
> {
> int rc;
>
> - mutex_init(&minors_idr_lock);
> idr_init(&minors_idr);
>
> rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl");
>
^ permalink raw reply
* Re: [PATCH] selftests/powerpc: make the test check in eeh-basic.sh posix compliant
From: Frederic Barrat @ 2021-01-04 9:46 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201228043459.14281-1-po-hsu.lin@canonical.com>
On 28/12/2020 05:34, Po-Hsu Lin wrote:
> The == operand is a bash extension, thus this will fail on Ubuntu with
>
Error message is somehow missing. It should read:
"./eeh-basic.sh: 89: test: 2: unexpected operator"
> As the /bin/sh on Ubuntu is pointed to DASH.
>
> Use -eq to fix this posix compatibility issue.
>
> Fixes: 996f9e0f93f162 ("selftests/powerpc: Fix eeh-basic.sh exit codes")
> Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
> ---
With the above,
Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com>
Thanks!
> tools/testing/selftests/powerpc/eeh/eeh-basic.sh | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
> index 0d783e1..64779f0 100755
> --- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
> +++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
> @@ -86,5 +86,5 @@ echo "$failed devices failed to recover ($dev_count tested)"
> lspci | diff -u $pre_lspci -
> rm -f $pre_lspci
>
> -test "$failed" == 0
> +test "$failed" -eq 0
> exit $?
>
^ permalink raw reply
* [PATCH] tools headers UAPI: Sync linux/types.h with the kernel sources
From: Michael Ellerman @ 2021-01-04 12:27 UTC (permalink / raw)
To: acme; +Cc: daniel, linuxppc-dev, linux-kernel, ast, vt, andrii, yhs, kafai
Unlike the other headers in tools/include/uapi/linux, types.h has
never been synced from the exported headers, instead it's been hand
written over time. This means it doesn't match the exported headers
which can cause build errors on some architectures.
For example on powerpc, tools/bpf doesn't build:
$ make O=/build -C tools/bpf/
make: Entering directory '/linux/tools/bpf'
Auto-detecting system features:
... libbfd: [ on ]
... disassembler-four-args: [ on ]
CC /build/bpf_jit_disasm.o
LINK /build/bpf_jit_disasm
CC /build/bpf_dbg.o
In file included from /usr/include/powerpc64le-linux-gnu/asm/sigcontext.h:14,
from /usr/include/powerpc64le-linux-gnu/bits/sigcontext.h:30,
from /usr/include/signal.h:288,
from /linux/tools/bpf/bpf_dbg.c:51:
/usr/include/powerpc64le-linux-gnu/asm/elf.h:160:9: error: unknown type name '__vector128'
160 | typedef __vector128 elf_vrreg_t;
| ^~~~~~~~~~~
make: *** [Makefile:67: /build/bpf_dbg.o] Error 1
This is because tools/include/uapi/linux/types.h doesn't include
asm/types.h, where __vector128 is defined in the powerpc headers.
We can fix it by syncing the tools header with the exported kernel
header, as is done for the other headers in tools/include/uapi/linux.
Reported-by: Vitaly Chikunov <vt@altlinux.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
Earlier discussion: https://lore.kernel.org/netdev/20201024203040.4cjxnxrdy6qx557c@altlinux.org/
Build tested on powerpc and x86.
---
tools/include/uapi/linux/types.h | 41 ++++++++++++++++++++++++++------
1 file changed, 34 insertions(+), 7 deletions(-)
diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h
index 91fa51a9c31d..999cb0fa88eb 100644
--- a/tools/include/uapi/linux/types.h
+++ b/tools/include/uapi/linux/types.h
@@ -1,11 +1,26 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _UAPI_LINUX_TYPES_H
-#define _UAPI_LINUX_TYPES_H
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_TYPES_H
+#define _LINUX_TYPES_H
-#include <asm-generic/int-ll64.h>
+#include <asm/types.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/posix_types.h>
+
+
+/*
+ * Below are truly Linux-specific types that should never collide with
+ * any application/library that wants linux/types.h.
+ */
+
+#ifdef __CHECKER__
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __bitwise__
+#endif
+#define __bitwise __bitwise__
-/* copied from linux:include/uapi/linux/types.h */
-#define __bitwise
typedef __u16 __bitwise __le16;
typedef __u16 __bitwise __be16;
typedef __u32 __bitwise __le32;
@@ -16,8 +31,20 @@ typedef __u64 __bitwise __be64;
typedef __u16 __bitwise __sum16;
typedef __u32 __bitwise __wsum;
+/*
+ * aligned_u64 should be used in defining kernel<->userspace ABIs to avoid
+ * common 32/64-bit compat problems.
+ * 64-bit values align to 4-byte boundaries on x86_32 (and possibly other
+ * architectures) and to 8-byte boundaries on 64-bit architectures. The new
+ * aligned_64 type enforces 8-byte alignment so that structs containing
+ * aligned_64 values have the same alignment on 32-bit and 64-bit architectures.
+ * No conversions are necessary between 32-bit user-space and a 64-bit kernel.
+ */
#define __aligned_u64 __u64 __attribute__((aligned(8)))
#define __aligned_be64 __be64 __attribute__((aligned(8)))
#define __aligned_le64 __le64 __attribute__((aligned(8)))
-#endif /* _UAPI_LINUX_TYPES_H */
+typedef unsigned __bitwise __poll_t;
+
+#endif /* __ASSEMBLY__ */
+#endif /* _LINUX_TYPES_H */
--
2.25.1
^ permalink raw reply related
* Re: [PATCH v9 05/12] mm: HUGE_VMAP arch support cleanup
From: Ding Tianhong @ 2021-01-04 12:33 UTC (permalink / raw)
To: Nicholas Piggin, linux-mm, Andrew Morton
Cc: linux-arch, H. Peter Anvin, Will Deacon, Catalin Marinas, x86,
linux-kernel, Christoph Hellwig, Zefan Li, Borislav Petkov,
Jonathan Cameron, Thomas Gleixner, Rick Edgecombe, linuxppc-dev,
Ingo Molnar, linux-arm-kernel
In-Reply-To: <20201205065725.1286370-6-npiggin@gmail.com>
On 2020/12/5 14:57, Nicholas Piggin wrote:
> This changes the awkward approach where architectures provide init
> functions to determine which levels they can provide large mappings for,
> to one where the arch is queried for each call.
>
> This removes code and indirection, and allows constant-folding of dead
> code for unsupported levels.
>
> This also adds a prot argument to the arch query. This is unused
> currently but could help with some architectures (e.g., some powerpc
> processors can't map uncacheable memory with large pages).
>
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: x86@kernel.org
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Acked-by: Catalin Marinas <catalin.marinas@arm.com> [arm64]
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/arm64/include/asm/vmalloc.h | 8 +++
> arch/arm64/mm/mmu.c | 10 +--
> arch/powerpc/include/asm/vmalloc.h | 8 +++
> arch/powerpc/mm/book3s64/radix_pgtable.c | 8 +--
> arch/x86/include/asm/vmalloc.h | 7 ++
> arch/x86/mm/ioremap.c | 10 +--
> include/linux/io.h | 9 ---
> include/linux/vmalloc.h | 6 ++
> init/main.c | 1 -
> mm/ioremap.c | 88 +++++++++---------------
> 10 files changed, 77 insertions(+), 78 deletions(-)
>
> diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
> index 2ca708ab9b20..597b40405319 100644
> --- a/arch/arm64/include/asm/vmalloc.h
> +++ b/arch/arm64/include/asm/vmalloc.h
> @@ -1,4 +1,12 @@
> #ifndef _ASM_ARM64_VMALLOC_H
> #define _ASM_ARM64_VMALLOC_H
>
> +#include <asm/page.h>
> +
> +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
> +bool arch_vmap_p4d_supported(pgprot_t prot);
> +bool arch_vmap_pud_supported(pgprot_t prot);
> +bool arch_vmap_pmd_supported(pgprot_t prot);
> +#endif
> +
> #endif /* _ASM_ARM64_VMALLOC_H */
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index ca692a815731..1b60079c1cef 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -1315,12 +1315,12 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
> return dt_virt;
> }
>
> -int __init arch_ioremap_p4d_supported(void)
> +bool arch_vmap_p4d_supported(pgprot_t prot)
> {
> - return 0;
> + return false;
> }
>
I think you should put this function inside #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP, otherwise it may break the build when CONFIG_HAVE_ARCH_HUGE_VMAP is disabled. The same
applies to x86 and ppc.
Ding
> -int __init arch_ioremap_pud_supported(void)
> +bool arch_vmap_pud_supported(pgprot_t prot)
> {
> /*
> * Only 4k granule supports level 1 block mappings.
> @@ -1330,9 +1330,9 @@ int __init arch_ioremap_pud_supported(void)
> !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
> }
>
> -int __init arch_ioremap_pmd_supported(void)
> +bool arch_vmap_pmd_supported(pgprot_t prot)
> {
> - /* See arch_ioremap_pud_supported() */
> + /* See arch_vmap_pud_supported() */
> return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
> }
>
> diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h
> index b992dfaaa161..105abb73f075 100644
> --- a/arch/powerpc/include/asm/vmalloc.h
> +++ b/arch/powerpc/include/asm/vmalloc.h
> @@ -1,4 +1,12 @@
> #ifndef _ASM_POWERPC_VMALLOC_H
> #define _ASM_POWERPC_VMALLOC_H
>
> +#include <asm/page.h>
> +
> +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
> +bool arch_vmap_p4d_supported(pgprot_t prot);
> +bool arch_vmap_pud_supported(pgprot_t prot);
> +bool arch_vmap_pmd_supported(pgprot_t prot);
> +#endif
> +
> #endif /* _ASM_POWERPC_VMALLOC_H */
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 3adcf730f478..ab426fc0cd4b 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -1121,13 +1121,13 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
> set_pte_at(mm, addr, ptep, pte);
> }
>
> -int __init arch_ioremap_pud_supported(void)
> +bool arch_vmap_pud_supported(pgprot_t prot)
> {
> /* HPT does not cope with large pages in the vmalloc area */
> return radix_enabled();
> }
>
> -int __init arch_ioremap_pmd_supported(void)
> +bool arch_vmap_pmd_supported(pgprot_t prot)
> {
> return radix_enabled();
> }
> @@ -1221,7 +1221,7 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
> return 1;
> }
>
> -int __init arch_ioremap_p4d_supported(void)
> +bool arch_vmap_p4d_supported(pgprot_t prot)
> {
> - return 0;
> + return false;
> }
> diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
> index 29837740b520..094ea2b565f3 100644
> --- a/arch/x86/include/asm/vmalloc.h
> +++ b/arch/x86/include/asm/vmalloc.h
> @@ -1,6 +1,13 @@
> #ifndef _ASM_X86_VMALLOC_H
> #define _ASM_X86_VMALLOC_H
>
> +#include <asm/page.h>
> #include <asm/pgtable_areas.h>
>
> +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
> +bool arch_vmap_p4d_supported(pgprot_t prot);
> +bool arch_vmap_pud_supported(pgprot_t prot);
> +bool arch_vmap_pmd_supported(pgprot_t prot);
> +#endif
> +
> #endif /* _ASM_X86_VMALLOC_H */
> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
> index 9e5ccc56f8e0..762b5ff4edad 100644
> --- a/arch/x86/mm/ioremap.c
> +++ b/arch/x86/mm/ioremap.c
> @@ -481,21 +481,21 @@ void iounmap(volatile void __iomem *addr)
> }
> EXPORT_SYMBOL(iounmap);
>
> -int __init arch_ioremap_p4d_supported(void)
> +bool arch_vmap_p4d_supported(pgprot_t prot)
> {
> - return 0;
> + return false;
> }
>
> -int __init arch_ioremap_pud_supported(void)
> +bool arch_vmap_pud_supported(pgprot_t prot)
> {
> #ifdef CONFIG_X86_64
> return boot_cpu_has(X86_FEATURE_GBPAGES);
> #else
> - return 0;
> + return false;
> #endif
> }
>
> -int __init arch_ioremap_pmd_supported(void)
> +bool arch_vmap_pmd_supported(pgprot_t prot)
> {
> return boot_cpu_has(X86_FEATURE_PSE);
> }
> diff --git a/include/linux/io.h b/include/linux/io.h
> index 8394c56babc2..f1effd4d7a3c 100644
> --- a/include/linux/io.h
> +++ b/include/linux/io.h
> @@ -31,15 +31,6 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end,
> }
> #endif
>
> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
> -void __init ioremap_huge_init(void);
> -int arch_ioremap_p4d_supported(void);
> -int arch_ioremap_pud_supported(void);
> -int arch_ioremap_pmd_supported(void);
> -#else
> -static inline void ioremap_huge_init(void) { }
> -#endif
> -
> /*
> * Managed iomap interface
> */
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index 938eaf9517e2..b3218ba0904d 100644
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -85,6 +85,12 @@ struct vmap_area {
> };
> };
>
> +#ifndef CONFIG_HAVE_ARCH_HUGE_VMAP
> +static inline bool arch_vmap_p4d_supported(pgprot_t prot) { return false; }
> +static inline bool arch_vmap_pud_supported(pgprot_t prot) { return false; }
> +static inline bool arch_vmap_pmd_supported(pgprot_t prot) { return false; }
> +#endif
> +
> /*
> * Highlevel APIs for driver use
> */
> diff --git a/init/main.c b/init/main.c
> index 20baced721ad..5bd2f4f41d30 100644
> --- a/init/main.c
> +++ b/init/main.c
> @@ -833,7 +833,6 @@ static void __init mm_init(void)
> pgtable_init();
> debug_objects_mem_init();
> vmalloc_init();
> - ioremap_huge_init();
> /* Should be run before the first non-init thread is created */
> init_espfix_bsp();
> /* Should be run after espfix64 is set up. */
> diff --git a/mm/ioremap.c b/mm/ioremap.c
> index 3f4d36f9745a..c67f91164401 100644
> --- a/mm/ioremap.c
> +++ b/mm/ioremap.c
> @@ -16,49 +16,16 @@
> #include "pgalloc-track.h"
>
> #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
> -static int __read_mostly ioremap_p4d_capable;
> -static int __read_mostly ioremap_pud_capable;
> -static int __read_mostly ioremap_pmd_capable;
> -static int __read_mostly ioremap_huge_disabled;
> +static bool __ro_after_init iomap_max_page_shift = PAGE_SHIFT;
>
> static int __init set_nohugeiomap(char *str)
> {
> - ioremap_huge_disabled = 1;
> + iomap_max_page_shift = P4D_SHIFT;
> return 0;
> }
> early_param("nohugeiomap", set_nohugeiomap);
> -
> -void __init ioremap_huge_init(void)
> -{
> - if (!ioremap_huge_disabled) {
> - if (arch_ioremap_p4d_supported())
> - ioremap_p4d_capable = 1;
> - if (arch_ioremap_pud_supported())
> - ioremap_pud_capable = 1;
> - if (arch_ioremap_pmd_supported())
> - ioremap_pmd_capable = 1;
> - }
> -}
> -
> -static inline int ioremap_p4d_enabled(void)
> -{
> - return ioremap_p4d_capable;
> -}
> -
> -static inline int ioremap_pud_enabled(void)
> -{
> - return ioremap_pud_capable;
> -}
> -
> -static inline int ioremap_pmd_enabled(void)
> -{
> - return ioremap_pmd_capable;
> -}
> -
> -#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
> -static inline int ioremap_p4d_enabled(void) { return 0; }
> -static inline int ioremap_pud_enabled(void) { return 0; }
> -static inline int ioremap_pmd_enabled(void) { return 0; }
> +#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
> +static const bool iomap_max_page_shift = PAGE_SHIFT;
> #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
>
> static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
> @@ -82,9 +49,13 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
> }
>
> static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
> - phys_addr_t phys_addr, pgprot_t prot)
> + phys_addr_t phys_addr, pgprot_t prot,
> + unsigned int max_page_shift)
> {
> - if (!ioremap_pmd_enabled())
> + if (max_page_shift < PMD_SHIFT)
> + return 0;
> +
> + if (!arch_vmap_pmd_supported(prot))
> return 0;
>
> if ((end - addr) != PMD_SIZE)
> @@ -104,7 +75,7 @@ static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
>
> static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
> phys_addr_t phys_addr, pgprot_t prot,
> - pgtbl_mod_mask *mask)
> + unsigned int max_page_shift, pgtbl_mod_mask *mask)
> {
> pmd_t *pmd;
> unsigned long next;
> @@ -115,7 +86,7 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
> do {
> next = pmd_addr_end(addr, end);
>
> - if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) {
> + if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot, max_page_shift)) {
> *mask |= PGTBL_PMD_MODIFIED;
> continue;
> }
> @@ -127,9 +98,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
> }
>
> static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
> - phys_addr_t phys_addr, pgprot_t prot)
> + phys_addr_t phys_addr, pgprot_t prot,
> + unsigned int max_page_shift)
> {
> - if (!ioremap_pud_enabled())
> + if (max_page_shift < PUD_SHIFT)
> + return 0;
> +
> + if (!arch_vmap_pud_supported(prot))
> return 0;
>
> if ((end - addr) != PUD_SIZE)
> @@ -149,7 +124,7 @@ static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
>
> static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
> phys_addr_t phys_addr, pgprot_t prot,
> - pgtbl_mod_mask *mask)
> + unsigned int max_page_shift, pgtbl_mod_mask *mask)
> {
> pud_t *pud;
> unsigned long next;
> @@ -160,21 +135,25 @@ static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
> do {
> next = pud_addr_end(addr, end);
>
> - if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot)) {
> + if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot, max_page_shift)) {
> *mask |= PGTBL_PUD_MODIFIED;
> continue;
> }
>
> - if (vmap_pmd_range(pud, addr, next, phys_addr, prot, mask))
> + if (vmap_pmd_range(pud, addr, next, phys_addr, prot, max_page_shift, mask))
> return -ENOMEM;
> } while (pud++, phys_addr += (next - addr), addr = next, addr != end);
> return 0;
> }
>
> static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
> - phys_addr_t phys_addr, pgprot_t prot)
> + phys_addr_t phys_addr, pgprot_t prot,
> + unsigned int max_page_shift)
> {
> - if (!ioremap_p4d_enabled())
> + if (max_page_shift < P4D_SHIFT)
> + return 0;
> +
> + if (!arch_vmap_p4d_supported(prot))
> return 0;
>
> if ((end - addr) != P4D_SIZE)
> @@ -194,7 +173,7 @@ static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
>
> static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
> phys_addr_t phys_addr, pgprot_t prot,
> - pgtbl_mod_mask *mask)
> + unsigned int max_page_shift, pgtbl_mod_mask *mask)
> {
> p4d_t *p4d;
> unsigned long next;
> @@ -205,19 +184,20 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
> do {
> next = p4d_addr_end(addr, end);
>
> - if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) {
> + if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot, max_page_shift)) {
> *mask |= PGTBL_P4D_MODIFIED;
> continue;
> }
>
> - if (vmap_pud_range(p4d, addr, next, phys_addr, prot, mask))
> + if (vmap_pud_range(p4d, addr, next, phys_addr, prot, max_page_shift, mask))
> return -ENOMEM;
> } while (p4d++, phys_addr += (next - addr), addr = next, addr != end);
> return 0;
> }
>
> static int vmap_range(unsigned long addr, unsigned long end,
> - phys_addr_t phys_addr, pgprot_t prot)
> + phys_addr_t phys_addr, pgprot_t prot,
> + unsigned int max_page_shift)
> {
> pgd_t *pgd;
> unsigned long start;
> @@ -232,7 +212,7 @@ static int vmap_range(unsigned long addr, unsigned long end,
> pgd = pgd_offset_k(addr);
> do {
> next = pgd_addr_end(addr, end);
> - err = vmap_p4d_range(pgd, addr, next, phys_addr, prot, &mask);
> + err = vmap_p4d_range(pgd, addr, next, phys_addr, prot, max_page_shift, &mask);
> if (err)
> break;
> } while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
> @@ -248,7 +228,7 @@ static int vmap_range(unsigned long addr, unsigned long end,
> int ioremap_page_range(unsigned long addr,
> unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
> {
> - return vmap_range(addr, end, phys_addr, prot);
> + return vmap_range(addr, end, phys_addr, prot, iomap_max_page_shift);
> }
>
> #ifdef CONFIG_GENERIC_IOREMAP
>
^ permalink raw reply
* Patch "asm-generic/tlb, arch: Invert CONFIG_HAVE_RCU_TABLE_INVALIDATE" has been added to the 4.19-stable tree
From: gregkh @ 2021-01-04 12:55 UTC (permalink / raw)
To: greg, gregkh, linuxppc-dev, mpe, peterz, santosh, sashal; +Cc: stable-commits
In-Reply-To: <20200312132740.225241-4-santosh@fossix.org>
This is a note to let you know that I've just added the patch titled
asm-generic/tlb, arch: Invert CONFIG_HAVE_RCU_TABLE_INVALIDATE
to the 4.19-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
The filename of the patch is:
asm-generic-tlb-arch-invert-config_have_rcu_table_invalidate.patch
and it can be found in the queue-4.19 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.
From foo@baz Mon Jan 4 01:45:29 PM CET 2021
From: Santosh Sivaraj <santosh@fossix.org>
Date: Thu, 12 Mar 2020 18:57:37 +0530
Subject: asm-generic/tlb, arch: Invert CONFIG_HAVE_RCU_TABLE_INVALIDATE
To: <stable@vger.kernel.org>, linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>, Greg KH <greg@kroah.com>, Sasha Levin <sashal@kernel.org>, Peter Zijlstra <peterz@infradead.org>
Message-ID: <20200312132740.225241-4-santosh@fossix.org>
From: Peter Zijlstra <peterz@infradead.org>
commit 96bc9567cbe112e9320250f01b9c060c882e8619 upstream.
Make issuing a TLB invalidate for page-table pages the normal case.
The reason is twofold:
- too many invalidates is safer than too few,
- most architectures use the linux page-tables natively
and would thus require this.
Make it an opt-out, instead of an opt-in.
No change in behavior intended.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org> # 4.19
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
[santosh: prerequisite for upcoming tlbflush backports]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/Kconfig | 2 +-
arch/powerpc/Kconfig | 1 +
arch/sparc/Kconfig | 1 +
arch/x86/Kconfig | 1 -
mm/memory.c | 2 +-
5 files changed, 4 insertions(+), 3 deletions(-)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -363,7 +363,7 @@ config HAVE_ARCH_JUMP_LABEL
config HAVE_RCU_TABLE_FREE
bool
-config HAVE_RCU_TABLE_INVALIDATE
+config HAVE_RCU_TABLE_NO_INVALIDATE
bool
config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -217,6 +217,7 @@ config PPC
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if SMP
+ select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC64 && CPU_LITTLE_ENDIAN
select HAVE_SYSCALL_TRACEPOINTS
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -64,6 +64,7 @@ config SPARC64
select HAVE_KRETPROBES
select HAVE_KPROBES
select HAVE_RCU_TABLE_FREE if SMP
+ select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -181,7 +181,6 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if PARAVIRT
- select HAVE_RCU_TABLE_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -339,7 +339,7 @@ bool __tlb_remove_page_size(struct mmu_g
*/
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
-#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE
+#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
/*
* Invalidate page-table caches used by hardware walkers. Then we still
* need to RCU-sched wait while freeing the pages because software
Patches currently in stable-queue which might be from santosh@fossix.org are
queue-4.19/asm-generic-tlb-track-which-levels-of-the-page-tables-have-been-cleared.patch
queue-4.19/asm-generic-tlb-track-freeing-of-page-table-directories-in-struct-mmu_gather.patch
queue-4.19/asm-generic-tlb-avoid-potential-double-flush.patch
queue-4.19/mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush.patch
queue-4.19/powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case.patch
queue-4.19/asm-generic-tlb-arch-invert-config_have_rcu_table_invalidate.patch
^ permalink raw reply
* Patch "asm-generic/tlb: avoid potential double flush" has been added to the 4.19-stable tree
From: gregkh @ 2021-01-04 12:55 UTC (permalink / raw)
To: aneesh.kumar, greg, gregkh, linuxppc-dev, mpe, peterz, santosh,
sashal
Cc: stable-commits
In-Reply-To: <20200312132740.225241-7-santosh@fossix.org>
This is a note to let you know that I've just added the patch titled
asm-generic/tlb: avoid potential double flush
to the 4.19-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
The filename of the patch is:
asm-generic-tlb-avoid-potential-double-flush.patch
and it can be found in the queue-4.19 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.
From foo@baz Mon Jan 4 01:45:29 PM CET 2021
From: Santosh Sivaraj <santosh@fossix.org>
Date: Thu, 12 Mar 2020 18:57:40 +0530
Subject: asm-generic/tlb: avoid potential double flush
To: <stable@vger.kernel.org>, linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>, Greg KH <greg@kroah.com>, Sasha Levin <sashal@kernel.org>, Peter Zijlstra <peterz@infradead.org>, "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Message-ID: <20200312132740.225241-7-santosh@fossix.org>
From: Peter Zijlstra <peterz@infradead.org>
commit 0758cd8304942292e95a0f750c374533db378b32 upstream.
Aneesh reported that:
tlb_flush_mmu()
tlb_flush_mmu_tlbonly()
tlb_flush() <-- #1
tlb_flush_mmu_free()
tlb_table_flush()
tlb_table_invalidate()
tlb_flush_mmu_tlbonly()
tlb_flush() <-- #2
does two TLBIs when tlb->fullmm, because __tlb_reset_range() will not
clear tlb->end in that case.
Observe that any caller to __tlb_adjust_range() also sets at least one of
the tlb->freed_tables || tlb->cleared_p* bits, and those are
unconditionally cleared by __tlb_reset_range().
Change the condition for actually issuing TLBI to having one of those bits
set, as opposed to having tlb->end != 0.
Link: http://lkml.kernel.org/r/20200116064531.483522-4-aneesh.kumar@linux.ibm.com
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reported-by: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Cc: <stable@vger.kernel.org> # 4.19
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
[santosh: backported to 4.19 stable]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/asm-generic/tlb.h | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -179,7 +179,12 @@ static inline void __tlb_reset_range(str
static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
- if (!tlb->end)
+ /*
+ * Anything calling __tlb_adjust_range() also sets at least one of
+ * these bits.
+ */
+ if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds ||
+ tlb->cleared_puds || tlb->cleared_p4ds))
return;
tlb_flush(tlb);
Patches currently in stable-queue which might be from santosh@fossix.org are
queue-4.19/asm-generic-tlb-track-which-levels-of-the-page-tables-have-been-cleared.patch
queue-4.19/asm-generic-tlb-track-freeing-of-page-table-directories-in-struct-mmu_gather.patch
queue-4.19/asm-generic-tlb-avoid-potential-double-flush.patch
queue-4.19/mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush.patch
queue-4.19/powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case.patch
queue-4.19/asm-generic-tlb-arch-invert-config_have_rcu_table_invalidate.patch
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox