* [RFC PATCH 04/12] powerpc: add interrupt_cond_local_irq_enable helper
From: Nicholas Piggin @ 2020-09-05 17:43 UTC (permalink / raw)
Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20200905174335.3161229-1-npiggin@gmail.com>
Simple helper for synchronous interrupt handlers to use to enable
interrupts if they were taken in interrupt-enabled context.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 7 +++++++
arch/powerpc/kernel/traps.c | 24 +++++++-----------------
arch/powerpc/mm/fault.c | 4 +---
3 files changed, 15 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 7c7e58541171..7231949fc1c8 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -3,6 +3,7 @@
#define _ASM_POWERPC_INTERRUPT_H
#include <linux/context_tracking.h>
+#include <linux/hardirq.h>
#include <asm/ftrace.h>
/**
@@ -198,4 +199,10 @@ DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception);
void replay_system_reset(void);
void replay_soft_interrupts(void);
+static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
+{
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+}
+
#endif /* _ASM_POWERPC_INTERRUPT_H */
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 96fa2d7e088c..a850647b7062 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -343,8 +343,8 @@ static bool exception_common(int signr, struct pt_regs *regs, int code,
show_signal_msg(signr, regs, code, addr);
- if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs))
- local_irq_enable();
+ if (arch_irqs_disabled())
+ interrupt_cond_local_irq_enable(regs);
current->thread.trap_nr = code;
@@ -1579,9 +1579,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
if (!user_mode(regs))
goto sigill;
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
/* (reason & REASON_ILLEGAL) would be the obvious thing here,
* but there seems to be a hardware bug on the 405GP (RevD)
@@ -1635,9 +1633,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
int sig, code, fixed = 0;
unsigned long reason;
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
reason = get_reason(regs);
@@ -1798,9 +1794,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
die("Unexpected facility unavailable exception", regs, SIGABRT);
}
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
if (status == FSCR_DSCR_LG) {
/*
@@ -2145,9 +2139,7 @@ void SPEFloatingPointException(struct pt_regs *regs)
int code = FPE_FLTUNK;
int err;
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
flush_spe_to_thread(current);
@@ -2194,9 +2186,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
extern int speround_handler(struct pt_regs *regs);
int err;
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
preempt_disable();
if (regs->msr & MSR_SPE)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7d63b5512068..fd8e28944293 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -441,9 +441,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
return bad_area_nosemaphore(regs, address);
}
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
--
2.23.0
^ permalink raw reply related
* [RFC PATCH 03/12] powerpc: interrupt handler wrapper functions
From: Nicholas Piggin @ 2020-09-05 17:43 UTC (permalink / raw)
Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20200905174335.3161229-1-npiggin@gmail.com>
Add wrapper functions (derived from x86 macros) for interrupt handler
functions. This allows interrupt entry code to be written in C.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/asm-prototypes.h | 28 ---
arch/powerpc/include/asm/bug.h | 1 -
arch/powerpc/include/asm/hw_irq.h | 9 -
arch/powerpc/include/asm/interrupt.h | 201 ++++++++++++++++++++++
arch/powerpc/include/asm/time.h | 2 +
arch/powerpc/kernel/dbell.c | 3 +-
arch/powerpc/kernel/exceptions-64s.S | 8 +-
arch/powerpc/kernel/irq.c | 3 +-
arch/powerpc/kernel/mce.c | 5 +-
arch/powerpc/kernel/tau_6xx.c | 2 +-
arch/powerpc/kernel/time.c | 3 +-
arch/powerpc/kernel/traps.c | 78 ++++++---
arch/powerpc/kernel/watchdog.c | 7 +-
arch/powerpc/kvm/book3s_hv_builtin.c | 1 +
arch/powerpc/mm/book3s64/hash_utils.c | 3 +-
arch/powerpc/mm/book3s64/slb.c | 5 +-
arch/powerpc/mm/fault.c | 10 +-
arch/powerpc/platforms/powernv/idle.c | 1 +
18 files changed, 290 insertions(+), 80 deletions(-)
create mode 100644 arch/powerpc/include/asm/interrupt.h
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index fffac9de2922..de4dad05e272 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -56,34 +56,6 @@ int exit_vmx_usercopy(void);
int enter_vmx_ops(void);
void *exit_vmx_ops(void *dest);
-/* Traps */
-long machine_check_early(struct pt_regs *regs);
-long hmi_exception_realmode(struct pt_regs *regs);
-void SMIException(struct pt_regs *regs);
-void handle_hmi_exception(struct pt_regs *regs);
-void instruction_breakpoint_exception(struct pt_regs *regs);
-void RunModeException(struct pt_regs *regs);
-void single_step_exception(struct pt_regs *regs);
-void program_check_exception(struct pt_regs *regs);
-void alignment_exception(struct pt_regs *regs);
-void StackOverflow(struct pt_regs *regs);
-void kernel_fp_unavailable_exception(struct pt_regs *regs);
-void altivec_unavailable_exception(struct pt_regs *regs);
-void vsx_unavailable_exception(struct pt_regs *regs);
-void fp_unavailable_tm(struct pt_regs *regs);
-void altivec_unavailable_tm(struct pt_regs *regs);
-void vsx_unavailable_tm(struct pt_regs *regs);
-void facility_unavailable_exception(struct pt_regs *regs);
-void TAUException(struct pt_regs *regs);
-void altivec_assist_exception(struct pt_regs *regs);
-void unrecoverable_exception(struct pt_regs *regs);
-void kernel_bad_stack(struct pt_regs *regs);
-void system_reset_exception(struct pt_regs *regs);
-void machine_check_exception(struct pt_regs *regs);
-void emulation_assist_interrupt(struct pt_regs *regs);
-long do_slb_fault(struct pt_regs *regs);
-void do_bad_slb_fault(struct pt_regs *regs);
-
/* signals, syscalls and interrupts */
long sys_swapcontext(struct ucontext __user *old_ctx,
struct ucontext __user *new_ctx,
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 2fa0cf6c6011..7b89cdbbb789 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -111,7 +111,6 @@
#ifndef __ASSEMBLY__
struct pt_regs;
-extern long do_page_fault(struct pt_regs *);
extern long hash__do_page_fault(struct pt_regs *);
extern void bad_page_fault(struct pt_regs *, unsigned long, int);
extern void _exception(int, struct pt_regs *, int, unsigned long);
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 3a0db7b0b46e..19420e48bcd5 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -51,15 +51,6 @@
#ifndef __ASSEMBLY__
-extern void replay_system_reset(void);
-extern void replay_soft_interrupts(void);
-
-extern void timer_interrupt(struct pt_regs *);
-extern void timer_broadcast_interrupt(void);
-extern void performance_monitor_exception(struct pt_regs *regs);
-extern void WatchdogException(struct pt_regs *regs);
-extern void unknown_exception(struct pt_regs *regs);
-
#ifdef CONFIG_PPC64
#include <asm/paca.h>
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
new file mode 100644
index 000000000000..7c7e58541171
--- /dev/null
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_INTERRUPT_H
+#define _ASM_POWERPC_INTERRUPT_H
+
+#include <linux/context_tracking.h>
+#include <asm/ftrace.h>
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_RAW - Declare raw interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_RAW(func) \
+ __visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_RAW - Define raw interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * This is a plain function which does no tracing, reconciling, etc.
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_RAW(func) \
+static __always_inline long ___##func(struct pt_regs *regs); \
+ \
+__visible noinstr long func(struct pt_regs *regs) \
+{ \
+ long ret; \
+ \
+ ret = ___##func (regs); \
+ \
+ return ret; \
+} \
+ \
+static __always_inline long ___##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER - Declare synchronous interrupt handler function
+ * @func: Function name of the entry point
+ */
+#define DECLARE_INTERRUPT_HANDLER(func) \
+ __visible void func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER - Define synchronous interrupt handler function
+ * @func: Function name of the entry point
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER(func) \
+static __always_inline void ___##func(struct pt_regs *regs); \
+ \
+__visible noinstr void func(struct pt_regs *regs) \
+{ \
+ ___##func (regs); \
+} \
+ \
+static __always_inline void ___##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_RET - Declare synchronous interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_RET(func) \
+ __visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_RET - Define synchronous interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_RET(func) \
+static __always_inline long ___##func(struct pt_regs *regs); \
+ \
+__visible noinstr long func(struct pt_regs *regs) \
+{ \
+ long ret; \
+ \
+ ret = ___##func (regs); \
+ \
+ return ret; \
+} \
+ \
+static __always_inline long ___##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_ASYNC - Declare asynchronous interrupt handler function
+ * @func: Function name of the entry point
+ */
+#define DECLARE_INTERRUPT_HANDLER_ASYNC(func) \
+ __visible void func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_ASYNC - Define asynchronous interrupt handler function
+ * @func: Function name of the entry point
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_ASYNC(func) \
+static __always_inline void ___##func(struct pt_regs *regs); \
+ \
+__visible noinstr void func(struct pt_regs *regs) \
+{ \
+ ___##func (regs); \
+} \
+ \
+static __always_inline void ___##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_NMI - Declare NMI interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_NMI(func) \
+ __visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_NMI - Define NMI interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_NMI(func) \
+static __always_inline long ___##func(struct pt_regs *regs); \
+ \
+__visible noinstr long func(struct pt_regs *regs) \
+{ \
+ long ret; \
+ \
+ ret = ___##func (regs); \
+ \
+ return ret; \
+} \
+ \
+static __always_inline long ___##func(struct pt_regs *regs)
+
+
+/* Interrupt handlers */
+DECLARE_INTERRUPT_HANDLER_NMI(machine_check_early);
+DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode);
+DECLARE_INTERRUPT_HANDLER(SMIException);
+DECLARE_INTERRUPT_HANDLER(handle_hmi_exception);
+DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception);
+DECLARE_INTERRUPT_HANDLER(RunModeException);
+DECLARE_INTERRUPT_HANDLER(single_step_exception);
+DECLARE_INTERRUPT_HANDLER(program_check_exception);
+DECLARE_INTERRUPT_HANDLER(alignment_exception);
+DECLARE_INTERRUPT_HANDLER(StackOverflow);
+DECLARE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(altivec_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(vsx_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(fp_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER(altivec_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER(vsx_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER(facility_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(TAUException);
+DECLARE_INTERRUPT_HANDLER(altivec_assist_exception);
+DECLARE_INTERRUPT_HANDLER(unrecoverable_exception);
+DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
+DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
+#ifdef CONFIG_PPC_BOOK3S_64
+DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception);
+#else
+DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
+#endif
+DECLARE_INTERRUPT_HANDLER(emulation_assist_interrupt);
+DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault);
+DECLARE_INTERRUPT_HANDLER_RET(do_page_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_page_fault);
+
+DECLARE_INTERRUPT_HANDLER_ASYNC(timer_interrupt);
+DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception);
+DECLARE_INTERRUPT_HANDLER(WatchdogException);
+DECLARE_INTERRUPT_HANDLER(unknown_exception);
+DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception);
+
+void replay_system_reset(void);
+void replay_soft_interrupts(void);
+
+#endif /* _ASM_POWERPC_INTERRUPT_H */
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index cb326720a8a1..27dca4d96fcc 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -181,6 +181,8 @@ DECLARE_PER_CPU(u64, decrementers_next_tb);
/* Convert timebase ticks to nanoseconds */
unsigned long long tb_to_ns(unsigned long long tb_ticks);
+void timer_broadcast_interrupt(void);
+
/* SPLPAR */
void accumulate_stolen_time(void);
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 52680cf07c9d..2c59dee7ec90 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -12,13 +12,14 @@
#include <linux/hardirq.h>
#include <asm/dbell.h>
+#include <asm/interrupt.h>
#include <asm/irq_regs.h>
#include <asm/kvm_ppc.h>
#include <asm/trace.h>
#ifdef CONFIG_SMP
-void doorbell_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
{
struct pt_regs *old_regs = set_irq_regs(regs);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1f34cfd1887c..f6989321136d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1922,7 +1922,7 @@ EXC_COMMON_BEGIN(doorbell_super_common)
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
#else
- bl unknown_exception
+ bl unknown_async_exception
#endif
b interrupt_return
@@ -2135,9 +2135,7 @@ EXC_COMMON_BEGIN(h_data_storage_common)
GEN_COMMON h_data_storage
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
- ld r4,_DAR(r1)
- li r5,SIGSEGV
- bl bad_page_fault
+ bl do_bad_page_fault
MMU_FTR_SECTION_ELSE
bl unknown_exception
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
@@ -2310,7 +2308,7 @@ EXC_COMMON_BEGIN(h_doorbell_common)
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
#else
- bl unknown_exception
+ bl unknown_async_exception
#endif
b interrupt_return
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bf21ebd36190..cb4559bb4878 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -54,6 +54,7 @@
#include <linux/pgtable.h>
#include <linux/uaccess.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/cache.h>
@@ -725,7 +726,7 @@ void __do_irq(struct pt_regs *regs)
irq_exit();
}
-void do_IRQ(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
{
struct pt_regs *old_regs = set_irq_regs(regs);
void *cursp, *irqsp, *sirqsp;
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index ada59f6c4298..d0bbcc4fe13c 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -18,6 +18,7 @@
#include <linux/extable.h>
#include <linux/ftrace.h>
+#include <asm/interrupt.h>
#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>
@@ -588,7 +589,7 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
*
* regs->nip and regs->msr contains srr0 and ssr1.
*/
-long notrace machine_check_early(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
{
long handled = 0;
bool nested = in_nmi();
@@ -723,7 +724,7 @@ long hmi_handle_debugtrig(struct pt_regs *regs)
/*
* Return values:
*/
-long hmi_exception_realmode(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
{
int ret;
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index e2ab8a111b69..4ce3f7319140 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -124,7 +124,7 @@ static void TAUupdate(int cpu)
* with interrupts disabled
*/
-void TAUException(struct pt_regs * regs)
+DEFINE_INTERRUPT_HANDLER(TAUException) /* XXX async? */
{
int cpu = smp_processor_id();
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index f85539ebb513..6e26bc2aa24a 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -56,6 +56,7 @@
#include <linux/processor.h>
#include <asm/trace.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/nvram.h>
#include <asm/cache.h>
@@ -564,7 +565,7 @@ void arch_irq_work_raise(void)
* timer_interrupt - gets called when the decrementer overflows,
* with interrupts disabled.
*/
-void timer_interrupt(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
{
struct clock_event_device *evt = this_cpu_ptr(&decrementers);
u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index d1ebe152f210..96fa2d7e088c 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -41,6 +41,7 @@
#include <asm/emulated_ops.h>
#include <linux/uaccess.h>
#include <asm/debugfs.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
@@ -436,8 +437,9 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
regs->msr &= ~MSR_RI;
#endif
}
+/* XXX make this nokprobe? */
-void system_reset_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
{
unsigned long hsrr0, hsrr1;
bool saved_hsrrs = false;
@@ -522,6 +524,8 @@ void system_reset_exception(struct pt_regs *regs)
this_cpu_set_ftrace_enabled(ftrace_enabled);
/* What should we do here? We could issue a shutdown or hard reset. */
+
+ return 0;
}
/*
@@ -823,7 +827,12 @@ int machine_check_generic(struct pt_regs *regs)
}
#endif /* everything else */
-void machine_check_exception(struct pt_regs *regs)
+
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception)
+#else
+DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
+#endif
{
int recover = 0;
@@ -873,13 +882,20 @@ void machine_check_exception(struct pt_regs *regs)
if (!(regs->msr & MSR_RI))
die("Unrecoverable Machine check", regs, SIGBUS);
+#ifdef CONFIG_PPC_BOOK3S_64
+bail:
return;
+#else
+ return 0;
bail:
if (nmi) nmi_exit();
+
+ return 0;
+#endif
}
-void SMIException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */
{
die("System Management Interrupt", regs, SIGABRT);
}
@@ -1065,7 +1081,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs)
}
#endif /* CONFIG_VSX */
-void handle_hmi_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
{
struct pt_regs *old_regs;
@@ -1094,7 +1110,19 @@ void handle_hmi_exception(struct pt_regs *regs)
set_irq_regs(old_regs);
}
-void unknown_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(unknown_exception)
+{
+ enum ctx_state prev_state = exception_enter();
+
+ printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+ regs->nip, regs->msr, regs->trap);
+
+ _exception(SIGTRAP, regs, TRAP_UNK, 0);
+
+ exception_exit(prev_state);
+}
+
+DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
{
enum ctx_state prev_state = exception_enter();
@@ -1106,7 +1134,7 @@ void unknown_exception(struct pt_regs *regs)
exception_exit(prev_state);
}
-void instruction_breakpoint_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
{
enum ctx_state prev_state = exception_enter();
@@ -1121,12 +1149,12 @@ void instruction_breakpoint_exception(struct pt_regs *regs)
exception_exit(prev_state);
}
-void RunModeException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(RunModeException)
{
_exception(SIGTRAP, regs, TRAP_UNK, 0);
}
-void single_step_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(single_step_exception)
{
enum ctx_state prev_state = exception_enter();
@@ -1469,7 +1497,7 @@ static int emulate_math(struct pt_regs *regs)
static inline int emulate_math(struct pt_regs *regs) { return -1; }
#endif
-void program_check_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(program_check_exception)
{
enum ctx_state prev_state = exception_enter();
unsigned int reason = get_reason(regs);
@@ -1594,14 +1622,14 @@ NOKPROBE_SYMBOL(program_check_exception);
* This occurs when running in hypervisor mode on POWER6 or later
* and an illegal instruction is encountered.
*/
-void emulation_assist_interrupt(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt)
{
regs->msr |= REASON_ILLEGAL;
program_check_exception(regs);
}
NOKPROBE_SYMBOL(emulation_assist_interrupt);
-void alignment_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(alignment_exception)
{
enum ctx_state prev_state = exception_enter();
int sig, code, fixed = 0;
@@ -1651,7 +1679,7 @@ void alignment_exception(struct pt_regs *regs)
exception_exit(prev_state);
}
-void StackOverflow(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(StackOverflow)
{
pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n",
current->comm, task_pid_nr(current), regs->gpr[1]);
@@ -1660,7 +1688,7 @@ void StackOverflow(struct pt_regs *regs)
panic("kernel stack overflow");
}
-void stack_overflow_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
{
enum ctx_state prev_state = exception_enter();
@@ -1669,7 +1697,7 @@ void stack_overflow_exception(struct pt_regs *regs)
exception_exit(prev_state);
}
-void kernel_fp_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception)
{
enum ctx_state prev_state = exception_enter();
@@ -1680,7 +1708,7 @@ void kernel_fp_unavailable_exception(struct pt_regs *regs)
exception_exit(prev_state);
}
-void altivec_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception)
{
enum ctx_state prev_state = exception_enter();
@@ -1699,7 +1727,7 @@ void altivec_unavailable_exception(struct pt_regs *regs)
exception_exit(prev_state);
}
-void vsx_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception)
{
if (user_mode(regs)) {
/* A user program has executed an vsx instruction,
@@ -1730,7 +1758,7 @@ static void tm_unavailable(struct pt_regs *regs)
die("Unrecoverable TM Unavailable Exception", regs, SIGABRT);
}
-void facility_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
{
static char *facility_strings[] = {
[FSCR_FP_LG] = "FPU",
@@ -1850,7 +1878,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void fp_unavailable_tm(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(fp_unavailable_tm)
{
/* Note: This does not handle any kind of FP laziness. */
@@ -1883,7 +1911,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
tm_recheckpoint(¤t->thread);
}
-void altivec_unavailable_tm(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(altivec_unavailable_tm)
{
/* See the comments in fp_unavailable_tm(). This function operates
* the same way.
@@ -1898,7 +1926,7 @@ void altivec_unavailable_tm(struct pt_regs *regs)
current->thread.used_vr = 1;
}
-void vsx_unavailable_tm(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm)
{
/* See the comments in fp_unavailable_tm(). This works similarly,
* though we're loading both FP and VEC registers in here.
@@ -1923,7 +1951,7 @@ void vsx_unavailable_tm(struct pt_regs *regs)
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-void performance_monitor_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception)
{
__this_cpu_inc(irq_stat.pmu_irqs);
@@ -2061,7 +2089,7 @@ NOKPROBE_SYMBOL(DebugException);
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
#ifdef CONFIG_ALTIVEC
-void altivec_assist_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(altivec_assist_exception)
{
int err;
@@ -2203,7 +2231,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
* in the MSR is 0. This indicates that SRR0/1 are live, and that
* we therefore lost state by taking this exception.
*/
-void unrecoverable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(unrecoverable_exception)
{
pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
regs->trap, regs->nip, regs->msr);
@@ -2223,7 +2251,7 @@ void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
return;
}
-void WatchdogException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(WatchdogException) /* XXX NMI? async? */
{
printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
WatchdogHandler(regs);
@@ -2234,7 +2262,7 @@ void WatchdogException(struct pt_regs *regs)
* We enter here if we discover during exception entry that we are
* running in supervisor mode with a userspace value in the stack pointer.
*/
-void kernel_bad_stack(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(kernel_bad_stack)
{
printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
regs->gpr[1], regs->nip);
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index af3c15a1d41e..824b9376ac35 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -26,6 +26,7 @@
#include <linux/delay.h>
#include <linux/smp.h>
+#include <asm/interrupt.h>
#include <asm/paca.h>
/*
@@ -247,14 +248,14 @@ static void watchdog_timer_interrupt(int cpu)
watchdog_smp_panic(cpu, tb);
}
-void soft_nmi_interrupt(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
{
unsigned long flags;
int cpu = raw_smp_processor_id();
u64 tb;
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
- return;
+ return 0;
nmi_enter();
@@ -291,6 +292,8 @@ void soft_nmi_interrupt(struct pt_regs *regs)
out:
nmi_exit();
+
+ return 0;
}
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 073617ce83e0..17102ea50cf4 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -17,6 +17,7 @@
#include <asm/asm-prototypes.h>
#include <asm/cputable.h>
+#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/archrandom.h>
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 8d36b47b3f5a..360748a8042c 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -38,6 +38,7 @@
#include <linux/pgtable.h>
#include <asm/debugfs.h>
+#include <asm/interrupt.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
@@ -1500,7 +1501,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
}
EXPORT_SYMBOL_GPL(hash_page);
-long do_hash_fault(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_RET(do_hash_fault)
{
unsigned long ea = regs->dar;
unsigned long dsisr = regs->dsisr;
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 21832a2f15d7..fdbae002339e 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -10,6 +10,7 @@
*/
#include <asm/asm-prototypes.h>
+#include <asm/interrupt.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
@@ -837,7 +838,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
return slb_insert_entry(ea, context, flags, ssize, false);
}
-long do_slb_fault(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
{
unsigned long ea = regs->dar;
unsigned long id = get_region_id(ea);
@@ -890,7 +891,7 @@ long do_slb_fault(struct pt_regs *regs)
}
}
-void do_bad_slb_fault(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault)
{
int err = regs->result;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 826f84311ae9..7d63b5512068 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -34,6 +34,7 @@
#include <linux/uaccess.h>
#include <asm/firmware.h>
+#include <asm/interrupt.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
@@ -547,7 +548,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
}
NOKPROBE_SYMBOL(__do_page_fault);
-long do_page_fault(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_RET(do_page_fault)
{
enum ctx_state prev_state = exception_enter();
unsigned long address = regs->dar;
@@ -654,3 +655,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
die("Kernel access of bad area", regs, sig);
}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_INTERRUPT_HANDLER(do_bad_page_fault)
+{
+ bad_page_fault(regs, regs->dar, SIGSEGV);
+}
+#endif
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 77513a80cef9..fef30a0da30b 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -14,6 +14,7 @@
#include <asm/asm-prototypes.h>
#include <asm/firmware.h>
+#include <asm/interrupt.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
--
2.23.0
^ permalink raw reply related
* [RFC PATCH 02/12] powerpc: remove arguments from interrupt handler functions
From: Nicholas Piggin @ 2020-09-05 17:43 UTC (permalink / raw)
Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20200905174335.3161229-1-npiggin@gmail.com>
Make interrupt handlers all just take the pt_regs * argument and load
DAR/DSISR etc from that. Make those that return a value return long.
This is done to make the function signatures match more closely, which
will help with a future patch to add wrappers. Explicit arguments could
be re-added for performance in future but that would require more
complex wrapper macros.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/asm-prototypes.h | 4 ++--
arch/powerpc/include/asm/bug.h | 4 ++--
arch/powerpc/kernel/exceptions-64e.S | 2 --
arch/powerpc/kernel/exceptions-64s.S | 14 ++------------
arch/powerpc/mm/book3s64/hash_utils.c | 8 +++++---
arch/powerpc/mm/book3s64/slb.c | 11 +++++++----
arch/powerpc/mm/fault.c | 16 +++++++++-------
7 files changed, 27 insertions(+), 32 deletions(-)
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index de14b1a34d56..fffac9de2922 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -81,8 +81,8 @@ void kernel_bad_stack(struct pt_regs *regs);
void system_reset_exception(struct pt_regs *regs);
void machine_check_exception(struct pt_regs *regs);
void emulation_assist_interrupt(struct pt_regs *regs);
-long do_slb_fault(struct pt_regs *regs, unsigned long ea);
-void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err);
+long do_slb_fault(struct pt_regs *regs);
+void do_bad_slb_fault(struct pt_regs *regs);
/* signals, syscalls and interrupts */
long sys_swapcontext(struct ucontext __user *old_ctx,
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index d714d83bbc7c..2fa0cf6c6011 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -111,8 +111,8 @@
#ifndef __ASSEMBLY__
struct pt_regs;
-extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
-extern int hash__do_page_fault(struct pt_regs *, unsigned long, unsigned long);
+extern long do_page_fault(struct pt_regs *);
+extern long hash__do_page_fault(struct pt_regs *);
extern void bad_page_fault(struct pt_regs *, unsigned long, int);
extern void _exception(int, struct pt_regs *, int, unsigned long);
extern void _exception_pkey(struct pt_regs *, unsigned long, int);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index d9ed79415100..5988d61783b5 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1012,8 +1012,6 @@ storage_fault_common:
std r14,_DAR(r1)
std r15,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r14
- mr r5,r15
ld r14,PACA_EXGEN+EX_R14(r13)
ld r15,PACA_EXGEN+EX_R15(r13)
bl do_page_fault
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f830b893fe03..1f34cfd1887c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1437,8 +1437,6 @@ EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
EXC_VIRT_END(data_access, 0x4300, 0x80)
EXC_COMMON_BEGIN(data_access_common)
GEN_COMMON data_access
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
bl do_hash_fault
@@ -1491,10 +1489,9 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
EXC_COMMON_BEGIN(data_access_slb_common)
GEN_COMMON data_access_slb
- ld r4,_DAR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl do_slb_fault
cmpdi r3,0
bne- 1f
@@ -1506,8 +1503,6 @@ MMU_FTR_SECTION_ELSE
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
RECONCILE_IRQ_STATE(r10, r11)
- ld r4,_DAR(r1)
- ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
b interrupt_return
@@ -1542,8 +1537,6 @@ EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
EXC_VIRT_END(instruction_access, 0x4400, 0x80)
EXC_COMMON_BEGIN(instruction_access_common)
GEN_COMMON instruction_access
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
bl do_hash_fault
@@ -1587,10 +1580,9 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
EXC_COMMON_BEGIN(instruction_access_slb_common)
GEN_COMMON instruction_access_slb
- ld r4,_DAR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl do_slb_fault
cmpdi r3,0
bne- 1f
@@ -1602,8 +1594,6 @@ MMU_FTR_SECTION_ELSE
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
RECONCILE_IRQ_STATE(r10, r11)
- ld r4,_DAR(r1)
- ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
b interrupt_return
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index fd7b6bb7030d..8d36b47b3f5a 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1500,13 +1500,15 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
}
EXPORT_SYMBOL_GPL(hash_page);
-int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr)
+long do_hash_fault(struct pt_regs *regs)
{
+ unsigned long ea = regs->dar;
+ unsigned long dsisr = regs->dsisr;
unsigned long access = _PAGE_PRESENT | _PAGE_READ;
unsigned long flags = 0;
struct mm_struct *mm;
unsigned int region_id;
- int err;
+ long err;
if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)))
goto _do_page_fault;
@@ -1570,7 +1572,7 @@ int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr)
} else if (err) {
_do_page_fault:
- err = hash__do_page_fault(regs, ea, dsisr);
+ err = hash__do_page_fault(regs);
}
return err;
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 156c38f89511..21832a2f15d7 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -837,8 +837,9 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
return slb_insert_entry(ea, context, flags, ssize, false);
}
-long do_slb_fault(struct pt_regs *regs, unsigned long ea)
+long do_slb_fault(struct pt_regs *regs)
{
+ unsigned long ea = regs->dar;
unsigned long id = get_region_id(ea);
/* IRQs are not reconciled here, so can't check irqs_disabled */
@@ -889,13 +890,15 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea)
}
}
-void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err)
+void do_bad_slb_fault(struct pt_regs *regs)
{
+ int err = regs->result;
+
if (err == -EFAULT) {
if (user_mode(regs))
- _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
+ _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
else
- bad_page_fault(regs, ea, SIGSEGV);
+ bad_page_fault(regs, regs->dar, SIGSEGV);
} else if (err == -EINVAL) {
unrecoverable_exception(regs);
} else {
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ce43e401e0e0..826f84311ae9 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -547,11 +547,12 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
}
NOKPROBE_SYMBOL(__do_page_fault);
-int do_page_fault(struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
+long do_page_fault(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
- int err;
+ unsigned long address = regs->dar;
+ unsigned long error_code = regs->dsisr;
+ long err;
err = __do_page_fault(regs, address, error_code);
@@ -578,11 +579,12 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
NOKPROBE_SYMBOL(do_page_fault);
#ifdef CONFIG_PPC_BOOK3S_64
-/* Same as do_page_fault but interrupt entry has already run in do_hash_fault */
-int hash__do_page_fault(struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
+/* Same as do_page_fault but no interrupt entry */
+long hash__do_page_fault(struct pt_regs *regs)
{
- int err;
+ unsigned long address = regs->dar;
+ unsigned long error_code = regs->dsisr;
+ long err;
err = __do_page_fault(regs, address, error_code);
if (unlikely(err)) {
--
2.23.0
^ permalink raw reply related
* [RFC PATCH 01/12] powerpc/64s: move the last of the page fault handling logic to C
From: Nicholas Piggin @ 2020-09-05 17:43 UTC (permalink / raw)
Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20200905174335.3161229-1-npiggin@gmail.com>
The page fault handling still has some complex logic particularly around
hash table handling, in asm. Implement this in C instead.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/bug.h | 1 +
arch/powerpc/kernel/exceptions-64s.S | 131 +++++---------------------
arch/powerpc/mm/book3s64/hash_utils.c | 77 +++++++++------
arch/powerpc/mm/fault.c | 55 ++++++++++-
4 files changed, 124 insertions(+), 140 deletions(-)
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 338f36cd9934..d714d83bbc7c 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -112,6 +112,7 @@
struct pt_regs;
extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
+extern int hash__do_page_fault(struct pt_regs *, unsigned long, unsigned long);
extern void bad_page_fault(struct pt_regs *, unsigned long, int);
extern void _exception(int, struct pt_regs *, int, unsigned long);
extern void _exception_pkey(struct pt_regs *, unsigned long, int);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f7d748b88705..f830b893fe03 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1403,14 +1403,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
*
* Handling:
* - Hash MMU
- * Go to do_hash_page first to see if the HPT can be filled from an entry in
- * the Linux page table. Hash faults can hit in kernel mode in a fairly
+ * Go to do_hash_fault, which attempts to fill the HPT from an entry in the
+ * Linux page table. Hash faults can hit in kernel mode in a fairly
* arbitrary state (e.g., interrupts disabled, locks held) when accessing
* "non-bolted" regions, e.g., vmalloc space. However these should always be
- * backed by Linux page tables.
+ * backed by Linux page table entries.
*
- * If none is found, do a Linux page fault. Linux page faults can happen in
- * kernel mode due to user copy operations of course.
+ * If no entry is found the Linux page fault handler is invoked (by
+ * do_hash_fault). Linux page faults can happen in kernel mode due to user
+ * copy operations of course.
*
* - Radix MMU
* The hardware loads from the Linux page table directly, so a fault goes
@@ -1438,13 +1439,17 @@ EXC_COMMON_BEGIN(data_access_common)
GEN_COMMON data_access
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
- ld r6,_MSR(r1)
- li r3,0x300
- b do_hash_page /* Try to handle as hpte fault */
+ bl do_hash_fault
MMU_FTR_SECTION_ELSE
- b handle_page_fault
+ bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+ cmpdi r3,0
+ beq+ interrupt_return
+ /* We need to restore NVGPRS */
+ REST_NVGPRS(r1)
+ b interrupt_return
GEN_KVM data_access
@@ -1539,13 +1544,17 @@ EXC_COMMON_BEGIN(instruction_access_common)
GEN_COMMON instruction_access
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
- ld r6,_MSR(r1)
- li r3,0x400
- b do_hash_page /* Try to handle as hpte fault */
+ bl do_hash_fault
MMU_FTR_SECTION_ELSE
- b handle_page_fault
+ bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+ cmpdi r3,0
+ beq+ interrupt_return
+ /* We need to restore NVGPRS */
+ REST_NVGPRS(r1)
+ b interrupt_return
GEN_KVM instruction_access
@@ -3197,99 +3206,3 @@ disable_machine_check:
RFI_TO_KERNEL
1: mtlr r0
blr
-
-/*
- * Hash table stuff
- */
- .balign IFETCH_ALIGN_BYTES
-do_hash_page:
-#ifdef CONFIG_PPC_BOOK3S_64
- lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
- ori r0,r0,DSISR_BAD_FAULT_64S@l
- and. r0,r5,r0 /* weird error? */
- bne- handle_page_fault /* if not, try to insert a HPTE */
-
- /*
- * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
- * don't call hash_page, just fail the fault. This is required to
- * prevent re-entrancy problems in the hash code, namely perf
- * interrupts hitting while something holds H_PAGE_BUSY, and taking a
- * hash fault. See the comment in hash_preload().
- */
- ld r11, PACA_THREAD_INFO(r13)
- lwz r0,TI_PREEMPT(r11)
- andis. r0,r0,NMI_MASK@h
- bne 77f
-
- /*
- * r3 contains the trap number
- * r4 contains the faulting address
- * r5 contains dsisr
- * r6 msr
- *
- * at return r3 = 0 for success, 1 for page fault, negative for error
- */
- bl __hash_page /* build HPTE if possible */
- cmpdi r3,0 /* see if __hash_page succeeded */
-
- /* Success */
- beq interrupt_return /* Return from exception on success */
-
- /* Error */
- blt- 13f
-
- /* Reload DAR/DSISR into r4/r5 for the DABR check below */
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-/* Here we have a page fault that hash_page can't handle. */
-handle_page_fault:
-11: andis. r0,r5,DSISR_DABRMATCH@h
- bne- handle_dabr_fault
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_page_fault
- cmpdi r3,0
- beq+ interrupt_return
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- ld r4,_DAR(r1)
- bl bad_page_fault
- b interrupt_return
-
-/* We have a data breakpoint exception - handle it */
-handle_dabr_fault:
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_break
- /*
- * do_break() may have changed the NV GPRS while handling a breakpoint.
- * If so, we need to restore them with their updated values.
- */
- REST_NVGPRS(r1)
- b interrupt_return
-
-
-#ifdef CONFIG_PPC_BOOK3S_64
-/* We have a page fault that hash_page could handle but HV refused
- * the PTE insertion
- */
-13: mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- ld r4,_DAR(r1)
- bl low_hash_fault
- b interrupt_return
-#endif
-
-/*
- * We come here as a result of a DSI at a point where we don't want
- * to call hash_page, such as when we are accessing memory (possibly
- * user memory) inside a PMU interrupt that occurred while interrupts
- * were soft-disabled. We want to invoke the exception handler for
- * the access, or panic if there isn't a handler.
- */
-77: addi r3,r1,STACK_FRAME_OVERHEAD
- li r5,SIGSEGV
- bl bad_page_fault
- b interrupt_return
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 1da9dbba9217..fd7b6bb7030d 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1500,16 +1500,40 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
}
EXPORT_SYMBOL_GPL(hash_page);
-int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr,
- unsigned long msr)
+int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr)
{
unsigned long access = _PAGE_PRESENT | _PAGE_READ;
unsigned long flags = 0;
- struct mm_struct *mm = current->mm;
- unsigned int region_id = get_region_id(ea);
+ struct mm_struct *mm;
+ unsigned int region_id;
+ int err;
+
+ if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)))
+ goto _do_page_fault;
+
+ /*
+ * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
+ * don't call hash_page, just fail the fault. This is required to
+ * prevent re-entrancy problems in the hash code, namely perf
+ * interrupts hitting while something holds H_PAGE_BUSY, and taking a
+ * hash fault. See the comment in hash_preload().
+ *
+ * We come here as a result of a DSI at a point where we don't want
+ * to call hash_page, such as when we are accessing memory (possibly
+ * user memory) inside a PMU interrupt that occurred while interrupts
+ * were soft-disabled. We want to invoke the exception handler for
+ * the access, or panic if there isn't a handler.
+ */
+ if (unlikely(in_nmi())) {
+ bad_page_fault(regs, ea, SIGSEGV);
+ return 0;
+ }
+ region_id = get_region_id(ea);
if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
mm = &init_mm;
+ else
+ mm = current->mm;
if (dsisr & DSISR_NOHPTE)
flags |= HPTE_NOHPTE_UPDATE;
@@ -1525,13 +1549,31 @@ int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr,
* 2) user space access kernel space.
*/
access |= _PAGE_PRIVILEGED;
- if ((msr & MSR_PR) || (region_id == USER_REGION_ID))
+ if (user_mode(regs) || (region_id == USER_REGION_ID))
access &= ~_PAGE_PRIVILEGED;
- if (trap == 0x400)
+ if (regs->trap == 0x400)
access |= _PAGE_EXEC;
- return hash_page_mm(mm, ea, access, trap, flags);
+ err = hash_page_mm(mm, ea, access, regs->trap, flags);
+ if (unlikely(err < 0)) {
+ // failed to instert a hash PTE due to an hypervisor error
+ if (user_mode(regs)) {
+ if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2)
+ _exception(SIGSEGV, regs, SEGV_ACCERR, ea);
+ else
+ _exception(SIGBUS, regs, BUS_ADRERR, ea);
+ } else {
+ bad_page_fault(regs, ea, SIGBUS);
+ }
+ err = 0;
+
+ } else if (err) {
+_do_page_fault:
+ err = hash__do_page_fault(regs, ea, dsisr);
+ }
+
+ return err;
}
#ifdef CONFIG_PPC_MM_SLICES
@@ -1831,27 +1873,6 @@ void flush_hash_range(unsigned long number, int local)
}
}
-/*
- * low_hash_fault is called when we the low level hash code failed
- * to instert a PTE due to an hypervisor error
- */
-void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
-{
- enum ctx_state prev_state = exception_enter();
-
- if (user_mode(regs)) {
-#ifdef CONFIG_PPC_SUBPAGE_PROT
- if (rc == -2)
- _exception(SIGSEGV, regs, SEGV_ACCERR, address);
- else
-#endif
- _exception(SIGBUS, regs, BUS_ADRERR, address);
- } else
- bad_page_fault(regs, address, SIGBUS);
-
- exception_exit(prev_state);
-}
-
long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
unsigned long pa, unsigned long rflags,
unsigned long vflags, int psize, int ssize)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 0add963a849b..ce43e401e0e0 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -405,7 +405,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
return 0;
- if (unlikely(page_fault_is_bad(error_code))) {
+ if (unlikely(page_fault_is_bad(error_code) || (error_code & DSISR_DABRMATCH))) {
+ if (error_code & DSISR_DABRMATCH)
+ return -1;
+
if (is_user) {
_exception(SIGBUS, regs, BUS_OBJERR, address);
return 0;
@@ -548,12 +551,58 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
enum ctx_state prev_state = exception_enter();
- int rc = __do_page_fault(regs, address, error_code);
+ int err;
+
+ err = __do_page_fault(regs, address, error_code);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* 32 and 64e handle errors in their asm code */
+ if (unlikely(err)) {
+ if (err > 0) {
+ bad_page_fault(regs, address, err);
+ err = 0;
+ } else {
+ /*
+ * do_break() may change NV GPRS while handling the
+ * breakpoint. Return -ve to caller to do that.
+ */
+ do_break(regs, address, error_code);
+ }
+ }
+#endif
+
exception_exit(prev_state);
- return rc;
+
+ return err;
}
NOKPROBE_SYMBOL(do_page_fault);
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Same as do_page_fault but interrupt entry has already run in do_hash_fault */
+int hash__do_page_fault(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
+{
+ int err;
+
+ err = __do_page_fault(regs, address, error_code);
+ if (unlikely(err)) {
+ if (err > 0) {
+ bad_page_fault(regs, address, err);
+ err = 0;
+ } else {
+ /*
+ * do_break() may change NV GPRS while handling the
+ * breakpoint. Return -ve to caller to do that.
+ */
+ do_break(regs, address, error_code);
+ }
+ }
+
+ return err;
+}
+NOKPROBE_SYMBOL(hash__do_page_fault);
+#endif
+
/*
* bad_page_fault is called when we have a bad access from the kernel.
* It is called from the DSI and ISI handlers in head.S and from some
--
2.23.0
^ permalink raw reply related
* [RFC PATCH 00/12] interrupt entry wrappers
From: Nicholas Piggin @ 2020-09-05 17:43 UTC (permalink / raw)
Cc: linuxppc-dev, Nicholas Piggin
This series moves more stuff to C, and fixes context tracking on
64s.
Nicholas Piggin (12):
powerpc/64s: move the last of the page fault handling logic to C
powerpc: remove arguments from interrupt handler functions
powerpc: interrupt handler wrapper functions
powerpc: add interrupt_cond_local_irq_enable helper
powerpc/64s: Do context tracking in interrupt entry wrapper
powerpc/64s: reconcile interrupts in C
powerpc/64: move account_stolen_time into its own function
powerpc/64: entry cpu time accounting in C
powerpc: move NMI entry/exit code into wrapper
powerpc/64s: move NMI soft-mask handling to C
powerpc/64s: runlatch interrupt handling in C
powerpc/64s: power4 nap fixup in C
arch/powerpc/Kconfig | 2 +-
arch/powerpc/include/asm/asm-prototypes.h | 28 --
arch/powerpc/include/asm/bug.h | 2 +-
arch/powerpc/include/asm/cputime.h | 15 +
arch/powerpc/include/asm/hw_irq.h | 9 -
arch/powerpc/include/asm/interrupt.h | 316 ++++++++++++++++++++++
arch/powerpc/include/asm/ppc_asm.h | 24 --
arch/powerpc/include/asm/processor.h | 1 +
arch/powerpc/include/asm/thread_info.h | 6 +
arch/powerpc/include/asm/time.h | 2 +
arch/powerpc/kernel/dbell.c | 3 +-
arch/powerpc/kernel/exceptions-64e.S | 3 -
arch/powerpc/kernel/exceptions-64s.S | 307 ++-------------------
arch/powerpc/kernel/idle_book3s.S | 4 +
arch/powerpc/kernel/irq.c | 3 +-
arch/powerpc/kernel/mce.c | 17 +-
arch/powerpc/kernel/ptrace/ptrace.c | 4 -
arch/powerpc/kernel/signal.c | 4 -
arch/powerpc/kernel/syscall_64.c | 24 +-
arch/powerpc/kernel/tau_6xx.c | 2 +-
arch/powerpc/kernel/time.c | 3 +-
arch/powerpc/kernel/traps.c | 198 ++++++--------
arch/powerpc/kernel/watchdog.c | 15 +-
arch/powerpc/kvm/book3s_hv_builtin.c | 1 +
arch/powerpc/mm/book3s64/hash_utils.c | 82 ++++--
arch/powerpc/mm/book3s64/slb.c | 12 +-
arch/powerpc/mm/fault.c | 74 ++++-
arch/powerpc/platforms/powernv/idle.c | 1 +
28 files changed, 608 insertions(+), 554 deletions(-)
create mode 100644 arch/powerpc/include/asm/interrupt.h
--
2.23.0
^ permalink raw reply
* Re: [PATCH 5/5] powerpc: use the generic dma_ops_bypass mode
From: Alexey Kardashevskiy @ 2020-09-05 15:45 UTC (permalink / raw)
To: Christoph Hellwig, Cédric Le Goater
Cc: linuxppc-dev, Daniel Borkmann, Björn Töpel,
Joerg Roedel, Jesper Dangaard Brouer, linux-kernel, iommu,
Oliver O'Halloran, Greg Kroah-Hartman, aacraid, Robin Murphy,
Lu Baolu
In-Reply-To: <20200831064038.GB27617@lst.de>
On 31/08/2020 16:40, Christoph Hellwig wrote:
> On Sun, Aug 30, 2020 at 11:04:21AM +0200, Cédric Le Goater wrote:
>> Hello,
>>
>> On 7/8/20 5:24 PM, Christoph Hellwig wrote:
>>> Use the DMA API bypass mechanism for direct window mappings. This uses
>>> common code and speed up the direct mapping case by avoiding indirect
>>> calls just when not using dma ops at all. It also fixes a problem where
>>> the sync_* methods were using the bypass check for DMA allocations, but
>>> those are part of the streaming ops.
>>>
>>> Note that this patch loses the DMA_ATTR_WEAK_ORDERING override, which
>>> has never been well defined, as is only used by a few drivers, which
>>> IIRC never showed up in the typical Cell blade setups that are affected
>>> by the ordering workaround.
>>>
>>> Fixes: efd176a04bef ("powerpc/pseries/dma: Allow SWIOTLB")
>>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>>> ---
>>> arch/powerpc/Kconfig | 1 +
>>> arch/powerpc/include/asm/device.h | 5 --
>>> arch/powerpc/kernel/dma-iommu.c | 90 ++++---------------------------
>>> 3 files changed, 10 insertions(+), 86 deletions(-)
>>
>> I am seeing corruptions on a couple of POWER9 systems (boston) when
>> stressed with IO. stress-ng gives some results but I have first seen
>> it when compiling the kernel in a guest and this is still the best way
>> to raise the issue.
>>
>> These systems have of a SAS Adaptec controller :
>>
>> 0003:01:00.0 Serial Attached SCSI controller: Adaptec Series 8 12G SAS/PCIe 3 (rev 01)
>>
>> When the failure occurs, the POWERPC EEH interrupt fires and dumps
>> lowlevel PHB4 registers among which :
>>
>> [ 2179.251069490,3] PHB#0003[0:3]: phbErrorStatus = 0000028000000000
>> [ 2179.251117476,3] PHB#0003[0:3]: phbFirstErrorStatus = 0000020000000000
>>
>> The bits raised identify a PPC 'TCE' error, which means it is related
>> to DMAs. See below for more details.
>>
>>
>> Reverting this patch "fixes" the issue but it is probably else where,
>> in some other layers or in the aacraid driver. How should I proceed
>> to get more information ?
>
> The aacraid DMA masks look like a mess.
It kinds does and is. The thing is that after f1565c24b596 the driver
sets 32 bit DMA mask which in turn enables the small DMA window (not
bypass) and since the aacraid driver has at least one bug with double
unmap of the same DMA handle, this somehow leads to EEH (PCI DMA error).
The driver sets 32but mask because it callis dma_get_required_mask()
_before_ setting the mask so dma_get_required_mask() does not go the
dma_alloc_direct() path and calls the powerpc's
dma_iommu_get_required_mask() which:
1. does the math like this (spot 2 bugs):
mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1)
2. but even after fixing that, the driver crashes as f1565c24b596
removed the call to dma_iommu_bypass_supported() so it enforces IOMMU.
The patch below (the first hunk to be precise) brings the things back to
where they were (64bit mask). The double unmap bug in the driver is
still to be investigated.
diff --git a/arch/powerpc/kernel/dma-iommu.c
b/arch/powerpc/kernel/dma-iommu.c
index 569fecd7b5b2..785abccb90fc 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -117,10 +117,18 @@ u64 dma_iommu_get_required_mask(struct device *dev)
struct iommu_table *tbl = get_iommu_table_base(dev);
u64 mask;
+ if (dev_is_pci(dev)) {
+ u64 bypass_mask = dma_direct_get_required_mask(dev);
+
+ if (dma_iommu_bypass_supported(dev, bypass_mask))
+ return bypass_mask;
+ }
+
if (!tbl)
return 0;
- mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1);
+ mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) +
+ tbl->it_page_shift - 1);
mask += mask - 1;
return mask;
--
Alexey
^ permalink raw reply related
* RE: remove the last set_fs() in common code, and remove it for x86 and powerpc v3
From: David Laight @ 2020-09-05 10:13 UTC (permalink / raw)
To: 'Christophe Leroy', 'Alexey Dobriyan',
Ingo Molnar
Cc: linux-arch@vger.kernel.org, Kees Cook, Linus Torvalds,
x86@kernel.org, linux-kernel@vger.kernel.org, Luis Chamberlain,
Al Viro, linux-fsdevel@vger.kernel.org,
linuxppc-dev@lists.ozlabs.org, Christoph Hellwig
In-Reply-To: <4500d8d9-7318-4505-6086-2d2dc41f3866@csgroup.eu>
From: Christophe Leroy
> Sent: 05 September 2020 08:16
>
> Le 04/09/2020 à 23:01, David Laight a écrit :
> > From: Alexey Dobriyan
> >> Sent: 04 September 2020 18:58
...
> > What is this strange %fs register you are talking about.
> > Figure 2-4 only has CS, DS, SS and ES.
> >
>
> Intel added registers FS and GS in the i386
I know, I've got both the 'iAPX 286 Programmer's Reference Manual'
and the '80386 Programmer's Reference Manual' on my shelf.
I don't have the 8088 book though - which I used in 1982.
The old books are a lot easier to read if, for instance,
you are trying to work out how to back and forth to real mode
to do bios calls.
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
^ permalink raw reply
* Re: [PATCH v4 00/13] mm/debug_vm_pgtable fixes
From: Gerald Schaefer @ 2020-09-04 17:53 UTC (permalink / raw)
To: Anshuman Khandual
Cc: linux-s390@vger.kernel.org, Aneesh Kumar K.V, linux-mm,
Vineet Gupta, akpm, linux-snps-arc@lists.infradead.org,
linuxppc-dev, linux-riscv, Gerald Schaefer
In-Reply-To: <20200904180115.07ee5f00@thinkpad>
On Fri, 4 Sep 2020 18:01:15 +0200
Gerald Schaefer <gerald.schaefer@linux.ibm.com> wrote:
> On Fri, 4 Sep 2020 17:26:47 +0200
> Gerald Schaefer <gerald.schaefer@linux.ibm.com> wrote:
>
> > On Fri, 4 Sep 2020 12:18:05 +0530
> > Anshuman Khandual <anshuman.khandual@arm.com> wrote:
> >
> > >
> > >
> > > On 09/02/2020 05:12 PM, Aneesh Kumar K.V wrote:
> > > > This patch series includes fixes for debug_vm_pgtable test code so that
> > > > they follow page table updates rules correctly. The first two patches introduce
> > > > changes w.r.t ppc64. The patches are included in this series for completeness. We can
> > > > merge them via ppc64 tree if required.
> > > >
> > > > Hugetlb test is disabled on ppc64 because that needs larger change to satisfy
> > > > page table update rules.
> > > >
> > > > These tests are broken w.r.t page table update rules and results in kernel
> > > > crash as below.
> > > >
> > > > [ 21.083519] kernel BUG at arch/powerpc/mm/pgtable.c:304!
> > > > cpu 0x0: Vector: 700 (Program Check) at [c000000c6d1e76c0]
> > > > pc: c00000000009a5ec: assert_pte_locked+0x14c/0x380
> > > > lr: c0000000005eeeec: pte_update+0x11c/0x190
> > > > sp: c000000c6d1e7950
> > > > msr: 8000000002029033
> > > > current = 0xc000000c6d172c80
> > > > paca = 0xc000000003ba0000 irqmask: 0x03 irq_happened: 0x01
> > > > pid = 1, comm = swapper/0
> > > > kernel BUG at arch/powerpc/mm/pgtable.c:304!
> > > > [link register ] c0000000005eeeec pte_update+0x11c/0x190
> > > > [c000000c6d1e7950] 0000000000000001 (unreliable)
> > > > [c000000c6d1e79b0] c0000000005eee14 pte_update+0x44/0x190
> > > > [c000000c6d1e7a10] c000000001a2ca9c pte_advanced_tests+0x160/0x3d8
> > > > [c000000c6d1e7ab0] c000000001a2d4fc debug_vm_pgtable+0x7e8/0x1338
> > > > [c000000c6d1e7ba0] c0000000000116ec do_one_initcall+0xac/0x5f0
> > > > [c000000c6d1e7c80] c0000000019e4fac kernel_init_freeable+0x4dc/0x5a4
> > > > [c000000c6d1e7db0] c000000000012474 kernel_init+0x24/0x160
> > > > [c000000c6d1e7e20] c00000000000cbd0 ret_from_kernel_thread+0x5c/0x6c
> > > >
> > > > With DEBUG_VM disabled
> > > >
> > > > [ 20.530152] BUG: Kernel NULL pointer dereference on read at 0x00000000
> > > > [ 20.530183] Faulting instruction address: 0xc0000000000df330
> > > > cpu 0x33: Vector: 380 (Data SLB Access) at [c000000c6d19f700]
> > > > pc: c0000000000df330: memset+0x68/0x104
> > > > lr: c00000000009f6d8: hash__pmdp_huge_get_and_clear+0xe8/0x1b0
> > > > sp: c000000c6d19f990
> > > > msr: 8000000002009033
> > > > dar: 0
> > > > current = 0xc000000c6d177480
> > > > paca = 0xc00000001ec4f400 irqmask: 0x03 irq_happened: 0x01
> > > > pid = 1, comm = swapper/0
> > > > [link register ] c00000000009f6d8 hash__pmdp_huge_get_and_clear+0xe8/0x1b0
> > > > [c000000c6d19f990] c00000000009f748 hash__pmdp_huge_get_and_clear+0x158/0x1b0 (unreliable)
> > > > [c000000c6d19fa10] c0000000019ebf30 pmd_advanced_tests+0x1f0/0x378
> > > > [c000000c6d19fab0] c0000000019ed088 debug_vm_pgtable+0x79c/0x1244
> > > > [c000000c6d19fba0] c0000000000116ec do_one_initcall+0xac/0x5f0
> > > > [c000000c6d19fc80] c0000000019a4fac kernel_init_freeable+0x4dc/0x5a4
> > > > [c000000c6d19fdb0] c000000000012474 kernel_init+0x24/0x160
> > > > [c000000c6d19fe20] c00000000000cbd0 ret_from_kernel_thread+0x5c/0x6c
> > > >
> > > > Changes from v3:
> > > > * Address review feedback
> > > > * Move page table depost and withdraw patch after adding pmdlock to avoid bisect failure.
> > >
> > > This version
> > >
> > > - Builds on x86, arm64, s390, arc, powerpc and riscv (defconfig with DEBUG_VM_PGTABLE)
> > > - Runs on arm64 and x86 without any regression, atleast nothing that I have noticed
> > > - Will be great if this could get tested on s390, arc, riscv, ppc32 platforms as well
> >
> > When I quickly tested v3, it worked fine, but now it turned out to
> > only work fine "sometimes", both v3 and v4. I need to look into it
> > further, but so far it seems related to the hugetlb_advanced_tests().
> >
> > I guess there was already some discussion on this test, but we did
> > not receive all of the thread(s). Please always add at least
> > linux-s390@vger.kernel.org and maybe myself and Vasily Gorbik <gor@linux.ibm.com>
> > for further discussions.
>
> BTW, with myself I mean the new address gerald.schaefer@linux.ibm.com.
> The old gerald.schaefer@de.ibm.com seems to work (again), but is not
> very reliable.
>
> BTW2, a quick test with this change (so far) made the issues on s390
> go away:
>
> @@ -1069,7 +1074,7 @@ static int __init debug_vm_pgtable(void)
> spin_unlock(ptl);
>
> #ifndef CONFIG_PPC_BOOK3S_64
> - hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
> + hugetlb_advanced_tests(mm, vma, (pte_t *) pmdp, pmd_aligned, vaddr, prot);
> #endif
>
> spin_lock(&mm->page_table_lock);
>
> That would more match the "pte_t pointer" usage for hugetlb code,
> i.e. just cast a pmd_t pointer to it. Also changed to pmd_aligned,
> but I think the root cause is the pte_t pointer.
>
> Not entirely sure though if that would really be the correct fix.
> I somehow lost whatever little track I had about what these tests
> really want to check, and if that would still be valid with that
> change.
Another potential issue, apparently not for s390, but maybe for
others, is that the vaddr passed to hugetlb_advanced_tests() is
also not pmd/pud size aligned, like you did in pmd/pud_advanced_tests().
I guess for the hugetlb_advanced_tests() you need to choose if
you want to test pmd or pud hugepages, and accordingly prepare
the *ptep, pfn and vaddr input. If you only check for CONFIG_HUGETLB_PAGE,
then probably only pmd hugepages would be safe, there might be
architectures only supporting one hugepage size.
So, for s390, at least the ptep input value is a problem. Still
need to better understand how it goes wrong, but it seems to be
fixed when using proper pmdp, and also works with pudp.
For others, especially the apparent issues on ppc64, the other
non-hugepage aligned input pfn and vaddr might also be an issue,
e.g. power at least seems to use the vaddr in its set_huge_pte_at()
implementation for some pmd_off(mm, addr) calculation.
Again, sorry if this was already discussed, I missed most of it
and honestly didn't properly look at the scarce mails that we did
receive...
^ permalink raw reply
* Re: fsl_espi errors on v5.7.15
From: Heiner Kallweit @ 2020-09-04 17:23 UTC (permalink / raw)
To: Chris Packham
Cc: linux-kernel@vger.kernel.org, Nicholas Piggin,
linux-spi@vger.kernel.org, broonie@kernel.org, paulus@samba.org,
linuxppc-dev@lists.ozlabs.org
In-Reply-To: <6054f0ec-d994-105b-6399-6cdb65ddd1b6@alliedtelesis.co.nz>
[-- Attachment #1: Type: text/plain, Size: 10949 bytes --]
On Fri 4. Sep 2020 at 01:58, Chris Packham <
Chris.Packham@alliedtelesis.co.nz> wrote:
>
>
> On 1/09/20 6:14 pm, Nicholas Piggin wrote:
>
> > Excerpts from Chris Packham's message of September 1, 2020 11:25 am:
>
> >> On 1/09/20 12:33 am, Heiner Kallweit wrote:
>
> >>> On 30.08.2020 23:59, Chris Packham wrote:
>
> >>>> On 31/08/20 9:41 am, Heiner Kallweit wrote:
>
> >>>>> On 30.08.2020 23:00, Chris Packham wrote:
>
> >>>>>> On 31/08/20 12:30 am, Nicholas Piggin wrote:
>
> >>>>>>> Excerpts from Chris Packham's message of August 28, 2020 8:07 am:
>
> >>>>>> <snip>
>
> >>>>>>
>
> >>>>>>>>>>>> I've also now seen the RX FIFO not empty error on the T2080RDB
>
> >>>>>>>>>>>>
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't
> empty!
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>
> >>>>>>>>>>>>
>
> >>>>>>>>>>>> With my current workaround of emptying the RX FIFO. It seems
>
> >>>>>>>>>>>> survivable. Interestingly it only ever seems to be 1 extra
> byte in the
>
> >>>>>>>>>>>> RX FIFO and it seems to be after either a READ_SR or a
> READ_FSR.
>
> >>>>>>>>>>>>
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: tx 70
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: rx 03
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: Extra RX 00
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't
> empty!
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: tx 05
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: rx 00
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: Extra RX 03
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't
> empty!
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: tx 05
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: rx 00
>
> >>>>>>>>>>>> fsl_espi ffe110000.spi: Extra RX 03
>
> >>>>>>>>>>>>
>
> >>>>>>>>>>>> From all the Micron SPI-NOR datasheets I've got access
> to it is
>
> >>>>>>>>>>>> possible to continually read the SR/FSR. But I've no idea why
> it
>
> >>>>>>>>>>>> happens some times and not others.
>
> >>>>>>>>>>> So I think I've got a reproduction and I think I've bisected
> the problem
>
> >>>>>>>>>>> to commit 3282a3da25bd ("powerpc/64: Implement soft interrupt
> replay in
>
> >>>>>>>>>>> C"). My day is just finishing now so I haven't applied too
> much scrutiny
>
> >>>>>>>>>>> to this result. Given the various rabbit holes I've been down
> on this
>
> >>>>>>>>>>> issue already I'd take this information with a good degree of
> skepticism.
>
> >>>>>>>>>>>
>
> >>>>>>>>>> OK, so an easy test should be to re-test with a 5.4 kernel.
>
> >>>>>>>>>> It doesn't have yet the change you're referring to, and the
> fsl-espi driver
>
> >>>>>>>>>> is basically the same as in 5.7 (just two small changes in 5.7).
>
> >>>>>>>>> There's 6cc0c16d82f88 and maybe also other interrupt related
> patches
>
> >>>>>>>>> around this time that could affect book E, so it's good if that
> exact
>
> >>>>>>>>> patch is confirmed.
>
> >>>>>>>> My confirmation is basically that I can induce the issue in a 5.4
> kernel
>
> >>>>>>>> by cherry-picking 3282a3da25bd. I'm also able to "fix" the issue
> in
>
> >>>>>>>> 5.9-rc2 by reverting that one commit.
>
> >>>>>>>>
>
> >>>>>>>> I both cases it's not exactly a clean cherry-pick/revert so I also
>
> >>>>>>>> confirmed the bisection result by building at 3282a3da25bd (which
> sees
>
> >>>>>>>> the issue) and the commit just before (which does not).
>
> >>>>>>> Thanks for testing, that confirms it well.
>
> >>>>>>>
>
> >>>>>>> [snip patch]
>
> >>>>>>>
>
> >>>>>>>> I still saw the issue with this change applied.
> PPC_IRQ_SOFT_MASK_DEBUG
>
> >>>>>>>> didn't report anything (either with or without the change above).
>
> >>>>>>> Okay, it was a bit of a shot in the dark. I still can't see what
>
> >>>>>>> else has changed.
>
> >>>>>>>
>
> >>>>>>> What would cause this, a lost interrupt? A spurious interrupt? Or
>
> >>>>>>> higher interrupt latency?
>
> >>>>>>>
>
> >>>>>>> I don't think the patch should cause significantly worse latency,
>
> >>>>>>> (it's supposed to be a bit better if anything because it doesn't
> set
>
> >>>>>>> up the full interrupt frame). But it's possible.
>
> >>>>>> My working theory is that the SPI_DON indication is all about the TX
>
> >>>>>> direction an now that the interrupts are faster we're hitting an
> error
>
> >>>>>> because there is still RX activity going on. Heiner disagrees with
> my
>
> >>>>>> interpretation of the SPI_DON indication and the fact that it
> doesn't
>
> >>>>>> happen every time does throw doubt on it.
>
> >>>>>>
>
> >>>>> It's right that the eSPI spec can be interpreted that SPI_DON refers
> to
>
> >>>>> TX only. However this wouldn't really make sense, because also for RX
>
> >>>>> we program the frame length, and therefore want to be notified once
> the
>
> >>>>> full frame was received. Also practical experience shows that SPI_DON
>
> >>>>> is set also after RX-only transfers.
>
> >>>>> Typical SPI NOR use case is that you write read command + start
> address,
>
> >>>>> followed by a longer read. If the TX-only interpretation would be
> right,
>
> >>>>> we'd always end up with SPI_DON not being set.
>
> >>>>>
>
> >>>>>> I can't really explain the extra RX byte in the fifo. We know how
> many
>
> >>>>>> bytes to expect and we pull that many from the fifo so it's not as
> if
>
> >>>>>> we're missing an interrupt causing us to skip the last byte. I've
> been
>
> >>>>>> looking for some kind of off-by-one calculation but again if it were
>
> >>>>>> something like that it'd happen all the time.
>
> >>>>>>
>
> >>>>> Maybe it helps to know what value this extra byte in the FIFO has.
> Is it:
>
> >>>>> - a duplicate of the last read byte
>
> >>>>> - or the next byte (at <end address> + 1)
>
> >>>>> - or a fixed value, e.g. always 0x00 or 0xff
>
> >>>> The values were up thread a bit but I'll repeat them here
>
> >>>>
>
> >>>> fsl_espi ffe110000.spi: tx 70
>
> >>>> fsl_espi ffe110000.spi: rx 03
>
> >>>> fsl_espi ffe110000.spi: Extra RX 00
>
> >>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>
> >>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>
> >>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>
> >>>> fsl_espi ffe110000.spi: tx 05
>
> >>>> fsl_espi ffe110000.spi: rx 00
>
> >>>> fsl_espi ffe110000.spi: Extra RX 03
>
> >>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>
> >>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>
> >>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>
> >>>> fsl_espi ffe110000.spi: tx 05
>
> >>>> fsl_espi ffe110000.spi: rx 00
>
> >>>> fsl_espi ffe110000.spi: Extra RX 03
>
> >>>>
>
> >>>>
>
> >>>> The rx 00 Extra RX 03 is a bit concerning. I've only ever seen them
> with
>
> >>>> either a READ_SR or a READ_FSR. Never a data read.
>
> >>>>
>
> >>> Just remembered something about SPIE_DON:
>
> >>> Transfers are always full duplex, therefore in case of a read the chip
>
> >>> sends dummy zero's. Having said that in case of a read SPIE_DON means
>
> >>> that the last dummy zero was shifted out.
>
> >>>
>
> >>> READ_SR and READ_FSR are the shortest transfers, 1 byte out and 1 byte
> in.
>
> >>> So the issue may have a dependency on the length of the transfer.
>
> >>> However I see no good explanation so far. You can try adding a delay of
>
> >>> a few miroseconds between the following to commands in fsl_espi_bufs().
>
> >>>
>
> >>> fsl_espi_write_reg(espi, ESPI_SPIM, mask);
>
> >>>
>
> >>> /* Prevent filling the fifo from getting interrupted */
>
> >>> spin_lock_irq(&espi->lock);
>
> >>>
>
> >>> Maybe enabling interrupts and seeing the SPIE_DON interrupt are too
> close.
>
> >> I think this might be heading in the right direction. Playing about with
>
> >> a delay does seem to make the two symptoms less likely. Although I have
>
> >> to set it quite high (i.e. msleep(100)) to completely avoid any
>
> >> possibility of seeing either message.
>
> > The patch might replay the interrupt a little bit faster, but it would
>
> > be a few microseconds at most I think (just from improved code).
>
> >
>
> > Would you be able to ftrace the interrupt handler function and see if you
>
> > can see a difference in number or timing of interrupts? I'm at a bit of
>
> > a loss.
>
>
>
> I tried ftrace but I really wasn't sure what I was looking for.
>
> Capturing a "bad" case was pretty tricky. But I think I've identified a
>
> fix (I'll send it as a proper patch shortly). The gist is
>
>
>
> diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
>
> index 7e7c92cafdbb..cb120b68c0e2 100644
>
> --- a/drivers/spi/spi-fsl-espi.c
>
> +++ b/drivers/spi/spi-fsl-espi.c
>
> @@ -574,13 +574,14 @@ static void fsl_espi_cpu_irq(struct fsl_espi
>
> *espi, u32 events)
>
> static irqreturn_t fsl_espi_irq(s32 irq, void *context_data)
>
> {
>
> struct fsl_espi *espi = context_data;
>
> - u32 events;
>
> + u32 events, mask;
>
>
>
> spin_lock(&espi->lock);
>
>
>
> /* Get interrupt events(tx/rx) */
>
> events = fsl_espi_read_reg(espi, ESPI_SPIE);
>
> - if (!events) {
>
> + mask = fsl_espi_read_reg(espi, ESPI_SPIM);
>
> + if (!(events & mask)) {
>
> spin_unlock(&espi->lock);
>
> return IRQ_NONE;
>
> }
>
>
>
> The SPIE register contains the TXCNT so events is pretty much always
>
> going to have something set. By checking events against what we've
>
> actually requested interrupts for we don't see any spurious events.
>
>
Usually we shouldn’t receive interrupts we’re not interested in, except the
interrupt is shared. This leads to the question: is the SPI interrupt
shared with another device on your system? Do you see spurious interrupts
with the patch under /proc/irq/(irq)/spurious?
>
> I've tested this on the T2080RDB and on our custom hardware and it seems
>
> to resolve the problem.
>
>
>
>
[-- Attachment #2: Type: text/html, Size: 14916 bytes --]
^ permalink raw reply
* Re: [PATCH v4 00/13] mm/debug_vm_pgtable fixes
From: Gerald Schaefer @ 2020-09-04 16:01 UTC (permalink / raw)
To: Anshuman Khandual
Cc: linux-s390@vger.kernel.org, Aneesh Kumar K.V, linux-mm,
Vineet Gupta, akpm, linux-snps-arc@lists.infradead.org,
linuxppc-dev, linux-riscv, Gerald Schaefer
In-Reply-To: <20200904172647.002113d3@thinkpad>
On Fri, 4 Sep 2020 17:26:47 +0200
Gerald Schaefer <gerald.schaefer@linux.ibm.com> wrote:
> On Fri, 4 Sep 2020 12:18:05 +0530
> Anshuman Khandual <anshuman.khandual@arm.com> wrote:
>
> >
> >
> > On 09/02/2020 05:12 PM, Aneesh Kumar K.V wrote:
> > > This patch series includes fixes for debug_vm_pgtable test code so that
> > > they follow page table updates rules correctly. The first two patches introduce
> > > changes w.r.t ppc64. The patches are included in this series for completeness. We can
> > > merge them via ppc64 tree if required.
> > >
> > > Hugetlb test is disabled on ppc64 because that needs larger change to satisfy
> > > page table update rules.
> > >
> > > These tests are broken w.r.t page table update rules and results in kernel
> > > crash as below.
> > >
> > > [ 21.083519] kernel BUG at arch/powerpc/mm/pgtable.c:304!
> > > cpu 0x0: Vector: 700 (Program Check) at [c000000c6d1e76c0]
> > > pc: c00000000009a5ec: assert_pte_locked+0x14c/0x380
> > > lr: c0000000005eeeec: pte_update+0x11c/0x190
> > > sp: c000000c6d1e7950
> > > msr: 8000000002029033
> > > current = 0xc000000c6d172c80
> > > paca = 0xc000000003ba0000 irqmask: 0x03 irq_happened: 0x01
> > > pid = 1, comm = swapper/0
> > > kernel BUG at arch/powerpc/mm/pgtable.c:304!
> > > [link register ] c0000000005eeeec pte_update+0x11c/0x190
> > > [c000000c6d1e7950] 0000000000000001 (unreliable)
> > > [c000000c6d1e79b0] c0000000005eee14 pte_update+0x44/0x190
> > > [c000000c6d1e7a10] c000000001a2ca9c pte_advanced_tests+0x160/0x3d8
> > > [c000000c6d1e7ab0] c000000001a2d4fc debug_vm_pgtable+0x7e8/0x1338
> > > [c000000c6d1e7ba0] c0000000000116ec do_one_initcall+0xac/0x5f0
> > > [c000000c6d1e7c80] c0000000019e4fac kernel_init_freeable+0x4dc/0x5a4
> > > [c000000c6d1e7db0] c000000000012474 kernel_init+0x24/0x160
> > > [c000000c6d1e7e20] c00000000000cbd0 ret_from_kernel_thread+0x5c/0x6c
> > >
> > > With DEBUG_VM disabled
> > >
> > > [ 20.530152] BUG: Kernel NULL pointer dereference on read at 0x00000000
> > > [ 20.530183] Faulting instruction address: 0xc0000000000df330
> > > cpu 0x33: Vector: 380 (Data SLB Access) at [c000000c6d19f700]
> > > pc: c0000000000df330: memset+0x68/0x104
> > > lr: c00000000009f6d8: hash__pmdp_huge_get_and_clear+0xe8/0x1b0
> > > sp: c000000c6d19f990
> > > msr: 8000000002009033
> > > dar: 0
> > > current = 0xc000000c6d177480
> > > paca = 0xc00000001ec4f400 irqmask: 0x03 irq_happened: 0x01
> > > pid = 1, comm = swapper/0
> > > [link register ] c00000000009f6d8 hash__pmdp_huge_get_and_clear+0xe8/0x1b0
> > > [c000000c6d19f990] c00000000009f748 hash__pmdp_huge_get_and_clear+0x158/0x1b0 (unreliable)
> > > [c000000c6d19fa10] c0000000019ebf30 pmd_advanced_tests+0x1f0/0x378
> > > [c000000c6d19fab0] c0000000019ed088 debug_vm_pgtable+0x79c/0x1244
> > > [c000000c6d19fba0] c0000000000116ec do_one_initcall+0xac/0x5f0
> > > [c000000c6d19fc80] c0000000019a4fac kernel_init_freeable+0x4dc/0x5a4
> > > [c000000c6d19fdb0] c000000000012474 kernel_init+0x24/0x160
> > > [c000000c6d19fe20] c00000000000cbd0 ret_from_kernel_thread+0x5c/0x6c
> > >
> > > Changes from v3:
> > > * Address review feedback
> > > * Move page table depost and withdraw patch after adding pmdlock to avoid bisect failure.
> >
> > This version
> >
> > - Builds on x86, arm64, s390, arc, powerpc and riscv (defconfig with DEBUG_VM_PGTABLE)
> > - Runs on arm64 and x86 without any regression, atleast nothing that I have noticed
> > - Will be great if this could get tested on s390, arc, riscv, ppc32 platforms as well
>
> When I quickly tested v3, it worked fine, but now it turned out to
> only work fine "sometimes", both v3 and v4. I need to look into it
> further, but so far it seems related to the hugetlb_advanced_tests().
>
> I guess there was already some discussion on this test, but we did
> not receive all of the thread(s). Please always add at least
> linux-s390@vger.kernel.org and maybe myself and Vasily Gorbik <gor@linux.ibm.com>
> for further discussions.
BTW, with myself I mean the new address gerald.schaefer@linux.ibm.com.
The old gerald.schaefer@de.ibm.com seems to work (again), but is not
very reliable.
BTW2, a quick test with this change (so far) made the issues on s390
go away:
@@ -1069,7 +1074,7 @@ static int __init debug_vm_pgtable(void)
spin_unlock(ptl);
#ifndef CONFIG_PPC_BOOK3S_64
- hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
+ hugetlb_advanced_tests(mm, vma, (pte_t *) pmdp, pmd_aligned, vaddr, prot);
#endif
spin_lock(&mm->page_table_lock);
That would more match the "pte_t pointer" usage for hugetlb code,
i.e. just cast a pmd_t pointer to it. Also changed to pmd_aligned,
but I think the root cause is the pte_t pointer.
Not entirely sure though if that would really be the correct fix.
I somehow lost whatever little track I had about what these tests
really want to check, and if that would still be valid with that
change.
^ permalink raw reply
* Re: [PATCH v4 00/13] mm/debug_vm_pgtable fixes
From: Gerald Schaefer @ 2020-09-04 15:26 UTC (permalink / raw)
To: Anshuman Khandual
Cc: linux-s390@vger.kernel.org, Aneesh Kumar K.V, linux-mm,
Vineet Gupta, akpm, linux-snps-arc@lists.infradead.org,
linuxppc-dev, linux-riscv, Gerald Schaefer
In-Reply-To: <bb0f3427-e2bd-f713-3ea8-d264be0e690b@arm.com>
On Fri, 4 Sep 2020 12:18:05 +0530
Anshuman Khandual <anshuman.khandual@arm.com> wrote:
>
>
> On 09/02/2020 05:12 PM, Aneesh Kumar K.V wrote:
> > This patch series includes fixes for debug_vm_pgtable test code so that
> > they follow page table updates rules correctly. The first two patches introduce
> > changes w.r.t ppc64. The patches are included in this series for completeness. We can
> > merge them via ppc64 tree if required.
> >
> > Hugetlb test is disabled on ppc64 because that needs larger change to satisfy
> > page table update rules.
> >
> > These tests are broken w.r.t page table update rules and results in kernel
> > crash as below.
> >
> > [ 21.083519] kernel BUG at arch/powerpc/mm/pgtable.c:304!
> > cpu 0x0: Vector: 700 (Program Check) at [c000000c6d1e76c0]
> > pc: c00000000009a5ec: assert_pte_locked+0x14c/0x380
> > lr: c0000000005eeeec: pte_update+0x11c/0x190
> > sp: c000000c6d1e7950
> > msr: 8000000002029033
> > current = 0xc000000c6d172c80
> > paca = 0xc000000003ba0000 irqmask: 0x03 irq_happened: 0x01
> > pid = 1, comm = swapper/0
> > kernel BUG at arch/powerpc/mm/pgtable.c:304!
> > [link register ] c0000000005eeeec pte_update+0x11c/0x190
> > [c000000c6d1e7950] 0000000000000001 (unreliable)
> > [c000000c6d1e79b0] c0000000005eee14 pte_update+0x44/0x190
> > [c000000c6d1e7a10] c000000001a2ca9c pte_advanced_tests+0x160/0x3d8
> > [c000000c6d1e7ab0] c000000001a2d4fc debug_vm_pgtable+0x7e8/0x1338
> > [c000000c6d1e7ba0] c0000000000116ec do_one_initcall+0xac/0x5f0
> > [c000000c6d1e7c80] c0000000019e4fac kernel_init_freeable+0x4dc/0x5a4
> > [c000000c6d1e7db0] c000000000012474 kernel_init+0x24/0x160
> > [c000000c6d1e7e20] c00000000000cbd0 ret_from_kernel_thread+0x5c/0x6c
> >
> > With DEBUG_VM disabled
> >
> > [ 20.530152] BUG: Kernel NULL pointer dereference on read at 0x00000000
> > [ 20.530183] Faulting instruction address: 0xc0000000000df330
> > cpu 0x33: Vector: 380 (Data SLB Access) at [c000000c6d19f700]
> > pc: c0000000000df330: memset+0x68/0x104
> > lr: c00000000009f6d8: hash__pmdp_huge_get_and_clear+0xe8/0x1b0
> > sp: c000000c6d19f990
> > msr: 8000000002009033
> > dar: 0
> > current = 0xc000000c6d177480
> > paca = 0xc00000001ec4f400 irqmask: 0x03 irq_happened: 0x01
> > pid = 1, comm = swapper/0
> > [link register ] c00000000009f6d8 hash__pmdp_huge_get_and_clear+0xe8/0x1b0
> > [c000000c6d19f990] c00000000009f748 hash__pmdp_huge_get_and_clear+0x158/0x1b0 (unreliable)
> > [c000000c6d19fa10] c0000000019ebf30 pmd_advanced_tests+0x1f0/0x378
> > [c000000c6d19fab0] c0000000019ed088 debug_vm_pgtable+0x79c/0x1244
> > [c000000c6d19fba0] c0000000000116ec do_one_initcall+0xac/0x5f0
> > [c000000c6d19fc80] c0000000019a4fac kernel_init_freeable+0x4dc/0x5a4
> > [c000000c6d19fdb0] c000000000012474 kernel_init+0x24/0x160
> > [c000000c6d19fe20] c00000000000cbd0 ret_from_kernel_thread+0x5c/0x6c
> >
> > Changes from v3:
> > * Address review feedback
> > * Move page table depost and withdraw patch after adding pmdlock to avoid bisect failure.
>
> This version
>
> - Builds on x86, arm64, s390, arc, powerpc and riscv (defconfig with DEBUG_VM_PGTABLE)
> - Runs on arm64 and x86 without any regression, atleast nothing that I have noticed
> - Will be great if this could get tested on s390, arc, riscv, ppc32 platforms as well
When I quickly tested v3, it worked fine, but now it turned out to
only work fine "sometimes", both v3 and v4. I need to look into it
further, but so far it seems related to the hugetlb_advanced_tests().
I guess there was already some discussion on this test, but we did
not receive all of the thread(s). Please always add at least
linux-s390@vger.kernel.org and maybe myself and Vasily Gorbik <gor@linux.ibm.com>
for further discussions.
That being said, sorry for duplications, this might already have been
discussed. Preliminary analysis showed that it only seems to go wrong
for certain random vaddr values. I cannot make any sense of that yet,
but what seems strange to me is that the hugetlb_advanced_tests()
take a (real) pte_t pointer as input, and also use that for all
kinds of operations (set_huge_pte_at, huge_ptep_get_and_clear, etc.).
Although all the hugetlb code in the kernel is (mis)using pte_t
pointers instead of the correct pmd/pud_t pointers like THP, that
is just for historic reasons. The pointers will actually never point
to a real pte_t (i.e. page table entry), but of course to a pmd
or pud entry, depending on hugepage size.
What is passed in as ptep to hugetlb_advanced_tests() seems to be
the result from the previous ptep = pte_alloc_map(mm, pmdp, vaddr),
so I would expect that it points to a real page table entry. Need
to investigate further, but IIUC, using such a pointer for adding
large pte entries (i.e. pmd/pud entries) at least feels very wrong
to me, and I assume it is related to the issues we see on s390.
We actually see different issues, e.g. once a panic directly in
hugetlb_advanced_tests() -> huge_ptep_get_and_clear(), but also
indirect symptoms after debug_vm_pgtable() completes, like this:
[ 10.533901] BUG task_struct (Not tainted): Padding overwritten. 0x0000000019f798c7-0x0000000019f798c7 @offset=30087
Last but not least, what I said about the pte vs. pmd/pud of
course also should apply to the hugetlb_basic_tests(), although
they are not directly using a pte_t pointer, and especially
also not writing to it. Still, the pte_aligned pfn parameter
is not guaranteed to also be pmd/pud_aligned, which doesn't
feel right.
So, for now, until this is sorted out, I guess we also need
to exclude s390 at least from the hugetlb_advanced_tests().
The hugetlb_basic_tests() seem to work fine so far (probably
by chance :-))
^ permalink raw reply
* Re: remove the last set_fs() in common code, and remove it for x86 and powerpc v3
From: Christophe Leroy @ 2020-09-05 7:16 UTC (permalink / raw)
To: David Laight, 'Alexey Dobriyan', Ingo Molnar
Cc: linux-arch@vger.kernel.org, Kees Cook, Linus Torvalds,
x86@kernel.org, linux-kernel@vger.kernel.org, Luis Chamberlain,
Al Viro, linux-fsdevel@vger.kernel.org,
linuxppc-dev@lists.ozlabs.org, Christoph Hellwig
In-Reply-To: <63f3c9342a784a0890b3b641a71a8aa1@AcuMS.aculab.com>
Le 04/09/2020 à 23:01, David Laight a écrit :
> From: Alexey Dobriyan
>> Sent: 04 September 2020 18:58
>>
>> On Fri, Sep 04, 2020 at 08:00:24AM +0200, Ingo Molnar wrote:
>>> * Christoph Hellwig <hch@lst.de> wrote:
>>>> this series removes the last set_fs() used to force a kernel address
>>>> space for the uaccess code in the kernel read/write/splice code, and then
>>>> stops implementing the address space overrides entirely for x86 and
>>>> powerpc.
>>>
>>> Cool! For the x86 bits:
>>>
>>> Acked-by: Ingo Molnar <mingo@kernel.org>
>>
>> set_fs() is older than some kernel hackers!
>>
>> $ cd linux-0.11/
>> $ find . -type f -name '*.h' | xargs grep -e set_fs -w -n -A3
>> ./include/asm/segment.h:61:extern inline void set_fs(unsigned long val)
>> ./include/asm/segment.h-62-{
>> ./include/asm/segment.h-63- __asm__("mov %0,%%fs"::"a" ((unsigned short) val));
>> ./include/asm/segment.h-64-}
>
> What is this strange %fs register you are talking about.
> Figure 2-4 only has CS, DS, SS and ES.
>
Intel added registers FS and GS in the i386
Christophe
^ permalink raw reply
* [PATCH v3 2/2] scsi: ibmvfc: interface updates for future FPIN and MQ support
From: Tyrel Datwyler @ 2020-09-04 23:29 UTC (permalink / raw)
To: james.bottomley
Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel, brking,
linuxppc-dev
In-Reply-To: <20200904232936.840193-1-tyreld@linux.ibm.com>
VIOS partitions with SLI-4 enabled Emulex adapters will be capable of
driving IO in parallel through mulitple work queues or channels, and
with new hyperviosr firmware that supports multiple interrupt sources
an ibmvfc NPIV single initiator can be modified to exploit end to end
channelization in a PowerVM environment.
VIOS hosts will also be able to expose fabric perfromance impact
notifications (FPIN) via a new asynchronous event to ibmvfc clients that
advertise support via IBMVFC_CAN_HANDLE_FPIN in their capabilities flag
during NPIV_LOGIN.
This patch introduces three new Management Datagrams (MADs) for
channelization support negotiation as well as the FPIN asynchronous
event and FPIN status flags. Follow up work is required to plumb the
ibmvfc client driver to use these new interfaces.
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
---
v2 -> v3:
Fixup checkpatch warnings about using __attribute__()
v1 -> v2:
Fixup complier errors from neglected commit --amend
---
drivers/scsi/ibmvscsi/ibmvfc.h | 66 +++++++++++++++++++++++++++++++++-
1 file changed, 65 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 6da23666f5be..e6e1c255a79c 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -124,6 +124,9 @@ enum ibmvfc_mad_types {
IBMVFC_PASSTHRU = 0x0200,
IBMVFC_TMF_MAD = 0x0100,
IBMVFC_NPIV_LOGOUT = 0x0800,
+ IBMVFC_CHANNEL_ENQUIRY = 0x1000,
+ IBMVFC_CHANNEL_SETUP = 0x2000,
+ IBMVFC_CONNECTION_INFO = 0x4000,
};
struct ibmvfc_mad_common {
@@ -162,6 +165,8 @@ struct ibmvfc_npiv_login {
__be32 max_cmds;
__be64 capabilities;
#define IBMVFC_CAN_MIGRATE 0x01
+#define IBMVFC_CAN_USE_CHANNELS 0x02
+#define IBMVFC_CAN_HANDLE_FPIN 0x04
__be64 node_name;
struct srp_direct_buf async;
u8 partition_name[IBMVFC_MAX_NAME];
@@ -204,6 +209,7 @@ struct ibmvfc_npiv_login_resp {
__be64 capabilities;
#define IBMVFC_CAN_FLUSH_ON_HALT 0x08
#define IBMVFC_CAN_SUPPRESS_ABTS 0x10
+#define IBMVFC_CAN_SUPPORT_CHANNELS 0x20
__be32 max_cmds;
__be32 scsi_id_sz;
__be64 max_dma_len;
@@ -482,6 +488,52 @@ struct ibmvfc_passthru_mad {
struct ibmvfc_passthru_fc_iu fc_iu;
} __packed __aligned(8);
+struct ibmvfc_channel_enquiry {
+ struct ibmvfc_mad_common common;
+ __be32 flags;
+#define IBMVFC_NO_CHANNELS_TO_CRQ_SUPPORT 0x01
+#define IBMVFC_SUPPORT_VARIABLE_SUBQ_MSG 0x02
+#define IBMVFC_NO_N_TO_M_CHANNELS_SUPPORT 0x04
+ __be32 num_scsi_subq_channels;
+ __be32 num_nvmeof_subq_channels;
+ __be32 num_scsi_vas_channels;
+ __be32 num_nvmeof_vas_channels;
+} __packed __aligned(8);
+
+struct ibmvfc_channel_setup_mad {
+ struct ibmvfc_mad_common common;
+ struct srp_direct_buf buffer;
+} __packed __aligned(8);
+
+#define IBMVFC_MAX_CHANNELS 502
+
+struct ibmvfc_channel_setup {
+ __be32 flags;
+#define IBMVFC_CANCEL_CHANNELS 0x01
+#define IBMVFC_USE_BUFFER 0x02
+#define IBMVFC_CHANNELS_CANCELED 0x04
+ __be32 reserved;
+ __be32 num_scsi_subq_channels;
+ __be32 num_nvmeof_subq_channels;
+ __be32 num_scsi_vas_channels;
+ __be32 num_nvmeof_vas_channels;
+ struct srp_direct_buf buffer;
+ __be64 reserved2[5];
+ __be64 channel_handles[IBMVFC_MAX_CHANNELS];
+} __packed __aligned(8);
+
+struct ibmvfc_connection_info {
+ struct ibmvfc_mad_common common;
+ __be64 information_bits;
+#define IBMVFC_NO_FC_IO_CHANNEL 0x01
+#define IBMVFC_NO_PHYP_VAS 0x02
+#define IBMVFC_NO_PHYP_SUBQ 0x04
+#define IBMVFC_PHYP_DEPRECATED_SUBQ 0x08
+#define IBMVFC_PHYP_PRESERVED_SUBQ 0x10
+#define IBMVFC_PHYP_FULL_SUBQ 0x20
+ __be64 reserved[16];
+} __packed __aligned(8);
+
struct ibmvfc_trace_start_entry {
u32 xfer_len;
} __packed;
@@ -532,6 +584,7 @@ enum ibmvfc_async_event {
IBMVFC_AE_HALT = 0x0400,
IBMVFC_AE_RESUME = 0x0800,
IBMVFC_AE_ADAPTER_FAILED = 0x1000,
+ IBMVFC_AE_FPIN = 0x2000,
};
struct ibmvfc_async_desc {
@@ -560,10 +613,18 @@ enum ibmvfc_ae_link_state {
IBMVFC_AE_LS_LINK_DEAD = 0x08,
};
+enum ibmvfc_ae_fpin_status {
+ IBMVFC_AE_FPIN_LINK_CONGESTED = 0x1,
+ IBMVFC_AE_FPIN_PORT_CONGESTED = 0x2,
+ IBMVFC_AE_FPIN_PORT_CLEARED = 0x3,
+ IBMVFC_AE_FPIN_PORT_DEGRADED = 0x4,
+};
+
struct ibmvfc_async_crq {
volatile u8 valid;
u8 link_state;
- u8 pad[2];
+ u8 fpin_status;
+ u8 pad;
__be32 pad2;
volatile __be64 event;
volatile __be64 scsi_id;
@@ -590,6 +651,9 @@ union ibmvfc_iu {
struct ibmvfc_tmf tmf;
struct ibmvfc_cmd cmd;
struct ibmvfc_passthru_mad passthru;
+ struct ibmvfc_channel_enquiry channel_enquiry;
+ struct ibmvfc_channel_setup_mad channel_setup;
+ struct ibmvfc_connection_info connection_info;
} __packed __aligned(8);
enum ibmvfc_target_action {
--
2.27.0
^ permalink raw reply related
* [PATCH v3 1/2] scsi: ibmvfc: use compiler attribute defines instead of __attribute__()
From: Tyrel Datwyler @ 2020-09-04 23:29 UTC (permalink / raw)
To: james.bottomley
Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel, brking,
linuxppc-dev
Update ibmvfc.h structs to use the preferred __packed and __aligned()
attribute macros defined in include/linux/compiler_attributes.h in place
of __attribute__().
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
---
drivers/scsi/ibmvscsi/ibmvfc.h | 56 +++++++++++++++++-----------------
1 file changed, 28 insertions(+), 28 deletions(-)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 907889f1fa9d..6da23666f5be 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -133,16 +133,16 @@ struct ibmvfc_mad_common {
__be16 status;
__be16 length;
__be64 tag;
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_npiv_login_mad {
struct ibmvfc_mad_common common;
struct srp_direct_buf buffer;
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_npiv_logout_mad {
struct ibmvfc_mad_common common;
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
#define IBMVFC_MAX_NAME 256
@@ -168,7 +168,7 @@ struct ibmvfc_npiv_login {
u8 device_name[IBMVFC_MAX_NAME];
u8 drc_name[IBMVFC_MAX_NAME];
__be64 reserved2[2];
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_common_svc_parms {
__be16 fcph_version;
@@ -177,7 +177,7 @@ struct ibmvfc_common_svc_parms {
__be16 bb_rcv_sz; /* upper nibble is BB_SC_N */
__be32 ratov;
__be32 edtov;
-}__attribute__((packed, aligned (4)));
+} __packed __aligned(4);
struct ibmvfc_service_parms {
struct ibmvfc_common_svc_parms common;
@@ -192,7 +192,7 @@ struct ibmvfc_service_parms {
__be32 ext_len;
__be32 reserved[30];
__be32 clk_sync_qos[2];
-}__attribute__((packed, aligned (4)));
+} __packed __aligned(4);
struct ibmvfc_npiv_login_resp {
__be32 version;
@@ -217,12 +217,12 @@ struct ibmvfc_npiv_login_resp {
u8 drc_name[IBMVFC_MAX_NAME];
struct ibmvfc_service_parms service_parms;
__be64 reserved2;
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
union ibmvfc_npiv_login_data {
struct ibmvfc_npiv_login login;
struct ibmvfc_npiv_login_resp resp;
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_discover_targets_buf {
__be32 scsi_id[1];
@@ -239,7 +239,7 @@ struct ibmvfc_discover_targets {
__be32 num_avail;
__be32 num_written;
__be64 reserved[2];
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
enum ibmvfc_fc_reason {
IBMVFC_INVALID_ELS_CMD_CODE = 0x01,
@@ -283,7 +283,7 @@ struct ibmvfc_port_login {
struct ibmvfc_service_parms service_parms;
struct ibmvfc_service_parms service_parms_change;
__be64 reserved3[2];
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_prli_svc_parms {
u8 type;
@@ -303,7 +303,7 @@ struct ibmvfc_prli_svc_parms {
#define IBMVFC_PRLI_TARGET_FUNC 0x00000010
#define IBMVFC_PRLI_READ_FCP_XFER_RDY_DISABLED 0x00000002
#define IBMVFC_PRLI_WR_FCP_XFER_RDY_DISABLED 0x00000001
-}__attribute__((packed, aligned (4)));
+} __packed __aligned(4);
struct ibmvfc_process_login {
struct ibmvfc_mad_common common;
@@ -314,7 +314,7 @@ struct ibmvfc_process_login {
__be16 error; /* also fc_reason */
__be32 reserved2;
__be64 reserved3[2];
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_query_tgt {
struct ibmvfc_mad_common common;
@@ -325,13 +325,13 @@ struct ibmvfc_query_tgt {
__be16 fc_explain;
__be16 fc_type;
__be64 reserved[2];
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_implicit_logout {
struct ibmvfc_mad_common common;
__be64 old_scsi_id;
__be64 reserved[2];
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_tmf {
struct ibmvfc_mad_common common;
@@ -348,7 +348,7 @@ struct ibmvfc_tmf {
__be32 my_cancel_key;
__be32 pad;
__be64 reserved[2];
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
enum ibmvfc_fcp_rsp_info_codes {
RSP_NO_FAILURE = 0x00,
@@ -361,7 +361,7 @@ struct ibmvfc_fcp_rsp_info {
u8 reserved[3];
u8 rsp_code;
u8 reserved2[4];
-}__attribute__((packed, aligned (2)));
+} __packed __aligned(2);
enum ibmvfc_fcp_rsp_flags {
FCP_BIDI_RSP = 0x80,
@@ -377,7 +377,7 @@ enum ibmvfc_fcp_rsp_flags {
union ibmvfc_fcp_rsp_data {
struct ibmvfc_fcp_rsp_info info;
u8 sense[SCSI_SENSE_BUFFERSIZE + sizeof(struct ibmvfc_fcp_rsp_info)];
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_fcp_rsp {
__be64 reserved;
@@ -388,7 +388,7 @@ struct ibmvfc_fcp_rsp {
__be32 fcp_sense_len;
__be32 fcp_rsp_len;
union ibmvfc_fcp_rsp_data data;
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
enum ibmvfc_cmd_flags {
IBMVFC_SCATTERLIST = 0x0001,
@@ -422,7 +422,7 @@ struct ibmvfc_fcp_cmd_iu {
#define IBMVFC_WRDATA 0x01
u8 cdb[IBMVFC_MAX_CDB_LEN];
__be32 xfer_len;
-}__attribute__((packed, aligned (4)));
+} __packed __aligned(4);
struct ibmvfc_cmd {
__be64 task_tag;
@@ -446,7 +446,7 @@ struct ibmvfc_cmd {
__be64 reserved3[2];
struct ibmvfc_fcp_cmd_iu iu;
struct ibmvfc_fcp_rsp rsp;
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_passthru_fc_iu {
__be32 payload[7];
@@ -473,18 +473,18 @@ struct ibmvfc_passthru_iu {
__be64 scsi_id;
__be64 tag;
__be64 reserved2[2];
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_passthru_mad {
struct ibmvfc_mad_common common;
struct srp_direct_buf cmd_ioba;
struct ibmvfc_passthru_iu iu;
struct ibmvfc_passthru_fc_iu fc_iu;
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_trace_start_entry {
u32 xfer_len;
-}__attribute__((packed));
+} __packed;
struct ibmvfc_trace_end_entry {
u16 status;
@@ -493,7 +493,7 @@ struct ibmvfc_trace_end_entry {
u8 rsp_code;
u8 scsi_status;
u8 reserved;
-}__attribute__((packed));
+} __packed;
struct ibmvfc_trace_entry {
struct ibmvfc_event *evt;
@@ -510,7 +510,7 @@ struct ibmvfc_trace_entry {
struct ibmvfc_trace_start_entry start;
struct ibmvfc_trace_end_entry end;
} u;
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
enum ibmvfc_crq_formats {
IBMVFC_CMD_FORMAT = 0x01,
@@ -545,7 +545,7 @@ struct ibmvfc_crq {
volatile u8 format;
u8 reserved[6];
volatile __be64 ioba;
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_crq_queue {
struct ibmvfc_crq *msgs;
@@ -570,7 +570,7 @@ struct ibmvfc_async_crq {
volatile __be64 wwpn;
volatile __be64 node_name;
__be64 reserved;
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
struct ibmvfc_async_crq_queue {
struct ibmvfc_async_crq *msgs;
@@ -590,7 +590,7 @@ union ibmvfc_iu {
struct ibmvfc_tmf tmf;
struct ibmvfc_cmd cmd;
struct ibmvfc_passthru_mad passthru;
-}__attribute__((packed, aligned (8)));
+} __packed __aligned(8);
enum ibmvfc_target_action {
IBMVFC_TGT_ACTION_NONE = 0,
--
2.27.0
^ permalink raw reply related
* [PATCH 5/5] powerpc/tau: Disable TAU between measurements
From: Finn Thain @ 2020-09-04 23:02 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1599260540.git.fthain@telegraphics.com.au>
Enabling CONFIG_TAU_INT causes random crashes:
Unrecoverable exception 1700 at c0009414 (msr=1000)
Oops: Unrecoverable exception, sig: 6 [#1]
BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac
Modules linked in:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.7.0-pmac-00043-gd5f545e1a8593 #5
NIP: c0009414 LR: c0009414 CTR: c00116fc
REGS: c0799eb8 TRAP: 1700 Not tainted (5.7.0-pmac-00043-gd5f545e1a8593)
MSR: 00001000 <ME> CR: 22000228 XER: 00000100
GPR00: 00000000 c0799f70 c076e300 00800000 0291c0ac 00e00000 c076e300 00049032
GPR08: 00000001 c00116fc 00000000 dfbd3200 ffffffff 007f80a8 00000000 00000000
GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 c075ce04
GPR24: c075ce04 dfff8880 c07b0000 c075ce04 00080000 00000001 c079ef98 c079ef5c
NIP [c0009414] arch_cpu_idle+0x24/0x6c
LR [c0009414] arch_cpu_idle+0x24/0x6c
Call Trace:
[c0799f70] [00000001] 0x1 (unreliable)
[c0799f80] [c0060990] do_idle+0xd8/0x17c
[c0799fa0] [c0060ba4] cpu_startup_entry+0x20/0x28
[c0799fb0] [c072d220] start_kernel+0x434/0x44c
[c0799ff0] [00003860] 0x3860
Instruction dump:
XXXXXXXX XXXXXXXX XXXXXXXX 3d20c07b XXXXXXXX XXXXXXXX XXXXXXXX 7c0802a6
XXXXXXXX XXXXXXXX XXXXXXXX 4e800421 XXXXXXXX XXXXXXXX XXXXXXXX 7d2000a6
---[ end trace 3a0c9b5cb216db6b ]---
Resolve this problem by disabling each THRMn comparator when handling
the associated THRMn interrupt and by disabling the TAU entirely when
updating THRMn thresholds.
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2")
Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
arch/powerpc/kernel/tau_6xx.c | 65 +++++++++++++---------------------
arch/powerpc/platforms/Kconfig | 9 ++---
2 files changed, 26 insertions(+), 48 deletions(-)
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index 614b5b272d9c6..0b4694b8d2482 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -42,8 +42,6 @@ static struct tau_temp
static bool tau_int_enable;
-#undef DEBUG
-
/* TODO: put these in a /proc interface, with some sanity checks, and maybe
* dynamic adjustment to minimize # of interrupts */
/* configurable values for step size and how much to expand the window when
@@ -67,42 +65,33 @@ static void set_thresholds(unsigned long cpu)
static void TAUupdate(int cpu)
{
- unsigned thrm;
-
-#ifdef DEBUG
- printk("TAUupdate ");
-#endif
+ u32 thrm;
+ u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;
/* if both thresholds are crossed, the step_sizes cancel out
* and the window winds up getting expanded twice. */
- if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */
- if(thrm & THRM1_TIN){ /* crossed low threshold */
- if (tau[cpu].low >= step_size){
- tau[cpu].low -= step_size;
- tau[cpu].high -= (step_size - window_expand);
- }
- tau[cpu].grew = 1;
-#ifdef DEBUG
- printk("low threshold crossed ");
-#endif
+ thrm = mfspr(SPRN_THRM1);
+ if ((thrm & bits) == bits) {
+ mtspr(SPRN_THRM1, 0);
+
+ if (tau[cpu].low >= step_size) {
+ tau[cpu].low -= step_size;
+ tau[cpu].high -= (step_size - window_expand);
}
+ tau[cpu].grew = 1;
+ pr_debug("%s: low threshold crossed\n", __func__);
}
- if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */
- if(thrm & THRM1_TIN){ /* crossed high threshold */
- if (tau[cpu].high <= 127-step_size){
- tau[cpu].low += (step_size - window_expand);
- tau[cpu].high += step_size;
- }
- tau[cpu].grew = 1;
-#ifdef DEBUG
- printk("high threshold crossed ");
-#endif
+ thrm = mfspr(SPRN_THRM2);
+ if ((thrm & bits) == bits) {
+ mtspr(SPRN_THRM2, 0);
+
+ if (tau[cpu].high <= 127 - step_size) {
+ tau[cpu].low += (step_size - window_expand);
+ tau[cpu].high += step_size;
}
+ tau[cpu].grew = 1;
+ pr_debug("%s: high threshold crossed\n", __func__);
}
-
-#ifdef DEBUG
- printk("grew = %d\n", tau[cpu].grew);
-#endif
}
#ifdef CONFIG_TAU_INT
@@ -127,17 +116,17 @@ void TAUException(struct pt_regs * regs)
static void tau_timeout(void * info)
{
int cpu;
- unsigned long flags;
int size;
int shrink;
- /* disabling interrupts *should* be okay */
- local_irq_save(flags);
cpu = smp_processor_id();
if (!tau_int_enable)
TAUupdate(cpu);
+ /* Stop thermal sensor comparisons and interrupts */
+ mtspr(SPRN_THRM3, 0);
+
size = tau[cpu].high - tau[cpu].low;
if (size > min_window && ! tau[cpu].grew) {
/* do an exponential shrink of half the amount currently over size */
@@ -159,18 +148,12 @@ static void tau_timeout(void * info)
set_thresholds(cpu);
- /*
- * Do the enable every time, since otherwise a bunch of (relatively)
- * complex sleep code needs to be added. One mtspr every time
- * tau_timeout is called is probably not a big deal.
- *
+ /* Restart thermal sensor comparisons and interrupts.
* The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
* recommends that "the maximum value be set in THRM3 under all
* conditions."
*/
mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);
-
- local_irq_restore(flags);
}
static struct workqueue_struct *tau_workq;
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 9fe36f0b54c1a..b439b027a42f1 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -227,7 +227,7 @@ config TAU
don't assume the cpu temp is actually what /proc/cpuinfo says it is.
config TAU_INT
- bool "Interrupt driven TAU driver (DANGEROUS)"
+ bool "Interrupt driven TAU driver (EXPERIMENTAL)"
depends on TAU
help
The TAU supports an interrupt driven mode which causes an interrupt
@@ -235,12 +235,7 @@ config TAU_INT
to get notified the temp has exceeded a range. With this option off,
a timer is used to re-check the temperature periodically.
- However, on some cpus it appears that the TAU interrupt hardware
- is buggy and can cause a situation which would lead unexplained hard
- lockups.
-
- Unless you are extending the TAU driver, or enjoy kernel/hardware
- debugging, leave this option off.
+ If in doubt, say N here.
config TAU_AVERAGE
bool "Average high and low temp"
--
2.26.2
^ permalink raw reply related
* [PATCH 4/5] powerpc/tau: Check processor type before enabling TAU interrupt
From: Finn Thain @ 2020-09-04 23:02 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1599260540.git.fthain@telegraphics.com.au>
According to Freescale's documentation, MPC74XX processors have an
erratum that prevents the TAU interrupt from working, so don't try to
use it when running on those processors.
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2")
Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
arch/powerpc/kernel/tau_6xx.c | 33 ++++++++++++++-------------------
arch/powerpc/platforms/Kconfig | 5 ++---
2 files changed, 16 insertions(+), 22 deletions(-)
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index b8d7e7d498e0a..614b5b272d9c6 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -40,6 +40,8 @@ static struct tau_temp
unsigned char grew;
} tau[NR_CPUS];
+static bool tau_int_enable;
+
#undef DEBUG
/* TODO: put these in a /proc interface, with some sanity checks, and maybe
@@ -54,22 +56,13 @@ static struct tau_temp
static void set_thresholds(unsigned long cpu)
{
-#ifdef CONFIG_TAU_INT
- /*
- * setup THRM1,
- * threshold, valid bit, enable interrupts, interrupt when below threshold
- */
- mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID);
+ u32 maybe_tie = tau_int_enable ? THRM1_TIE : 0;
- /* setup THRM2,
- * threshold, valid bit, enable interrupts, interrupt when above threshold
- */
- mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE);
-#else
- /* same thing but don't enable interrupts */
- mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID);
- mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V);
-#endif
+ /* setup THRM1, threshold, valid bit, interrupt when below threshold */
+ mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID);
+
+ /* setup THRM2, threshold, valid bit, interrupt when above threshold */
+ mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie);
}
static void TAUupdate(int cpu)
@@ -142,9 +135,8 @@ static void tau_timeout(void * info)
local_irq_save(flags);
cpu = smp_processor_id();
-#ifndef CONFIG_TAU_INT
- TAUupdate(cpu);
-#endif
+ if (!tau_int_enable)
+ TAUupdate(cpu);
size = tau[cpu].high - tau[cpu].low;
if (size > min_window && ! tau[cpu].grew) {
@@ -225,6 +217,9 @@ static int __init TAU_init(void)
return 1;
}
+ tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) &&
+ !strcmp(cur_cpu_spec->platform, "ppc750");
+
tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0);
if (!tau_workq)
return -ENOMEM;
@@ -234,7 +229,7 @@ static int __init TAU_init(void)
queue_work(tau_workq, &tau_work);
pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n",
- IS_ENABLED(CONFIG_TAU_INT) ? "interrupts" : "workqueue", shrink_timer);
+ tau_int_enable ? "interrupts" : "workqueue", shrink_timer);
tau_initialized = 1;
return 0;
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index fb7515b4fa9c6..9fe36f0b54c1a 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -223,9 +223,8 @@ config TAU
temperature within 2-4 degrees Celsius. This option shows the current
on-die temperature in /proc/cpuinfo if the cpu supports it.
- Unfortunately, on some chip revisions, this sensor is very inaccurate
- and in many cases, does not work at all, so don't assume the cpu
- temp is actually what /proc/cpuinfo says it is.
+ Unfortunately, this sensor is very inaccurate when uncalibrated, so
+ don't assume the cpu temp is actually what /proc/cpuinfo says it is.
config TAU_INT
bool "Interrupt driven TAU driver (DANGEROUS)"
--
2.26.2
^ permalink raw reply related
* [PATCH 2/5] powerpc/tau: Convert from timer to workqueue
From: Finn Thain @ 2020-09-04 23:02 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1599260540.git.fthain@telegraphics.com.au>
Since commit 19dbdcb8039cf ("smp: Warn on function calls from softirq
context") the Thermal Assist Unit driver causes a warning like the
following when CONFIG_SMP is enabled.
------------[ cut here ]------------
WARNING: CPU: 0 PID: 0 at kernel/smp.c:428 smp_call_function_many_cond+0xf4/0x38c
Modules linked in:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.7.0-pmac #3
NIP: c00b37a8 LR: c00b3abc CTR: c001218c
REGS: c0799c60 TRAP: 0700 Not tainted (5.7.0-pmac)
MSR: 00029032 <EE,ME,IR,DR,RI> CR: 42000224 XER: 00000000
GPR00: c00b3abc c0799d18 c076e300 c079ef5c c0011fec 00000000 00000000 00000000
GPR08: 00000100 00000100 00008000 ffffffff 42000224 00000000 c079d040 c079d044
GPR16: 00000001 00000000 00000004 c0799da0 c079f054 c07a0000 c07a0000 00000000
GPR24: c0011fec 00000000 c079ef5c c079ef5c 00000000 00000000 00000000 00000000
NIP [c00b37a8] smp_call_function_many_cond+0xf4/0x38c
LR [c00b3abc] on_each_cpu+0x38/0x68
Call Trace:
[c0799d18] [ffffffff] 0xffffffff (unreliable)
[c0799d68] [c00b3abc] on_each_cpu+0x38/0x68
[c0799d88] [c0096704] call_timer_fn.isra.26+0x20/0x7c
[c0799d98] [c0096b40] run_timer_softirq+0x1d4/0x3fc
[c0799df8] [c05b4368] __do_softirq+0x118/0x240
[c0799e58] [c0039c44] irq_exit+0xc4/0xcc
[c0799e68] [c000ade8] timer_interrupt+0x1b0/0x230
[c0799ea8] [c0013520] ret_from_except+0x0/0x14
--- interrupt: 901 at arch_cpu_idle+0x24/0x6c
LR = arch_cpu_idle+0x24/0x6c
[c0799f70] [00000001] 0x1 (unreliable)
[c0799f80] [c0060990] do_idle+0xd8/0x17c
[c0799fa0] [c0060ba8] cpu_startup_entry+0x24/0x28
[c0799fb0] [c072d220] start_kernel+0x434/0x44c
[c0799ff0] [00003860] 0x3860
Instruction dump:
8129f204 2f890000 40beff98 3d20c07a 8929eec4 2f890000 40beff88 0fe00000
81220000 552805de 550802ef 4182ff84 <0fe00000> 3860ffff 7f65db78 7f44d378
---[ end trace 34a886e47819c2eb ]---
Don't call on_each_cpu() from a timer callback, call it from a worker
thread instead.
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2")
Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
arch/powerpc/kernel/tau_6xx.c | 38 +++++++++++++++++------------------
1 file changed, 18 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index 976d5bc1b5176..268205cc347da 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -13,13 +13,14 @@
*/
#include <linux/errno.h>
-#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
#include <asm/io.h>
#include <asm/reg.h>
@@ -39,8 +40,6 @@ static struct tau_temp
unsigned char grew;
} tau[NR_CPUS];
-struct timer_list tau_timer;
-
#undef DEBUG
/* TODO: put these in a /proc interface, with some sanity checks, and maybe
@@ -50,7 +49,7 @@ struct timer_list tau_timer;
#define step_size 2 /* step size when temp goes out of range */
#define window_expand 1 /* expand the window by this much */
/* configurable values for shrinking the window */
-#define shrink_timer 2*HZ /* period between shrinking the window */
+#define shrink_timer 2000 /* period between shrinking the window */
#define min_window 2 /* minimum window size, degrees C */
static void set_thresholds(unsigned long cpu)
@@ -187,14 +186,18 @@ static void tau_timeout(void * info)
local_irq_restore(flags);
}
-static void tau_timeout_smp(struct timer_list *unused)
-{
+static struct workqueue_struct *tau_workq;
- /* schedule ourselves to be run again */
- mod_timer(&tau_timer, jiffies + shrink_timer) ;
+static void tau_work_func(struct work_struct *work)
+{
+ msleep(shrink_timer);
on_each_cpu(tau_timeout, NULL, 0);
+ /* schedule ourselves to be run again */
+ queue_work(tau_workq, work);
}
+DECLARE_WORK(tau_work, tau_work_func);
+
/*
* setup the TAU
*
@@ -227,21 +230,16 @@ static int __init TAU_init(void)
return 1;
}
-
- /* first, set up the window shrinking timer */
- timer_setup(&tau_timer, tau_timeout_smp, 0);
- tau_timer.expires = jiffies + shrink_timer;
- add_timer(&tau_timer);
+ tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0);
+ if (!tau_workq)
+ return -ENOMEM;
on_each_cpu(TAU_init_smp, NULL, 0);
- printk("Thermal assist unit ");
-#ifdef CONFIG_TAU_INT
- printk("using interrupts, ");
-#else
- printk("using timers, ");
-#endif
- printk("shrink_timer: %d jiffies\n", shrink_timer);
+ queue_work(tau_workq, &tau_work);
+
+ pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n",
+ IS_ENABLED(CONFIG_TAU_INT) ? "interrupts" : "workqueue", shrink_timer);
tau_initialized = 1;
return 0;
--
2.26.2
^ permalink raw reply related
* [PATCH 3/5] powerpc/tau: Remove duplicated set_thresholds() call
From: Finn Thain @ 2020-09-04 23:02 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1599260540.git.fthain@telegraphics.com.au>
The commentary at the call site seems to disagree with the code. The
conditional prevents calling set_thresholds() via the exception handler,
which appears to crash. Perhaps that's because it immediately triggers
another TAU exception. Anyway, calling set_thresholds() from TAUupdate()
is redundant because tau_timeout() does so.
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2")
Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
arch/powerpc/kernel/tau_6xx.c | 5 -----
1 file changed, 5 deletions(-)
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index 268205cc347da..b8d7e7d498e0a 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -110,11 +110,6 @@ static void TAUupdate(int cpu)
#ifdef DEBUG
printk("grew = %d\n", tau[cpu].grew);
#endif
-
-#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */
- set_thresholds(cpu);
-#endif
-
}
#ifdef CONFIG_TAU_INT
--
2.26.2
^ permalink raw reply related
* [PATCH 1/5] powerpc/tau: Use appropriate temperature sample interval
From: Finn Thain @ 2020-09-04 23:02 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1599260540.git.fthain@telegraphics.com.au>
According to the MPC750 Users Manual, the SITV value in Thermal
Management Register 3 is 13 bits long. The present code calculates the
SITV value as 60 * 500 cycles. This would overflow to give 10 us on
a 500 MHz CPU rather than the intended 60 us. (But according to the
Microprocessor Datasheet, there is also a factor of 266 that has to be
applied to this value on certain parts i.e. speed sort above 266 MHz.)
Always use the maximum cycle count, as recommended by the Datasheet.
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2")
Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
arch/powerpc/include/asm/reg.h | 2 +-
arch/powerpc/kernel/tau_6xx.c | 12 ++++--------
2 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 88e6c78100d9b..c750afc62887c 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -815,7 +815,7 @@
#define THRM1_TIN (1 << 31)
#define THRM1_TIV (1 << 30)
#define THRM1_THRES(x) ((x&0x7f)<<23)
-#define THRM3_SITV(x) ((x&0x3fff)<<1)
+#define THRM3_SITV(x) ((x & 0x1fff) << 1)
#define THRM1_TID (1<<2)
#define THRM1_TIE (1<<1)
#define THRM1_V (1<<0)
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index e2ab8a111b693..976d5bc1b5176 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -178,15 +178,11 @@ static void tau_timeout(void * info)
* complex sleep code needs to be added. One mtspr every time
* tau_timeout is called is probably not a big deal.
*
- * Enable thermal sensor and set up sample interval timer
- * need 20 us to do the compare.. until a nice 'cpu_speed' function
- * call is implemented, just assume a 500 mhz clock. It doesn't really
- * matter if we take too long for a compare since it's all interrupt
- * driven anyway.
- *
- * use a extra long time.. (60 us @ 500 mhz)
+ * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
+ * recommends that "the maximum value be set in THRM3 under all
+ * conditions."
*/
- mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E);
+ mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);
local_irq_restore(flags);
}
--
2.26.2
^ permalink raw reply related
* [PATCH 0/5] powerpc/tau: TAU driver fixes
From: Finn Thain @ 2020-09-04 23:02 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, linux-kernel
This patch series fixes various bugs in the Thermal Assist Unit driver.
It was tested on 266 MHz and 292 MHz PowerBook G3 laptops.
Finn Thain (5):
powerpc/tau: Use appropriate temperature sample interval
powerpc/tau: Convert from timer to workqueue
powerpc/tau: Remove duplicated set_thresholds() call
powerpc/tau: Check processor type before enabling TAU interrupt
powerpc/tau: Disable TAU between measurements
arch/powerpc/include/asm/reg.h | 2 +-
arch/powerpc/kernel/tau_6xx.c | 147 +++++++++++++--------------------
arch/powerpc/platforms/Kconfig | 14 +---
3 files changed, 62 insertions(+), 101 deletions(-)
--
2.26.2
^ permalink raw reply
* [Bug 208181] BUG: KASAN: stack-out-of-bounds in strcmp+0x58/0xd8
From: bugzilla-daemon @ 2020-09-04 22:57 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-208181-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=208181
Erhard F. (erhard_f@mailbox.org) changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|NEW |RESOLVED
Resolution|--- |OBSOLETE
--- Comment #19 from Erhard F. (erhard_f@mailbox.org) ---
I noticed that I covered the "do_IRQ: stack overflow: ...." problem already in
bug #207129 so closing this one as suggested before.
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* [Bug 205099] KASAN hit at raid6_pq: BUG: Unable to handle kernel data access at 0x00f0fd0d
From: bugzilla-daemon @ 2020-09-04 22:54 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-205099-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=205099
Erhard F. (erhard_f@mailbox.org) changed:
What |Removed |Added
----------------------------------------------------------------------------
Attachment #288413|0 |1
is obsolete| |
--- Comment #31 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 292347
--> https://bugzilla.kernel.org/attachment.cgi?id=292347&action=edit
kernel .config (5.9-rc3, OUTLINE KASAN, PowerMac G4 DP)
Does happen even if RAID support is not actively selected in the config as
btrfs pulls in RAID6_PQ on its own.
# CONFIG_DM_RAID is not set
CONFIG_RAID6_PQ=m
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* [Bug 205099] KASAN hit at raid6_pq: BUG: Unable to handle kernel data access at 0x00f0fd0d
From: bugzilla-daemon @ 2020-09-04 22:50 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-205099-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=205099
Erhard F. (erhard_f@mailbox.org) changed:
What |Removed |Added
----------------------------------------------------------------------------
Attachment #287625|0 |1
is obsolete| |
Attachment #288411|0 |1
is obsolete| |
--- Comment #30 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 292345
--> https://bugzilla.kernel.org/attachment.cgi?id=292345&action=edit
dmesg (5.9-rc3, OUTLINE KASAN, PowerMac G4 DP)
Re-tested with v5.9-rc3 out of curiosity. Not much change here, the bug shows
up with OUTLINE KASAN but not with INLINE KASAN, everything else being equal:
==================================================================
BUG: KASAN: user-memory-access in raid6_altivec8_gen_syndrome_real+0x2b0/0x480
[raid6_pq]
Read of size 4 at addr 5764b118 by task modprobe/126
CPU: 1 PID: 126 Comm: modprobe Tainted: G W 5.9.0-rc3-PowerMacG4
#2
Call Trace:
[e32cb7b8] [c0517aac] dump_stack+0xc4/0xf8 (unreliable)
[e32cb7e8] [c026e73c] kasan_report+0x16c/0x170
[e32cb828] [b02004e0] raid6_altivec8_gen_syndrome_real+0x2b0/0x480 [raid6_pq]
[e32cba18] [b02006fc] raid6_altivec8_gen_syndrome+0x4c/0x88 [raid6_pq]
[e32cba38] [b021a42c] init_module+0x42c/0x590 [raid6_pq]
[e32cbb08] [c00058a0] do_one_initcall+0xb8/0x3dc
[e32cbbd8] [c011c0fc] do_init_module+0xa8/0x2c4
[e32cbc08] [c011f02c] load_module+0x2b98/0x2d4c
[e32cbe18] [c011f448] sys_finit_module+0x100/0x138
[e32cbf38] [c001a1cc] ret_from_syscall+0x0/0x34
--- interrupt: c01 at 0x3d2068
LR = 0x506104
==================================================================
BUG: Unable to handle kernel data access on read at 0x5764b118
Faulting instruction address: 0xb02004e0
Oops: Kernel access of bad area, sig: 11 [#1]
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* [Bug 208181] BUG: KASAN: stack-out-of-bounds in strcmp+0x58/0xd8
From: bugzilla-daemon @ 2020-09-04 22:21 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-208181-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=208181
--- Comment #18 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 292339
--> https://bugzilla.kernel.org/attachment.cgi?id=292339&action=edit
kernel .config (5.9-rc3, PowerMac G4 DP)
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* [Bug 208181] BUG: KASAN: stack-out-of-bounds in strcmp+0x58/0xd8
From: bugzilla-daemon @ 2020-09-04 22:19 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-208181-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=208181
--- Comment #17 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 292337
--> https://bugzilla.kernel.org/attachment.cgi?id=292337&action=edit
dmesg (5.9-rc3, INLINE KASAN, PowerMac G4 DP)
Re-tried with 5.9-rc3 (inline KASAN). The original problem (stack-out-of-bounds
in strcmp+0x58/0xd8) is gone, but still problems with stack usage when doing
larger build jobs:
[...]
[ 1929.683510] do_IRQ: stack overflow: 1696
[ 1929.690727] CPU: 1 PID: 735 Comm: mount.nfs Tainted: G W
5.9.0-rc3-PowerMacG4 #1
[ 1929.697847] Call Trace:
[ 1929.704633] [d0ca4670] [c0a75518] dump_stack+0xfc/0x130 (unreliable)
[ 1929.711507] [d0ca46a0] [c000b094] do_IRQ+0x128/0x180
[ 1929.717998] [d0ca46d0] [c002e560] ret_from_except+0x0/0x14
[ 1929.724652] --- interrupt: 501 at _raw_spin_unlock_irqrestore+0x3c/0xa4
LR = _raw_spin_unlock_irqrestore+0x38/0xa4
[ 1929.738722] [d0ca47b8] [c0a6dc90] stack_depot_save+0x20c/0x390
[ 1929.746132] [d0ca4818] [c04d4b70] kasan_save_stack+0x40/0x48
[ 1929.753675] [d0ca4928] [c04d4b9c] kasan_set_track+0x24/0x30
[ 1929.761298] [d0ca4938] [c04d710c] kasan_set_free_info+0x28/0x3c
[ 1929.769073] [d0ca4948] [c04d4f74] __kasan_slab_free+0x104/0x118
[ 1929.776983] [d0ca4968] [c04ce800] slab_free_freelist_hook+0xec/0x17c
[ 1929.785111] [d0ca49a8] [c04d3468] kmem_cache_free+0x58/0x2a0
[ 1929.793391] [d0ca49f8] [c11b251c] packet_rcv+0xb9c/0xbb4
[ 1929.801797] [d0ca4a48] [c0dbfd98] dev_queue_xmit_nit+0x6e4/0x748
[ 1929.810434] [d0ca4ab8] [c0dcaf80] dev_hard_start_xmit+0xec/0x880
[ 1929.819207] [d0ca4b18] [c0ea4814] sch_direct_xmit+0x1f8/0x818
[ 1929.828111] [d0ca4bf8] [c0dcc884] __dev_queue_xmit+0xed4/0x136c
[ 1929.837202] [d0ca4d28] [c0f256dc] ip_finish_output2+0xfcc/0x1028
[ 1929.846472] [d0ca4d88] [c0f2d848] __ip_queue_xmit+0xde0/0x1018
[ 1929.855892] [d0ca4df8] [c0f929d8] __tcp_transmit_skb+0x2550/0x2cb8
[ 1929.865486] [d0ca4ee8] [c0f98470] tcp_write_xmit+0x1d28/0x3498
[ 1929.875216] [d0ca4f78] [c0f99c8c] __tcp_push_pending_frames+0xac/0x1c4
[ 1929.885189] [d0ca4f98] [c0f5a970] tcp_sendmsg_locked+0x1c50/0x2294
[ 1929.895338] [d0ca5098] [c0f5afe4] tcp_sendmsg+0x30/0x48
[ 1929.905564] [d0ca50b8] [c0d598b0] sock_sendmsg_nosec+0xf4/0x10c
[ 1929.916463] [d0ca50d8] [b0a31840] xprt_sock_sendmsg+0x2c0/0x6e8 [sunrpc]
[ 1929.927494] [d0ca51b8] [b0a34ce8] xs_tcp_send_request+0x360/0x580 [sunrpc]
[ 1929.938699] [d0ca52e8] [b0a2eae8] xprt_transmit+0x4f8/0xe30 [sunrpc]
[ 1929.950044] [d0ca5368] [b0a1dcd8] call_transmit+0x238/0x25c [sunrpc]
[ 1929.961450] [d0ca5388] [b0a6641c] __rpc_execute+0x35c/0xbf8 [sunrpc]
[ 1929.972996] [d0ca5448] [b0a21d18] rpc_run_task+0x790/0x79c [sunrpc]
[ 1929.984850] [d0ca5498] [b1282e50] nfs4_call_sync_custom+0x14/0x80 [nfsv4]
[ 1929.996821] [d0ca54b8] [b128302c] nfs4_do_call_sync+0x170/0x1a8 [nfsv4]
[ 1930.008922] [d0ca55a8] [b12b3570] nfs4_proc_lookup_common+0x314/0xc54
[nfsv4]
[ 1930.020820] [d0ca5758] [b12b4244] nfs4_proc_lookup+0x158/0x2f0 [nfsv4]
[ 1930.032753] [d0ca57f8] [b0b49544] nfs_lookup+0x2ac/0x9ac [nfs]
[ 1930.044062] [d0ca5838] [c052c984] __lookup_slow+0x278/0x2a8
[ 1930.055461] [d0ca5958] [c05340a0] walk_component+0x288/0x30c
[ 1930.066816] [d0ca5a08] [c0534e5c] path_lookupat.isra.0+0x1b8/0x438
[ 1930.078282] [d0ca5a48] [c05372a0] filename_lookup+0x144/0x1c4
[ 1930.089834] [d0ca5b98] [c05373fc] vfs_path_lookup+0x94/0xc0
[ 1930.101389] [d0ca5c18] [c05714b8] mount_subtree+0x1c4/0x250
[ 1930.113267] [d0ca5ca8] [b12e1b2c] do_nfs4_mount+0x570/0x7fc [nfsv4]
[ 1930.125298] [d0ca5d68] [b12e202c] nfs4_try_get_tree+0xfc/0x16c [nfsv4]
[ 1930.137200] [d0ca5d88] [c050e434] vfs_get_tree+0xf8/0x398
[ 1930.149133] [d0ca5db8] [c056f968] path_mount+0x1074/0x113c
[ 1930.161107] [d0ca5e78] [c056fad8] do_mount+0xa8/0xe4
[ 1930.173109] [d0ca5f08] [c0570054] sys_mount+0xa8/0xb8
[ 1930.185160] [d0ca5f38] [c002e1cc] ret_from_syscall+0x0/0x34
[ 1930.197313] --- interrupt: c01 at 0x8b5754
LR = 0xac0be0
[ 1930.222896] Kernel panic - not syncing: corrupted stack end detected inside
scheduler
But feel free to close this bug if appropriate as the original issue is solved.
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox