* [PATCH v2 2/8] powerpc/signal: Add unsafe_copy_{vsx,fpr}_from_user()
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
Reuse the "safe" implementation from signal.c except for calling
unsafe_copy_from_user() to copy into a local buffer. Unlike the
unsafe_copy_{vsx,fpr}_to_user() functions the "copy from" functions
cannot use unsafe_get_user() directly to bypass the local buffer since
doing so significantly reduces signal handling performance.
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal.h | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 2559a681536e..e9aaeac0da37 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -53,6 +53,33 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
&buf[i], label);\
} while (0)
+#define unsafe_copy_fpr_from_user(task, from, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *__f = (u64 __user *)from; \
+ u64 buf[ELF_NFPREG]; \
+ int i; \
+ \
+ unsafe_copy_from_user(buf, __f, ELF_NFPREG * sizeof(double), \
+ label); \
+ for (i = 0; i < ELF_NFPREG - 1; i++) \
+ __t->thread.TS_FPR(i) = buf[i]; \
+ __t->thread.fp_state.fpscr = buf[i]; \
+} while (0)
+
+#define unsafe_copy_vsx_from_user(task, from, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *__f = (u64 __user *)from; \
+ u64 buf[ELF_NVSRHALFREG]; \
+ int i; \
+ \
+ unsafe_copy_from_user(buf, __f, \
+ ELF_NVSRHALFREG * sizeof(double), \
+ label); \
+ for (i = 0; i < ELF_NVSRHALFREG ; i++) \
+ __t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; \
+} while (0)
+
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
#define unsafe_copy_ckfpr_to_user(to, task, label) do { \
struct task_struct *__t = task; \
@@ -80,6 +107,10 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
unsafe_copy_to_user(to, (task)->thread.fp_state.fpr, \
ELF_NFPREG * sizeof(double), label)
+#define unsafe_copy_fpr_from_user(task, from, label) \
+ unsafe_copy_from_user((task)->thread.fp_state.fpr, from \
+ ELF_NFPREG * sizeof(double), label)
+
static inline unsigned long
copy_fpr_to_user(void __user *to, struct task_struct *task)
{
@@ -115,6 +146,8 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
#else
#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
+#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
+
static inline unsigned long
copy_fpr_to_user(void __user *to, struct task_struct *task)
{
--
2.29.0
^ permalink raw reply related
* [PATCH v2 3/8] powerpc/signal64: Move non-inline functions out of setup_sigcontext()
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
There are non-inline functions which get called in setup_sigcontext() to
save register state to the thread struct. Move these functions into a
separate prepare_setup_sigcontext() function so that
setup_sigcontext() can be refactored later into an "unsafe" version
which assumes an open uaccess window. Non-inline functions should be
avoided when uaccess is open.
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 32 +++++++++++++++++++++-----------
1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 7df088b9ad0f..ece1f982dd05 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -79,6 +79,24 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
}
#endif
+static void prepare_setup_sigcontext(struct task_struct *tsk, int ctx_has_vsx_region)
+{
+#ifdef CONFIG_ALTIVEC
+ /* save altivec registers */
+ if (tsk->thread.used_vr)
+ flush_altivec_to_thread(tsk);
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ tsk->thread.vrsave = mfspr(SPRN_VRSAVE);
+#endif /* CONFIG_ALTIVEC */
+
+ flush_fp_to_thread(tsk);
+
+#ifdef CONFIG_VSX
+ if (tsk->thread.used_vsr && ctx_has_vsx_region)
+ flush_vsx_to_thread(tsk);
+#endif /* CONFIG_VSX */
+}
+
/*
* Set up the sigcontext for the signal frame.
*/
@@ -97,7 +115,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
*/
#ifdef CONFIG_ALTIVEC
elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
- unsigned long vrsave;
#endif
struct pt_regs *regs = tsk->thread.regs;
unsigned long msr = regs->msr;
@@ -112,7 +129,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
/* save altivec registers */
if (tsk->thread.used_vr) {
- flush_altivec_to_thread(tsk);
/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
33 * sizeof(vector128));
@@ -124,17 +140,10 @@ static long setup_sigcontext(struct sigcontext __user *sc,
/* We always copy to/from vrsave, it's 0 if we don't have or don't
* use altivec.
*/
- vrsave = 0;
- if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
- vrsave = mfspr(SPRN_VRSAVE);
- tsk->thread.vrsave = vrsave;
- }
-
- err |= __put_user(vrsave, (u32 __user *)&v_regs[33]);
+ err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
#else /* CONFIG_ALTIVEC */
err |= __put_user(0, &sc->v_regs);
#endif /* CONFIG_ALTIVEC */
- flush_fp_to_thread(tsk);
/* copy fpr regs and fpscr */
err |= copy_fpr_to_user(&sc->fp_regs, tsk);
@@ -150,7 +159,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
* VMX data.
*/
if (tsk->thread.used_vsr && ctx_has_vsx_region) {
- flush_vsx_to_thread(tsk);
v_regs += ELF_NVRREG;
err |= copy_vsx_to_user(v_regs, tsk);
/* set MSR_VSX in the MSR value in the frame to
@@ -655,6 +663,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
ctx_has_vsx_region = 1;
if (old_ctx != NULL) {
+ prepare_setup_sigcontext(current, ctx_has_vsx_region);
if (!access_ok(old_ctx, ctx_size)
|| setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
ctx_has_vsx_region)
@@ -842,6 +851,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
#endif
{
err |= __put_user(0, &frame->uc.uc_link);
+ prepare_setup_sigcontext(tsk, 1);
err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
NULL, (unsigned long)ksig->ka.sa.sa_handler,
1);
--
2.29.0
^ permalink raw reply related
* [PATCH v2 4/8] powerpc/signal64: Remove TM ifdefery in middle of if/else block
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
Similar to commit 1c32940f5220 ("powerpc/signal32: Remove ifdefery in
middle of if/else") for PPC32, remove the messy ifdef. Unlike PPC32, the
ifdef cannot be removed entirely since the uc_transact member of the
sigframe depends on CONFIG_PPC_TRANSACTIONAL_MEM=y.
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index ece1f982dd05..d3e9519b2e62 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -710,9 +710,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
struct pt_regs *regs = current_pt_regs();
struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
sigset_t set;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
unsigned long msr;
-#endif
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
@@ -762,10 +760,12 @@ SYSCALL_DEFINE0(rt_sigreturn)
* restore_tm_sigcontexts.
*/
regs->msr &= ~MSR_TS_MASK;
+#endif
if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
goto badframe;
if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* We recheckpoint on return. */
struct ucontext __user *uc_transact;
@@ -778,9 +778,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
&uc_transact->uc_mcontext))
goto badframe;
- } else
#endif
- {
+ } else {
/*
* Fall through, for non-TM restore
*
@@ -818,10 +817,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
unsigned long newsp = 0;
long err = 0;
struct pt_regs *regs = tsk->thread.regs;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* Save the thread's msr before get_tm_stackpointer() changes it */
- unsigned long msr = regs->msr;
-#endif
+ unsigned long msr __maybe_unused = regs->msr;
frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
if (!access_ok(frame, sizeof(*frame)))
@@ -836,8 +833,9 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
/* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+
if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* The ucontext_t passed to userland points to the second
* ucontext_t (for transactional state) with its uc_link ptr.
*/
@@ -847,9 +845,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
tsk, ksig->sig, NULL,
(unsigned long)ksig->ka.sa.sa_handler,
msr);
- } else
#endif
- {
+ } else {
err |= __put_user(0, &frame->uc.uc_link);
prepare_setup_sigcontext(tsk, 1);
err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
--
2.29.0
^ permalink raw reply related
* [PATCH v2 0/8] Improve signal performance on PPC64 with KUAP
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
As reported by Anton, there is a large penalty to signal handling
performance on radix systems using KUAP. The signal handling code
performs many user access operations, each of which needs to switch the
KUAP permissions bit to open and then close user access. This involves a
costly 'mtspr' operation [0].
There is existing work done on x86 and by Christopher Leroy for PPC32 to
instead open up user access in "blocks" using user_*_access_{begin,end}.
We can do the same in PPC64 to bring performance back up on KUAP-enabled
radix systems.
This series applies on top of Christophe Leroy's work for PPC32 [1] (I'm
sure patchwork won't be too happy about that).
The first two patches add some needed 'unsafe' versions of copy-from
functions. While these do not make use of asm-goto they still allow for
avoiding the repeated uaccess switches.
The third patch moves functions called by setup_sigcontext() into a new
prepare_setup_sigcontext() to simplify converting setup_sigcontext()
into an 'unsafe' version which assumes an open uaccess window later.
The fourth patch cleans-up some of the Transactional Memory ifdef stuff
to simplify using uaccess blocks later.
The next two patches rewrite some of the signal64 helper functions to
be 'unsafe'. Finally, the last two patches update the main signal
handling functions to make use of the new 'unsafe' helpers and eliminate
some additional uaccess switching.
I used the will-it-scale signal1 benchmark to measure and compare
performance [2]. The below results are from a P9 Blackbird system. Note
that currently hash does not support KUAP and is therefore used as the
"baseline" comparison. Bigger numbers are better:
signal1_threads -t1 -s10
| | hash | radix |
| --------------- | ------ | ------ |
| linuxppc/next | 289014 | 158408 |
| unsafe-signal64 | 298506 | 253053 |
[0]: https://github.com/linuxppc/issues/issues/277
[1]: https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=196278
[2]: https://github.com/antonblanchard/will-it-scale/blob/master/tests/signal1.c
v2: * Rebase on latest linuxppc/next + Christophe Leroy's PPC32
signal series
* Simplify/remove TM ifdefery similar to PPC32 series and clean
up the uaccess begin/end calls
* Isolate non-inline functions so they are not called when
uaccess window is open
Christopher M. Riedl (6):
powerpc/uaccess: Add unsafe_copy_from_user
powerpc/signal: Add unsafe_copy_{vsx,fpr}_from_user()
powerpc/signal64: Move non-inline functions out of setup_sigcontext()
powerpc/signal64: Remove TM ifdefery in middle of if/else block
powerpc/signal64: Replace setup_sigcontext() w/
unsafe_setup_sigcontext()
powerpc/signal64: Replace restore_sigcontext() w/
unsafe_restore_sigcontext()
Daniel Axtens (2):
powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess
switches
powerpc/signal64: Rewrite rt_sigreturn() to minimise uaccess switches
arch/powerpc/include/asm/uaccess.h | 28 ++--
arch/powerpc/kernel/signal.h | 33 ++++
arch/powerpc/kernel/signal_64.c | 239 ++++++++++++++++++-----------
3 files changed, 201 insertions(+), 99 deletions(-)
--
2.29.0
^ permalink raw reply
* [PATCH v2 8/8] powerpc/signal64: Rewrite rt_sigreturn() to minimise uaccess switches
From: Christopher M. Riedl @ 2020-11-05 5:17 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Daniel Axtens
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
From: Daniel Axtens <dja@axtens.net>
Add uaccess blocks and use the 'unsafe' versions of functions doing user
access where possible to reduce the number of times uaccess has to be
opened/closed.
Signed-off-by: Daniel Axtens <dja@axtens.net>
Co-developed-by: Christopher M. Riedl <cmr@codefail.de>
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d17f2d5436d2..82e68a508e5c 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -784,8 +784,11 @@ SYSCALL_DEFINE0(rt_sigreturn)
regs->msr &= ~MSR_TS_MASK;
#endif
- if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
+ if (!user_read_access_begin(uc, sizeof(*uc)))
goto badframe;
+
+ unsafe_get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR], badframe_block);
+
if (MSR_TM_ACTIVE(msr)) {
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* We recheckpoint on return. */
@@ -793,10 +796,12 @@ SYSCALL_DEFINE0(rt_sigreturn)
/* Trying to start TM on non TM system */
if (!cpu_has_feature(CPU_FTR_TM))
- goto badframe;
+ goto badframe_block;
+
+ unsafe_get_user(uc_transact, &uc->uc_link, badframe_block);
+
+ user_read_access_end();
- if (__get_user(uc_transact, &uc->uc_link))
- goto badframe;
if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
&uc_transact->uc_mcontext))
goto badframe;
@@ -815,12 +820,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
* causing a TM bad thing.
*/
current->thread.regs->msr &= ~MSR_TS_MASK;
- if (!user_read_access_begin(uc, sizeof(*uc)))
- return -EFAULT;
- if (__unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) {
- user_read_access_end();
- goto badframe;
- }
+ unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext,
+ badframe_block);
+
user_read_access_end();
}
@@ -830,6 +832,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
set_thread_flag(TIF_RESTOREALL);
return 0;
+badframe_block:
+ user_read_access_end();
badframe:
signal_fault(current, regs, "rt_sigreturn", uc);
--
2.29.0
^ permalink raw reply related
* [PATCH v2 6/8] powerpc/signal64: Replace restore_sigcontext() w/ unsafe_restore_sigcontext()
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
Previously restore_sigcontext() performed a costly KUAP switch on every
uaccess operation. These repeated uaccess switches cause a significant
drop in signal handling performance.
Rewrite restore_sigcontext() to assume that a userspace read access
window is open. Replace all uaccess functions with their 'unsafe'
versions which avoid the repeated uaccess switches.
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 68 ++++++++++++++++++++-------------
1 file changed, 41 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 3f25309826b6..d72153825719 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -326,14 +326,14 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
/*
* Restore the sigcontext from the signal frame.
*/
-
-static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
- struct sigcontext __user *sc)
+#define unsafe_restore_sigcontext(tsk, set, sig, sc, e) \
+ unsafe_op_wrap(__unsafe_restore_sigcontext(tsk, set, sig, sc), e)
+static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_t *set,
+ int sig, struct sigcontext __user *sc)
{
#ifdef CONFIG_ALTIVEC
elf_vrreg_t __user *v_regs;
#endif
- unsigned long err = 0;
unsigned long save_r13 = 0;
unsigned long msr;
struct pt_regs *regs = tsk->thread.regs;
@@ -348,27 +348,28 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
save_r13 = regs->gpr[13];
/* copy the GPRs */
- err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr));
- err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]);
+ unsafe_copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr),
+ efault_out);
+ unsafe_get_user(regs->nip, &sc->gp_regs[PT_NIP], efault_out);
/* get MSR separately, transfer the LE bit if doing signal return */
- err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+ unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out);
if (sig)
regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
- err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]);
- err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]);
- err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]);
- err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]);
- err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]);
+ unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out);
+ unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out);
+ unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out);
+ unsafe_get_user(regs->xer, &sc->gp_regs[PT_XER], efault_out);
+ unsafe_get_user(regs->ccr, &sc->gp_regs[PT_CCR], efault_out);
/* Don't allow userspace to set SOFTE */
set_trap_norestart(regs);
- err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
- err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
- err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
+ unsafe_get_user(regs->dar, &sc->gp_regs[PT_DAR], efault_out);
+ unsafe_get_user(regs->dsisr, &sc->gp_regs[PT_DSISR], efault_out);
+ unsafe_get_user(regs->result, &sc->gp_regs[PT_RESULT], efault_out);
if (!sig)
regs->gpr[13] = save_r13;
if (set != NULL)
- err |= __get_user(set->sig[0], &sc->oldmask);
+ unsafe_get_user(set->sig[0], &sc->oldmask, efault_out);
/*
* Force reload of FP/VEC.
@@ -378,29 +379,28 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
#ifdef CONFIG_ALTIVEC
- err |= __get_user(v_regs, &sc->v_regs);
- if (err)
- return err;
+ unsafe_get_user(v_regs, &sc->v_regs, efault_out);
if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
return -EFAULT;
/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
if (v_regs != NULL && (msr & MSR_VEC) != 0) {
- err |= __copy_from_user(&tsk->thread.vr_state, v_regs,
- 33 * sizeof(vector128));
+ unsafe_copy_from_user(&tsk->thread.vr_state, v_regs,
+ 33 * sizeof(vector128), efault_out);
tsk->thread.used_vr = true;
} else if (tsk->thread.used_vr) {
memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
}
/* Always get VRSAVE back */
if (v_regs != NULL)
- err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
+ unsafe_get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33],
+ efault_out);
else
tsk->thread.vrsave = 0;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
/* restore floating point */
- err |= copy_fpr_from_user(tsk, &sc->fp_regs);
+ unsafe_copy_fpr_from_user(tsk, &sc->fp_regs, efault_out);
#ifdef CONFIG_VSX
/*
* Get additional VSX data. Update v_regs to point after the
@@ -409,14 +409,17 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
*/
v_regs += ELF_NVRREG;
if ((msr & MSR_VSX) != 0) {
- err |= copy_vsx_from_user(tsk, v_regs);
+ unsafe_copy_vsx_from_user(tsk, v_regs, efault_out);
tsk->thread.used_vsr = true;
} else {
for (i = 0; i < 32 ; i++)
tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
}
#endif
- return err;
+ return 0;
+
+efault_out:
+ return -EFAULT;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -701,8 +704,14 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
do_exit(SIGSEGV);
set_current_blocked(&set);
- if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext))
+
+ if (!user_read_access_begin(new_ctx, ctx_size))
+ return -EFAULT;
+ if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
+ user_read_access_end();
do_exit(SIGSEGV);
+ }
+ user_read_access_end();
/* This returns like rt_sigreturn */
set_thread_flag(TIF_RESTOREALL);
@@ -806,8 +815,13 @@ SYSCALL_DEFINE0(rt_sigreturn)
* causing a TM bad thing.
*/
current->thread.regs->msr &= ~MSR_TS_MASK;
- if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext))
+ if (!user_read_access_begin(uc, sizeof(*uc)))
+ return -EFAULT;
+ if (__unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) {
+ user_read_access_end();
goto badframe;
+ }
+ user_read_access_end();
}
if (restore_altstack(&uc->uc_stack))
--
2.29.0
^ permalink raw reply related
* [PATCH v2 1/8] powerpc/uaccess: Add unsafe_copy_from_user
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
Implement raw_copy_from_user_allowed() which assumes that userspace read
access is open. Use this new function to implement raw_copy_from_user().
Finally, wrap the new function to follow the usual "unsafe_" convention
of taking a label argument. The new raw_copy_from_user_allowed() calls
__copy_tofrom_user() internally, but this is still safe to call in user
access blocks formed with user_*_access_begin()/user_*_access_end()
since asm functions are not instrumented for tracing.
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/include/asm/uaccess.h | 28 +++++++++++++++++++---------
1 file changed, 19 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index ef5bbb705c08..96b4abab4f5a 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -403,38 +403,45 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
}
#endif /* __powerpc64__ */
-static inline unsigned long raw_copy_from_user(void *to,
- const void __user *from, unsigned long n)
+static inline unsigned long
+raw_copy_from_user_allowed(void *to, const void __user *from, unsigned long n)
{
- unsigned long ret;
if (__builtin_constant_p(n) && (n <= 8)) {
- ret = 1;
+ unsigned long ret = 1;
switch (n) {
case 1:
barrier_nospec();
- __get_user_size(*(u8 *)to, from, 1, ret);
+ __get_user_size_allowed(*(u8 *)to, from, 1, ret);
break;
case 2:
barrier_nospec();
- __get_user_size(*(u16 *)to, from, 2, ret);
+ __get_user_size_allowed(*(u16 *)to, from, 2, ret);
break;
case 4:
barrier_nospec();
- __get_user_size(*(u32 *)to, from, 4, ret);
+ __get_user_size_allowed(*(u32 *)to, from, 4, ret);
break;
case 8:
barrier_nospec();
- __get_user_size(*(u64 *)to, from, 8, ret);
+ __get_user_size_allowed(*(u64 *)to, from, 8, ret);
break;
}
if (ret == 0)
return 0;
}
+ return __copy_tofrom_user((__force void __user *)to, from, n);
+}
+
+static inline unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ unsigned long ret;
+
barrier_nospec();
allow_read_from_user(from, n);
- ret = __copy_tofrom_user((__force void __user *)to, from, n);
+ ret = raw_copy_from_user_allowed(to, from, n);
prevent_read_from_user(from, n);
return ret;
}
@@ -542,6 +549,9 @@ user_write_access_begin(const void __user *ptr, size_t len)
#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
#define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
+#define unsafe_copy_from_user(d, s, l, e) \
+ unsafe_op_wrap(raw_copy_from_user_allowed(d, s, l), e)
+
#define unsafe_copy_to_user(d, s, l, e) \
do { \
u8 __user *_dst = (u8 __user *)(d); \
--
2.29.0
^ permalink raw reply related
* [PATCH v2 5/8] powerpc/signal64: Replace setup_sigcontext() w/ unsafe_setup_sigcontext()
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
Previously setup_sigcontext() performed a costly KUAP switch on every
uaccess operation. These repeated uaccess switches cause a significant
drop in signal handling performance.
Rewrite setup_sigcontext() to assume that a userspace write access window
is open. Replace all uaccess functions with their 'unsafe' versions
which avoid the repeated uaccess switches.
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 70 ++++++++++++++++++++-------------
1 file changed, 43 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d3e9519b2e62..3f25309826b6 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -101,9 +101,13 @@ static void prepare_setup_sigcontext(struct task_struct *tsk, int ctx_has_vsx_re
* Set up the sigcontext for the signal frame.
*/
-static long setup_sigcontext(struct sigcontext __user *sc,
- struct task_struct *tsk, int signr, sigset_t *set,
- unsigned long handler, int ctx_has_vsx_region)
+#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler, \
+ ctx_has_vsx_region, e) \
+ unsafe_op_wrap(__unsafe_setup_sigcontext(sc, tsk, signr, set, \
+ handler, ctx_has_vsx_region), e)
+static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc,
+ struct task_struct *tsk, int signr, sigset_t *set,
+ unsigned long handler, int ctx_has_vsx_region)
{
/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
* process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -118,20 +122,19 @@ static long setup_sigcontext(struct sigcontext __user *sc,
#endif
struct pt_regs *regs = tsk->thread.regs;
unsigned long msr = regs->msr;
- long err = 0;
/* Force usr to alway see softe as 1 (interrupts enabled) */
unsigned long softe = 0x1;
BUG_ON(tsk != current);
#ifdef CONFIG_ALTIVEC
- err |= __put_user(v_regs, &sc->v_regs);
+ unsafe_put_user(v_regs, &sc->v_regs, efault_out);
/* save altivec registers */
if (tsk->thread.used_vr) {
/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
- err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
- 33 * sizeof(vector128));
+ unsafe_copy_to_user(v_regs, &tsk->thread.vr_state,
+ 33 * sizeof(vector128), efault_out);
/* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)
* contains valid data.
*/
@@ -140,12 +143,12 @@ static long setup_sigcontext(struct sigcontext __user *sc,
/* We always copy to/from vrsave, it's 0 if we don't have or don't
* use altivec.
*/
- err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
+ unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
#else /* CONFIG_ALTIVEC */
- err |= __put_user(0, &sc->v_regs);
+ unsafe_put_user(0, &sc->v_regs, efault_out);
#endif /* CONFIG_ALTIVEC */
/* copy fpr regs and fpscr */
- err |= copy_fpr_to_user(&sc->fp_regs, tsk);
+ unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out);
/*
* Clear the MSR VSX bit to indicate there is no valid state attached
@@ -160,24 +163,27 @@ static long setup_sigcontext(struct sigcontext __user *sc,
*/
if (tsk->thread.used_vsr && ctx_has_vsx_region) {
v_regs += ELF_NVRREG;
- err |= copy_vsx_to_user(v_regs, tsk);
+ unsafe_copy_vsx_to_user(v_regs, tsk, efault_out);
/* set MSR_VSX in the MSR value in the frame to
* indicate that sc->vs_reg) contains valid data.
*/
msr |= MSR_VSX;
}
#endif /* CONFIG_VSX */
- err |= __put_user(&sc->gp_regs, &sc->regs);
+ unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out);
WARN_ON(!FULL_REGS(regs));
- err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
- err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
- err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
- err |= __put_user(signr, &sc->signal);
- err |= __put_user(handler, &sc->handler);
+ unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out);
+ unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out);
+ unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out);
+ unsafe_put_user(signr, &sc->signal, efault_out);
+ unsafe_put_user(handler, &sc->handler, efault_out);
if (set != NULL)
- err |= __put_user(set->sig[0], &sc->oldmask);
+ unsafe_put_user(set->sig[0], &sc->oldmask, efault_out);
- return err;
+ return 0;
+
+efault_out:
+ return -EFAULT;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -664,12 +670,15 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
if (old_ctx != NULL) {
prepare_setup_sigcontext(current, ctx_has_vsx_region);
- if (!access_ok(old_ctx, ctx_size)
- || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
- ctx_has_vsx_region)
- || __copy_to_user(&old_ctx->uc_sigmask,
- ¤t->blocked, sizeof(sigset_t)))
+ if (!user_write_access_begin(old_ctx, ctx_size))
return -EFAULT;
+
+ unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
+ 0, ctx_has_vsx_region, efault_out);
+ unsafe_copy_to_user(&old_ctx->uc_sigmask, ¤t->blocked,
+ sizeof(sigset_t), efault_out);
+
+ user_write_access_end();
}
if (new_ctx == NULL)
return 0;
@@ -698,6 +707,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
/* This returns like rt_sigreturn */
set_thread_flag(TIF_RESTOREALL);
return 0;
+
+efault_out:
+ user_write_access_end();
+ return -EFAULT;
}
@@ -849,9 +862,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
} else {
err |= __put_user(0, &frame->uc.uc_link);
prepare_setup_sigcontext(tsk, 1);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
- NULL, (unsigned long)ksig->ka.sa.sa_handler,
- 1);
+ if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
+ return -EFAULT;
+ err |= __unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk,
+ ksig->sig, NULL,
+ (unsigned long)ksig->ka.sa.sa_handler, 1);
+ user_write_access_end();
}
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
--
2.29.0
^ permalink raw reply related
* [PATCH v2 7/8] powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess switches
From: Christopher M. Riedl @ 2020-11-05 5:17 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Daniel Axtens
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
From: Daniel Axtens <dja@axtens.net>
Add uaccess blocks and use the 'unsafe' versions of functions doing user
access where possible to reduce the number of times uaccess has to be
opened/closed.
There is no 'unsafe' version of copy_siginfo_to_user, so move it
slightly to allow for a "longer" uaccess block.
Signed-off-by: Daniel Axtens <dja@axtens.net>
Co-developed-by: Christopher M. Riedl <cmr@codefail.de>
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 54 +++++++++++++++++++++------------
1 file changed, 34 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d72153825719..d17f2d5436d2 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -848,44 +848,51 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
unsigned long msr __maybe_unused = regs->msr;
frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
- if (!access_ok(frame, sizeof(*frame)))
- goto badframe;
- err |= __put_user(&frame->info, &frame->pinfo);
- err |= __put_user(&frame->uc, &frame->puc);
- err |= copy_siginfo_to_user(&frame->info, &ksig->info);
- if (err)
+ /* This only applies when calling unsafe_setup_sigcontext() and must be
+ * called before opening the uaccess window.
+ */
+ if (!MSR_TM_ACTIVE(msr))
+ prepare_setup_sigcontext(tsk, 1);
+
+ if (!user_write_access_begin(frame, sizeof(*frame)))
goto badframe;
+ unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
+ unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
+
/* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
+ unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
+ unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
if (MSR_TM_ACTIVE(msr)) {
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* The ucontext_t passed to userland points to the second
* ucontext_t (for transactional state) with its uc_link ptr.
*/
- err |= __put_user(&frame->uc_transact, &frame->uc.uc_link);
+ unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
+
+ user_write_access_end();
+
err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
&frame->uc_transact.uc_mcontext,
tsk, ksig->sig, NULL,
(unsigned long)ksig->ka.sa.sa_handler,
msr);
+
+ if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
+ goto badframe;
+
#endif
} else {
- err |= __put_user(0, &frame->uc.uc_link);
- prepare_setup_sigcontext(tsk, 1);
- if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
- return -EFAULT;
- err |= __unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk,
- ksig->sig, NULL,
- (unsigned long)ksig->ka.sa.sa_handler, 1);
- user_write_access_end();
+ unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
+ unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
+ NULL, (unsigned long)ksig->ka.sa.sa_handler,
+ 1, badframe_block);
}
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
- goto badframe;
+
+ unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
+ user_write_access_end();
/* Make sure signal handler doesn't get spurious FP exceptions */
tsk->thread.fp_state.fpscr = 0;
@@ -900,6 +907,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
regs->nip = (unsigned long) &frame->tramp[0];
}
+
+ /* Save the siginfo outside of the unsafe block. */
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
+ goto badframe;
+
/* Allocate a dummy caller frame for the signal handler. */
newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
@@ -939,6 +951,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
return 0;
+badframe_block:
+ user_write_access_end();
badframe:
signal_fault(current, regs, "handle_rt_signal64", frame);
--
2.29.0
^ permalink raw reply related
* Re: [PATCH 34/36] tty: serial: pmac_zilog: Make disposable variable __always_unused
From: Christophe Leroy @ 2020-11-05 7:04 UTC (permalink / raw)
To: Lee Jones
Cc: Greg Kroah-Hartman, linuxppc-dev, linux-kernel, Paul Mackerras,
linux-serial, Jiri Slaby
In-Reply-To: <20201104193549.4026187-35-lee.jones@linaro.org>
Le 04/11/2020 à 20:35, Lee Jones a écrit :
> Fixes the following W=1 kernel build warning(s):
>
> drivers/tty/serial/pmac_zilog.h:365:58: warning: variable ‘garbage’ set but not used [-Wunused-but-set-variable]
Explain how you are fixing this warning.
Setting __always_unused is usually not the good solution for fixing this warning, but here I guess
this is likely the good solution. But it should be explained why.
Christophe
>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: Jiri Slaby <jirislaby@kernel.org>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: linux-serial@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Lee Jones <lee.jones@linaro.org>
> ---
> drivers/tty/serial/pmac_zilog.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/tty/serial/pmac_zilog.h b/drivers/tty/serial/pmac_zilog.h
> index bb874e76810e0..968aec7c1cf82 100644
> --- a/drivers/tty/serial/pmac_zilog.h
> +++ b/drivers/tty/serial/pmac_zilog.h
> @@ -362,7 +362,7 @@ static inline void zssync(struct uart_pmac_port *port)
>
> /* Misc macros */
> #define ZS_CLEARERR(port) (write_zsreg(port, 0, ERR_RES))
> -#define ZS_CLEARFIFO(port) do { volatile unsigned char garbage; \
> +#define ZS_CLEARFIFO(port) do { volatile unsigned char __always_unused garbage; \
> garbage = read_zsdata(port); \
> garbage = read_zsdata(port); \
> garbage = read_zsdata(port); \
>
^ permalink raw reply
* Re: [PATCH 31/36] powerpc: asm: hvconsole: Move 'hvc_vio_init_early's prototype to shared location
From: Christophe Leroy @ 2020-11-05 7:10 UTC (permalink / raw)
To: Lee Jones; +Cc: Paul Mackerras, linuxppc-dev, linux-kernel
In-Reply-To: <20201104193549.4026187-32-lee.jones@linaro.org>
Le 04/11/2020 à 20:35, Lee Jones a écrit :
> Fixes the following W=1 kernel build warning(s):
>
> drivers/tty/hvc/hvc_vio.c:385:13: warning: no previous prototype for ‘hvc_vio_init_early’ [-Wmissing-prototypes]
> 385 | void __init hvc_vio_init_early(void)
> | ^~~~~~~~~~~~~~~~~~
>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Lee Jones <lee.jones@linaro.org>
> ---
> arch/powerpc/include/asm/hvconsole.h | 3 +++
> arch/powerpc/platforms/pseries/pseries.h | 3 ---
> arch/powerpc/platforms/pseries/setup.c | 1 +
> 3 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/hvconsole.h b/arch/powerpc/include/asm/hvconsole.h
> index 999ed5ac90531..936a1ee1ac786 100644
> --- a/arch/powerpc/include/asm/hvconsole.h
> +++ b/arch/powerpc/include/asm/hvconsole.h
> @@ -24,5 +24,8 @@
> extern int hvc_get_chars(uint32_t vtermno, char *buf, int count);
> extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count);
>
> +/* Provided by HVC VIO */
> +extern void hvc_vio_init_early(void);
> +
Declaring a prototype 'extern' is pointless. Don't add new misuse of 'extern' keyword.
> #endif /* __KERNEL__ */
> #endif /* _PPC64_HVCONSOLE_H */
> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
> index 13fa370a87e4e..7be5b054dfc36 100644
> --- a/arch/powerpc/platforms/pseries/pseries.h
> +++ b/arch/powerpc/platforms/pseries/pseries.h
> @@ -43,9 +43,6 @@ extern void pSeries_final_fixup(void);
> /* Poweron flag used for enabling auto ups restart */
> extern unsigned long rtas_poweron_auto;
>
> -/* Provided by HVC VIO */
> -extern void hvc_vio_init_early(void);
> -
> /* Dynamic logical Partitioning/Mobility */
> extern void dlpar_free_cc_nodes(struct device_node *);
> extern void dlpar_free_cc_property(struct property *);
> diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
> index 633c45ec406da..6999b83f06612 100644
> --- a/arch/powerpc/platforms/pseries/setup.c
> +++ b/arch/powerpc/platforms/pseries/setup.c
> @@ -71,6 +71,7 @@
> #include <asm/swiotlb.h>
> #include <asm/svm.h>
> #include <asm/dtl.h>
> +#include <asm/hvconsole.h>
>
> #include "pseries.h"
> #include "../../../../drivers/pci/pci.h"
>
Christophe
^ permalink raw reply
* Re: [PATCH 34/36] tty: serial: pmac_zilog: Make disposable variable __always_unused
From: Jiri Slaby @ 2020-11-05 7:39 UTC (permalink / raw)
To: Christophe Leroy, Lee Jones
Cc: Greg Kroah-Hartman, linuxppc-dev, linux-kernel, Paul Mackerras,
linux-serial
In-Reply-To: <445a6440-b4c8-4536-891b-0cefc78e5f57@csgroup.eu>
On 05. 11. 20, 8:04, Christophe Leroy wrote:
>
>
> Le 04/11/2020 à 20:35, Lee Jones a écrit :
>> Fixes the following W=1 kernel build warning(s):
>>
>> drivers/tty/serial/pmac_zilog.h:365:58: warning: variable ‘garbage’
>> set but not used [-Wunused-but-set-variable]
>
> Explain how you are fixing this warning.
>
> Setting __always_unused is usually not the good solution for fixing
> this warning, but here I guess this is likely the good solution. But it
> should be explained why.
Or, why is the "garbage =" needed in the first place? read_zsdata is not
defined with __warn_unused_result__. And even if it was, would
(void)!read_zsdata(port) fix it?
>> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>> Cc: Jiri Slaby <jirislaby@kernel.org>
>> Cc: Michael Ellerman <mpe@ellerman.id.au>
>> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> Cc: Paul Mackerras <paulus@samba.org>
>> Cc: linux-serial@vger.kernel.org
>> Cc: linuxppc-dev@lists.ozlabs.org
>> Signed-off-by: Lee Jones <lee.jones@linaro.org>
>> ---
>> drivers/tty/serial/pmac_zilog.h | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/tty/serial/pmac_zilog.h
>> b/drivers/tty/serial/pmac_zilog.h
>> index bb874e76810e0..968aec7c1cf82 100644
>> --- a/drivers/tty/serial/pmac_zilog.h
>> +++ b/drivers/tty/serial/pmac_zilog.h
>> @@ -362,7 +362,7 @@ static inline void zssync(struct uart_pmac_port
>> *port)
>> /* Misc macros */
>> #define ZS_CLEARERR(port) (write_zsreg(port, 0, ERR_RES))
>> -#define ZS_CLEARFIFO(port) do { volatile unsigned char garbage; \
>> +#define ZS_CLEARFIFO(port) do { volatile unsigned char
>> __always_unused garbage; \
>> garbage = read_zsdata(port); \
>> garbage = read_zsdata(port); \
>> garbage = read_zsdata(port); \
>>
thanks,
--
js
^ permalink raw reply
* Re: Kernel 5.10-rc1 not mounting NAND flash (Bisected to d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits"))
From: Miquel Raynal @ 2020-11-05 7:49 UTC (permalink / raw)
To: Christophe Leroy; +Cc: linux-mtd, linuxppc-dev, linux-kernel
In-Reply-To: <a04de8a0-4e3b-d9c6-139e-c25d9d5423d1@csgroup.eu>
Hi Christophe,
Christophe Leroy <christophe.leroy@csgroup.eu> wrote on Wed, 4 Nov 2020
19:37:57 +0100:
> Hi Miquel,
>
> Le 04/11/2020 à 18:38, Miquel Raynal a écrit :
> > Hi Christophe,
> >
> > Christophe Leroy <christophe.leroy@csgroup.eu> wrote on Wed, 04 Nov
> > 2020 18:33:53 +0100:
> >
> >> Hi Miquel,
> >>
> >> I'm unable to boot 5.10-rc1 on my boards. I get the following error:
> >>
> >> [ 4.125811] nand: device found, Manufacturer ID: 0xad, Chip ID: 0x76
> >> [ 4.131992] nand: Hynix NAND 64MiB 3,3V 8-bit
> >> [ 4.136173] nand: 64 MiB, SLC, erase size: 16 KiB, page size: 512, OOB size: 16
> >> [ 4.143534] ------------[ cut here ]------------
> >> [ 4.147934] Unsupported ECC algorithm!
> >> [ 4.152142] WARNING: CPU: 0 PID: 1 at drivers/mtd/nand/raw/nand_base.c:5244 nand_scan_with_ids+0x1260/0x1640
> >> ...
> >> [ 4.332052] ---[ end trace e3a36f62cae4ac56 ]---
> >> [ 4.336882] gpio-nand: probe of c0000000.nand failed with error -22
> >>
> >> Bisected to commit d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits")
> >>
> >> My first impression is that with that change, the value set in chip->ecc.algo
> >> by gpio_nand_probe() in drivers/mtd/nand/raw/gpio.c gets overwritten in rawnand_dt_init()
> >>
> >> The following change fixes the problem, though I'm not sure it is the right fix. Can you have a look ?
> >>
> >> diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
> >> index 1f0d542d5923..aa74797cf2da 100644
> >> --- a/drivers/mtd/nand/raw/nand_base.c
> >> +++ b/drivers/mtd/nand/raw/nand_base.c
> >> @@ -5032,7 +5032,8 @@ static int rawnand_dt_init(struct nand_chip *chip)
> >> chip->ecc.engine_type = nand->ecc.defaults.engine_type;
> >>
> >> chip->ecc.placement = nand->ecc.user_conf.placement;
> >> - chip->ecc.algo = nand->ecc.user_conf.algo;
> >> + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
> >> + chip->ecc.algo = nand->ecc.user_conf.algo;
> >> chip->ecc.strength = nand->ecc.user_conf.strength;
> >> chip->ecc.size = nand->ecc.user_conf.step_size;
> >>
> >> ---
> >>
> >> Thanks
> >> Christophe
> >
> > Sorry for introducing this issue, I didn't had the time to send the
> > Fixes PR yet but I think this issue has been solved already. Could
> > you please try with a recent linux-next?
> >
>
> Sorry, same problem with "Linux version 5.10.0-rc2-next-20201104"
Can you please give this patch a try, please?
---8<---
Author: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu Nov 5 08:44:48 2020 +0100
mtd: rawnand: gpio: Move the ECC initialization to ->attach_chip()
While forcing a Hamming software ECC looks clearly wrong, let's just
fix the situation for now and move these lines to the ->attach_chip()
hook which gets executed after the user input parsing and NAND chip
discovery.
Fixes: d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits")
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
index 3bd847ccc3f3..6feab847f5e0 100644
--- a/drivers/mtd/nand/raw/gpio.c
+++ b/drivers/mtd/nand/raw/gpio.c
@@ -161,8 +161,15 @@ static int gpio_nand_exec_op(struct nand_chip *chip,
return ret;
}
+static int gpio_nand_attach_chip(struct nand_chip *chip)
+{
+ chip->ecc.mode = NAND_ECC_SOFT;
+ chip->ecc.algo = NAND_ECC_HAMMING;
+}
+
static const struct nand_controller_ops gpio_nand_ops = {
.exec_op = gpio_nand_exec_op,
+ .attach_chip = gpio_nand_attach_chip,
};
#ifdef CONFIG_OF
@@ -342,8 +349,6 @@ static int gpio_nand_probe(struct platform_device *pdev)
gpiomtd->base.ops = &gpio_nand_ops;
nand_set_flash_node(chip, pdev->dev.of_node);
- chip->ecc.mode = NAND_ECC_SOFT;
- chip->ecc.algo = NAND_ECC_HAMMING;
chip->options = gpiomtd->plat.options;
chip->controller = &gpiomtd->base;
^ permalink raw reply related
* Re: [PATCH v1 4/4] powernv/memtrace: don't abuse memory hot(un)plug infrastructure for memory allocations
From: David Hildenbrand @ 2020-11-05 8:29 UTC (permalink / raw)
To: Michael Ellerman
Cc: Michal Hocko, Wei Yang, David Hildenbrand, linux-kernel,
Michal Hocko, linux-mm, Paul Mackerras, Rashmica Gupta,
linuxppc-dev, Andrew Morton, Mike Rapoport, Oscar Salvador
In-Reply-To: <87o8kcttjp.fsf@mpe.ellerman.id.au>
> Am 05.11.2020 um 03:53 schrieb Michael Ellerman <mpe@ellerman.id.au>:
>
> David Hildenbrand <david@redhat.com> writes:
>> Let's use alloc_contig_pages() for allocating memory and remove the
>> linear mapping manually via arch_remove_linear_mapping(). Mark all pages
>> PG_offline, such that they will definitely not get touched - e.g.,
>> when hibernating. When freeing memory, try to revert what we did.
>> The original idea was discussed in:
>> https://lkml.kernel.org/r/48340e96-7e6b-736f-9e23-d3111b915b6e@redhat.com
>> This is similar to CONFIG_DEBUG_PAGEALLOC handling on other
>> architectures, whereby only single pages are unmapped from the linear
>> mapping. Let's mimic what memory hot(un)plug would do with the linear
>> mapping.
>> We now need MEMORY_HOTPLUG and CONTIG_ALLOC as dependencies.
>> Simple test under QEMU TCG (10GB RAM, single NUMA node):
>> sh-5.0# mount -t debugfs none /sys/kernel/debug/
>> sh-5.0# cat /sys/devices/system/memory/block_size_bytes
>> 40000000
>> sh-5.0# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
>> [ 71.052836][ T356] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
>> sh-5.0# echo 0x80000000 > /sys/kernel/debug/powerpc/memtrace/enable
>> [ 75.424302][ T356] radix-mmu: Mapped 0x0000000080000000-0x00000000c0000000 with 64.0 KiB pages
>> [ 75.430549][ T356] memtrace: Freed trace memory back on node 0
>> [ 75.604520][ T356] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
>> sh-5.0# echo 0x100000000 > /sys/kernel/debug/powerpc/memtrace/enable
>> [ 80.418835][ T356] radix-mmu: Mapped 0x0000000080000000-0x0000000100000000 with 64.0 KiB pages
>> [ 80.430493][ T356] memtrace: Freed trace memory back on node 0
>> [ 80.433882][ T356] memtrace: Failed to allocate trace memory on node 0
>> sh-5.0# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
>> [ 91.920158][ T356] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
>
> I gave this a quick spin on a real machine, seems to work OK.
>
> I don't have the actual memtrace tools setup to do an actual trace, will
> try and get someone to test that also.
>
> One observation is that previously the memory was zeroed when enabling
> the memtrace, whereas now it's not.
>
> eg, before:
>
> # hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace
> 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
> *
> 10000000
>
> whereas after:
>
> # hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace
> 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
> *
> 00000080 e0 fd 43 00 00 00 00 00 e0 fd 43 00 00 00 00 00 |..C.......C.....|
> 00000090 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
> *
> 00000830 98 bf 39 00 00 00 00 00 98 bf 39 00 00 00 00 00 |..9.......9.....|
> 00000840 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
> *
> 000008a0 b0 c8 47 00 00 00 00 00 b0 c8 47 00 00 00 00 00 |..G.......G.....|
> 000008b0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
> ...
> 0fffff70 78 53 49 7d 00 00 29 2e 88 00 92 41 01 00 49 39 |xSI}..)....A..I9|
> 0fffff80 b4 07 4a 7d 28 f8 00 7d 00 48 08 7c 0c 00 c2 40 |..J}(..}.H.|...@|
> 0fffff90 2d f9 40 7d f0 ff c2 40 b4 07 0a 7d 00 48 8a 7f |-.@}...@...}.H..|
> 0fffffa0 70 fe 9e 41 cc ff ff 4b 00 00 00 60 00 00 00 60 |p..A...K...`...`|
> 0fffffb0 01 00 00 48 00 00 00 60 00 00 a3 2f 0c fd 9e 40 |...H...`.../...@|
> 0fffffc0 00 00 a2 3c 00 00 a5 e8 00 00 62 3c 00 00 63 e8 |...<......b<..c.|
> 0fffffd0 01 00 20 39 83 02 80 38 00 00 3c 99 01 00 00 48 |.. 9...8..<....H|
> 0fffffe0 00 00 00 60 e4 fc ff 4b 00 00 80 38 78 fb e3 7f |...`...K...8x...|
> 0ffffff0 01 00 00 48 00 00 00 60 2c fe ff 4b 00 00 00 60 |...H...`,..K...`|
> 10000000
>
>
> That's a nice way for root to read kernel memory, so we should probably
> add a __GFP_ZERO or memset in there somewhere.
Thanks for catching that! Will have a look on Monday if alloc_contig_pages() already properly handled __GFP_ZERO so we can use it, otherwise I‘ll fix that.
I don‘t recall that memory hotunplug does any zeroing - that‘s why I didn‘t add any explicit zeroing. Could be you were just lucky in your experiment - I assume we‘ll leak kernel memory already.
Thank!
> cheers
^ permalink raw reply
* Re: [PATCH 34/36] tty: serial: pmac_zilog: Make disposable variable __always_unused
From: Lee Jones @ 2020-11-05 8:36 UTC (permalink / raw)
To: Jiri Slaby
Cc: Greg Kroah-Hartman, linux-kernel, Paul Mackerras, linux-serial,
linuxppc-dev
In-Reply-To: <e027b620-56f8-7d8b-84ff-54839f94a4c7@kernel.org>
On Thu, 05 Nov 2020, Jiri Slaby wrote:
> On 05. 11. 20, 8:04, Christophe Leroy wrote:
> >
> >
> > Le 04/11/2020 à 20:35, Lee Jones a écrit :
> > > Fixes the following W=1 kernel build warning(s):
> > >
> > > drivers/tty/serial/pmac_zilog.h:365:58: warning: variable
> > > ‘garbage’ set but not used [-Wunused-but-set-variable]
> >
> > Explain how you are fixing this warning.
> >
> > Setting __always_unused is usually not the good solution for fixing
> > this warning, but here I guess this is likely the good solution. But it
> > should be explained why.
There are normally 3 ways to fix this warning;
- Start using/checking the variable/result
- Remove the variable
- Mark it as __{always,maybe}_unused
The later just tells the compiler that not checking the resultant
value is intentional. There are some functions (as Jiri mentions
below) which are marked as '__must_check' which *require* a dummy
(garbage) variable to be used.
> Or, why is the "garbage =" needed in the first place? read_zsdata is not
> defined with __warn_unused_result__.
I used '__always_used' here for fear of breaking something.
However, if it's safe to remove it, then all the better.
> And even if it was, would (void)!read_zsdata(port) fix it?
That's hideous. :D
*Much* better to just use '__always_used' in that use-case.
> > > Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> > > Cc: Jiri Slaby <jirislaby@kernel.org>
> > > Cc: Michael Ellerman <mpe@ellerman.id.au>
> > > Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > > Cc: Paul Mackerras <paulus@samba.org>
> > > Cc: linux-serial@vger.kernel.org
> > > Cc: linuxppc-dev@lists.ozlabs.org
> > > Signed-off-by: Lee Jones <lee.jones@linaro.org>
> > > ---
> > > drivers/tty/serial/pmac_zilog.h | 2 +-
> > > 1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/tty/serial/pmac_zilog.h
> > > b/drivers/tty/serial/pmac_zilog.h
> > > index bb874e76810e0..968aec7c1cf82 100644
> > > --- a/drivers/tty/serial/pmac_zilog.h
> > > +++ b/drivers/tty/serial/pmac_zilog.h
> > > @@ -362,7 +362,7 @@ static inline void zssync(struct uart_pmac_port
> > > *port)
> > > /* Misc macros */
> > > #define ZS_CLEARERR(port) (write_zsreg(port, 0, ERR_RES))
> > > -#define ZS_CLEARFIFO(port) do { volatile unsigned char garbage; \
> > > +#define ZS_CLEARFIFO(port) do { volatile unsigned char
> > > __always_unused garbage; \
> > > garbage = read_zsdata(port); \
> > > garbage = read_zsdata(port); \
> > > garbage = read_zsdata(port); \
> > >
>
> thanks,
--
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog
^ permalink raw reply
* Re: [PATCH 31/36] powerpc: asm: hvconsole: Move 'hvc_vio_init_early's prototype to shared location
From: Lee Jones @ 2020-11-05 8:38 UTC (permalink / raw)
To: Christophe Leroy; +Cc: Paul Mackerras, linuxppc-dev, linux-kernel
In-Reply-To: <d2a23842-631e-cd5e-84ec-48485328ba52@csgroup.eu>
On Thu, 05 Nov 2020, Christophe Leroy wrote:
>
>
> Le 04/11/2020 à 20:35, Lee Jones a écrit :
> > Fixes the following W=1 kernel build warning(s):
> >
> > drivers/tty/hvc/hvc_vio.c:385:13: warning: no previous prototype for ‘hvc_vio_init_early’ [-Wmissing-prototypes]
> > 385 | void __init hvc_vio_init_early(void)
> > | ^~~~~~~~~~~~~~~~~~
> >
> > Cc: Michael Ellerman <mpe@ellerman.id.au>
> > Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > Cc: Paul Mackerras <paulus@samba.org>
> > Cc: linuxppc-dev@lists.ozlabs.org
> > Signed-off-by: Lee Jones <lee.jones@linaro.org>
> > ---
> > arch/powerpc/include/asm/hvconsole.h | 3 +++
> > arch/powerpc/platforms/pseries/pseries.h | 3 ---
> > arch/powerpc/platforms/pseries/setup.c | 1 +
> > 3 files changed, 4 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/hvconsole.h b/arch/powerpc/include/asm/hvconsole.h
> > index 999ed5ac90531..936a1ee1ac786 100644
> > --- a/arch/powerpc/include/asm/hvconsole.h
> > +++ b/arch/powerpc/include/asm/hvconsole.h
> > @@ -24,5 +24,8 @@
> > extern int hvc_get_chars(uint32_t vtermno, char *buf, int count);
> > extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count);
> > +/* Provided by HVC VIO */
> > +extern void hvc_vio_init_early(void);
> > +
>
> Declaring a prototype 'extern' is pointless. Don't add new misuse of 'extern' keyword.
No new code (misuse or otherwise) is being added in this patch.
It's just moved from one place to another.
I can also strip out 'extern' if it's preferred.
> > #endif /* __KERNEL__ */
> > #endif /* _PPC64_HVCONSOLE_H */
> > diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
> > index 13fa370a87e4e..7be5b054dfc36 100644
> > --- a/arch/powerpc/platforms/pseries/pseries.h
> > +++ b/arch/powerpc/platforms/pseries/pseries.h
> > @@ -43,9 +43,6 @@ extern void pSeries_final_fixup(void);
> > /* Poweron flag used for enabling auto ups restart */
> > extern unsigned long rtas_poweron_auto;
> > -/* Provided by HVC VIO */
> > -extern void hvc_vio_init_early(void);
> > -
> > /* Dynamic logical Partitioning/Mobility */
> > extern void dlpar_free_cc_nodes(struct device_node *);
> > extern void dlpar_free_cc_property(struct property *);
> > diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
> > index 633c45ec406da..6999b83f06612 100644
> > --- a/arch/powerpc/platforms/pseries/setup.c
> > +++ b/arch/powerpc/platforms/pseries/setup.c
> > @@ -71,6 +71,7 @@
> > #include <asm/swiotlb.h>
> > #include <asm/svm.h>
> > #include <asm/dtl.h>
> > +#include <asm/hvconsole.h>
> > #include "pseries.h"
> > #include "../../../../drivers/pci/pci.h"
> >
>
> Christophe
--
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog
^ permalink raw reply
* Re: [PATCH 31/36] powerpc: asm: hvconsole: Move 'hvc_vio_init_early's prototype to shared location
From: Lee Jones @ 2020-11-05 8:39 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev, Paul Mackerras, linux-kernel
In-Reply-To: <87r1p8u230.fsf@mpe.ellerman.id.au>
On Thu, 05 Nov 2020, Michael Ellerman wrote:
> Lee Jones <lee.jones@linaro.org> writes:
> > Fixes the following W=1 kernel build warning(s):
> >
> > drivers/tty/hvc/hvc_vio.c:385:13: warning: no previous prototype for ‘hvc_vio_init_early’ [-Wmissing-prototypes]
> > 385 | void __init hvc_vio_init_early(void)
> > | ^~~~~~~~~~~~~~~~~~
> >
> > Cc: Michael Ellerman <mpe@ellerman.id.au>
> > Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > Cc: Paul Mackerras <paulus@samba.org>
> > Cc: linuxppc-dev@lists.ozlabs.org
> > Signed-off-by: Lee Jones <lee.jones@linaro.org>
> > ---
> > arch/powerpc/include/asm/hvconsole.h | 3 +++
> > arch/powerpc/platforms/pseries/pseries.h | 3 ---
> > arch/powerpc/platforms/pseries/setup.c | 1 +
> > 3 files changed, 4 insertions(+), 3 deletions(-)
>
> Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Thanks.
> > diff --git a/arch/powerpc/include/asm/hvconsole.h b/arch/powerpc/include/asm/hvconsole.h
> > index 999ed5ac90531..936a1ee1ac786 100644
> > --- a/arch/powerpc/include/asm/hvconsole.h
> > +++ b/arch/powerpc/include/asm/hvconsole.h
> > @@ -24,5 +24,8 @@
> > extern int hvc_get_chars(uint32_t vtermno, char *buf, int count);
> > extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count);
> >
> > +/* Provided by HVC VIO */
> > +extern void hvc_vio_init_early(void);
>
> extern isn't needed, but don't feel you need to respin just to drop it.
That's fine. I don't mind re-spinning.
--
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog
^ permalink raw reply
* Re: [PATCH 34/36] tty: serial: pmac_zilog: Make disposable variable __always_unused
From: Jiri Slaby @ 2020-11-05 8:55 UTC (permalink / raw)
To: Lee Jones
Cc: Greg Kroah-Hartman, linux-kernel, Paul Mackerras, linux-serial,
linuxppc-dev
In-Reply-To: <20201105083626.GW4488@dell>
On 05. 11. 20, 9:36, Lee Jones wrote:
> On Thu, 05 Nov 2020, Jiri Slaby wrote:
>
>> On 05. 11. 20, 8:04, Christophe Leroy wrote:
>>>
>>>
>>> Le 04/11/2020 à 20:35, Lee Jones a écrit :
>>>> Fixes the following W=1 kernel build warning(s):
>>>>
>>>> drivers/tty/serial/pmac_zilog.h:365:58: warning: variable
>>>> ‘garbage’ set but not used [-Wunused-but-set-variable]
>>>
>>> Explain how you are fixing this warning.
>>>
>>> Setting __always_unused is usually not the good solution for fixing
>>> this warning, but here I guess this is likely the good solution. But it
>>> should be explained why.
>
> There are normally 3 ways to fix this warning;
>
> - Start using/checking the variable/result
> - Remove the variable
> - Mark it as __{always,maybe}_unused
>
> The later just tells the compiler that not checking the resultant
> value is intentional. There are some functions (as Jiri mentions
> below) which are marked as '__must_check' which *require* a dummy
> (garbage) variable to be used.
>
>> Or, why is the "garbage =" needed in the first place? read_zsdata is not
>> defined with __warn_unused_result__.
>
> I used '__always_used' here for fear of breaking something.
>
> However, if it's safe to remove it, then all the better.
Yes please -- this "garbage" is one of the examples of volatile misuses.
If readb didn't work on volatile pointer, marking the return variable as
volatile wouldn't save it.
>> And even if it was, would (void)!read_zsdata(port) fix it?
>
> That's hideous. :D
Sure, marking reads as must_check would be insane.
> *Much* better to just use '__always_used' in that use-case.
Then using a dummy variable to fool must_check must mean must_check is
used incorrectly, no :)? But there are always exceptions…
thanks,
--
js
suse labs
^ permalink raw reply
* Re: [PATCH 34/36] tty: serial: pmac_zilog: Make disposable variable __always_unused
From: Lee Jones @ 2020-11-05 9:00 UTC (permalink / raw)
To: Jiri Slaby
Cc: Greg Kroah-Hartman, linux-kernel, Paul Mackerras, linux-serial,
linuxppc-dev
In-Reply-To: <a6b63789-1315-cec1-9575-0d858a6da1d5@kernel.org>
On Thu, 05 Nov 2020, Jiri Slaby wrote:
> On 05. 11. 20, 9:36, Lee Jones wrote:
> > On Thu, 05 Nov 2020, Jiri Slaby wrote:
> >
> > > On 05. 11. 20, 8:04, Christophe Leroy wrote:
> > > >
> > > >
> > > > Le 04/11/2020 à 20:35, Lee Jones a écrit :
> > > > > Fixes the following W=1 kernel build warning(s):
> > > > >
> > > > > drivers/tty/serial/pmac_zilog.h:365:58: warning: variable
> > > > > ‘garbage’ set but not used [-Wunused-but-set-variable]
> > > >
> > > > Explain how you are fixing this warning.
> > > >
> > > > Setting __always_unused is usually not the good solution for fixing
> > > > this warning, but here I guess this is likely the good solution. But it
> > > > should be explained why.
> >
> > There are normally 3 ways to fix this warning;
> >
> > - Start using/checking the variable/result
> > - Remove the variable
> > - Mark it as __{always,maybe}_unused
> >
> > The later just tells the compiler that not checking the resultant
> > value is intentional. There are some functions (as Jiri mentions
> > below) which are marked as '__must_check' which *require* a dummy
> > (garbage) variable to be used.
> >
> > > Or, why is the "garbage =" needed in the first place? read_zsdata is not
> > > defined with __warn_unused_result__.
> >
> > I used '__always_used' here for fear of breaking something.
> >
> > However, if it's safe to remove it, then all the better.
>
> Yes please -- this "garbage" is one of the examples of volatile misuses. If
> readb didn't work on volatile pointer, marking the return variable as
> volatile wouldn't save it.
>
> > > And even if it was, would (void)!read_zsdata(port) fix it?
> >
> > That's hideous. :D
>
> Sure, marking reads as must_check would be insane.
>
> > *Much* better to just use '__always_used' in that use-case.
>
> Then using a dummy variable to fool must_check must mean must_check is used
> incorrectly, no :)? But there are always exceptions…
Agreed on all points.
Will fix.
--
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog
^ permalink raw reply
* Re: Kernel 5.10-rc1 not mounting NAND flash (Bisected to d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits"))
From: Christophe Leroy @ 2020-11-05 9:06 UTC (permalink / raw)
To: Miquel Raynal; +Cc: linux-mtd, linuxppc-dev, linux-kernel
In-Reply-To: <20201105084939.72ea6bfd@xps13>
Quoting Miquel Raynal <miquel.raynal@bootlin.com>:
> Hi Christophe,
>
> Christophe Leroy <christophe.leroy@csgroup.eu> wrote on Wed, 4 Nov 2020
> 19:37:57 +0100:
>
>> Hi Miquel,
>>
>> Le 04/11/2020 à 18:38, Miquel Raynal a écrit :
>> > Hi Christophe,
>> >
>> > Christophe Leroy <christophe.leroy@csgroup.eu> wrote on Wed, 04 Nov
>> > 2020 18:33:53 +0100:
>> >
>> >> Hi Miquel,
>> >>
>> >> I'm unable to boot 5.10-rc1 on my boards. I get the following error:
>> >>
>> >> [ 4.125811] nand: device found, Manufacturer ID: 0xad, Chip ID: 0x76
>> >> [ 4.131992] nand: Hynix NAND 64MiB 3,3V 8-bit
>> >> [ 4.136173] nand: 64 MiB, SLC, erase size: 16 KiB, page size:
>> 512, OOB size: 16
>> >> [ 4.143534] ------------[ cut here ]------------
>> >> [ 4.147934] Unsupported ECC algorithm!
>> >> [ 4.152142] WARNING: CPU: 0 PID: 1 at
>> drivers/mtd/nand/raw/nand_base.c:5244
>> nand_scan_with_ids+0x1260/0x1640
>> >> ...
>> >> [ 4.332052] ---[ end trace e3a36f62cae4ac56 ]---
>> >> [ 4.336882] gpio-nand: probe of c0000000.nand failed with error -22
>> >>
>> >> Bisected to commit d7157ff49a5b ("mtd: rawnand: Use the ECC
>> framework user input parsing bits")
>> >>
>> >> My first impression is that with that change, the value set in
>> chip->ecc.algo
>> >> by gpio_nand_probe() in drivers/mtd/nand/raw/gpio.c gets
>> overwritten in rawnand_dt_init()
>> >>
>> >> The following change fixes the problem, though I'm not sure it
>> is the right fix. Can you have a look ?
>> >>
>> >> diff --git a/drivers/mtd/nand/raw/nand_base.c
>> b/drivers/mtd/nand/raw/nand_base.c
>> >> index 1f0d542d5923..aa74797cf2da 100644
>> >> --- a/drivers/mtd/nand/raw/nand_base.c
>> >> +++ b/drivers/mtd/nand/raw/nand_base.c
>> >> @@ -5032,7 +5032,8 @@ static int rawnand_dt_init(struct nand_chip *chip)
>> >> chip->ecc.engine_type = nand->ecc.defaults.engine_type;
>> >>
>> >> chip->ecc.placement = nand->ecc.user_conf.placement;
>> >> - chip->ecc.algo = nand->ecc.user_conf.algo;
>> >> + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
>> >> + chip->ecc.algo = nand->ecc.user_conf.algo;
>> >> chip->ecc.strength = nand->ecc.user_conf.strength;
>> >> chip->ecc.size = nand->ecc.user_conf.step_size;
>> >>
>> >> ---
>> >>
>> >> Thanks
>> >> Christophe
>> >
>> > Sorry for introducing this issue, I didn't had the time to send the
>> > Fixes PR yet but I think this issue has been solved already. Could
>> > you please try with a recent linux-next?
>> >
>>
>> Sorry, same problem with "Linux version 5.10.0-rc2-next-20201104"
>
> Can you please give this patch a try, please?
>
> ---8<---
>
> Author: Miquel Raynal <miquel.raynal@bootlin.com>
> Date: Thu Nov 5 08:44:48 2020 +0100
>
> mtd: rawnand: gpio: Move the ECC initialization to ->attach_chip()
>
> While forcing a Hamming software ECC looks clearly wrong, let's just
> fix the situation for now and move these lines to the ->attach_chip()
> hook which gets executed after the user input parsing and NAND chip
> discovery.
>
> Fixes: d7157ff49a5b ("mtd: rawnand: Use the ECC framework user
> input parsing bits")
> Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
>
> diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
> index 3bd847ccc3f3..6feab847f5e0 100644
> --- a/drivers/mtd/nand/raw/gpio.c
> +++ b/drivers/mtd/nand/raw/gpio.c
> @@ -161,8 +161,15 @@ static int gpio_nand_exec_op(struct nand_chip *chip,
> return ret;
> }
>
> +static int gpio_nand_attach_chip(struct nand_chip *chip)
> +{
> + chip->ecc.mode = NAND_ECC_SOFT;
> + chip->ecc.algo = NAND_ECC_HAMMING;
> +}
> +
> static const struct nand_controller_ops gpio_nand_ops = {
> .exec_op = gpio_nand_exec_op,
> + .attach_chip = gpio_nand_attach_chip,
> };
>
> #ifdef CONFIG_OF
> @@ -342,8 +349,6 @@ static int gpio_nand_probe(struct platform_device *pdev)
> gpiomtd->base.ops = &gpio_nand_ops;
>
> nand_set_flash_node(chip, pdev->dev.of_node);
> - chip->ecc.mode = NAND_ECC_SOFT;
> - chip->ecc.algo = NAND_ECC_HAMMING;
> chip->options = gpiomtd->plat.options;
> chip->controller = &gpiomtd->base;
Works with the following:
diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
index 4ec0a1e10867..66d3f1eb788c 100644
--- a/drivers/mtd/nand/raw/gpio.c
+++ b/drivers/mtd/nand/raw/gpio.c
@@ -161,8 +161,17 @@ static int gpio_nand_exec_op(struct nand_chip *chip,
return ret;
}
+static int gpio_nand_attach_chip(struct nand_chip *chip)
+{
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+ return 0;
+}
+
static const struct nand_controller_ops gpio_nand_ops = {
.exec_op = gpio_nand_exec_op,
+ .attach_chip = gpio_nand_attach_chip,
};
#ifdef CONFIG_OF
@@ -342,8 +351,6 @@ static int gpio_nand_probe(struct platform_device *pdev)
gpiomtd->base.ops = &gpio_nand_ops;
nand_set_flash_node(chip, pdev->dev.of_node);
- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
- chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
chip->options = gpiomtd->plat.options;
chip->controller = &gpiomtd->base;
---
Christophe
^ permalink raw reply related
* Re: Kernel 5.10-rc1 not mounting NAND flash (Bisected to d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits"))
From: Miquel Raynal @ 2020-11-05 9:13 UTC (permalink / raw)
To: Christophe Leroy; +Cc: linux-mtd, linuxppc-dev, linux-kernel
In-Reply-To: <20201105100651.Horde.jOAklfLApjH2WjmauwW9Gg1@messagerie.c-s.fr>
Hi Christophe,
Christophe Leroy <christophe.leroy@csgroup.eu> wrote on Thu, 05 Nov
2020 10:06:51 +0100:
> Quoting Miquel Raynal <miquel.raynal@bootlin.com>:
>
> > Hi Christophe,
> >
> > Christophe Leroy <christophe.leroy@csgroup.eu> wrote on Wed, 4 Nov 2020
> > 19:37:57 +0100:
> >
> >> Hi Miquel,
> >>
> >> Le 04/11/2020 à 18:38, Miquel Raynal a écrit :
> >> > Hi Christophe,
> >> >
> >> > Christophe Leroy <christophe.leroy@csgroup.eu> wrote on Wed, 04 Nov
> >> > 2020 18:33:53 +0100:
> >> >
> >> >> Hi Miquel,
> >> >>
> >> >> I'm unable to boot 5.10-rc1 on my boards. I get the following error:
> >> >>
> >> >> [ 4.125811] nand: device found, Manufacturer ID: 0xad, Chip ID: 0x
> 76
> >> >> [ 4.131992] nand: Hynix NAND 64MiB 3,3V 8-bit
> >> >> [ 4.136173] nand: 64 MiB, SLC, erase size: 16 KiB, page size:>> 512, OOB size: 16
> >> >> [ 4.143534] ------------[ cut here ]------------
> >> >> [ 4.147934] Unsupported ECC algorithm!
> >> >> [ 4.152142] WARNING: CPU: 0 PID: 1 at >> drivers/mtd/nand/raw/nand_base.c:5244 >> nand_scan_with_ids+0x1260/0x1640
> >> >> ...
> >> >> [ 4.332052] ---[ end trace e3a36f62cae4ac56 ]---
> >> >> [ 4.336882] gpio-nand: probe of c0000000.nand failed with error -2
> 2
> >> >>
> >> >> Bisected to commit d7157ff49a5b ("mtd: rawnand: Use the ECC >> framework user input parsing bits")
> >> >>
> >> >> My first impression is that with that change, the value set in >> chip->ecc.algo
> >> >> by gpio_nand_probe() in drivers/mtd/nand/raw/gpio.c gets >> overwritten in rawnand_dt_init()
> >> >>
> >> >> The following change fixes the problem, though I'm not sure it >> is the right fix. Can you have a look ?
> >> >>
> >> >> diff --git a/drivers/mtd/nand/raw/nand_base.c >> b/drivers/mtd/nand/raw/nand_base.c
> >> >> index 1f0d542d5923..aa74797cf2da 100644
> >> >> --- a/drivers/mtd/nand/raw/nand_base.c
> >> >> +++ b/drivers/mtd/nand/raw/nand_base.c
> >> >> @@ -5032,7 +5032,8 @@ static int rawnand_dt_init(struct nand_chip *ch
> ip)
> >> >> chip->ecc.engine_type = nand->ecc.defaults.engine_type;
> >> >>
> >> >> chip->ecc.placement = nand->ecc.user_conf.placement;
> >> >> - chip->ecc.algo = nand->ecc.user_conf.algo;
> >> >> + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
> >> >> + chip->ecc.algo = nand->ecc.user_conf.algo;
> >> >> chip->ecc.strength = nand->ecc.user_conf.strength;
> >> >> chip->ecc.size = nand->ecc.user_conf.step_size;
> >> >>
> >> >> ---
> >> >>
> >> >> Thanks
> >> >> Christophe
> >> >
> >> > Sorry for introducing this issue, I didn't had the time to send the
> >> > Fixes PR yet but I think this issue has been solved already. Could
> >> > you please try with a recent linux-next?
> >> >
> >>
> >> Sorry, same problem with "Linux version 5.10.0-rc2-next-20201104"
> >
> > Can you please give this patch a try, please?
> >
> > ---8<---
> >
> > Author: Miquel Raynal <miquel.raynal@bootlin.com>
> > Date: Thu Nov 5 08:44:48 2020 +0100
> >
> > mtd: rawnand: gpio: Move the ECC initialization to ->attach_chip()
> >
> > While forcing a Hamming software ECC looks clearly wrong, let's just
> > fix the situation for now and move these lines to the ->attach_chip()
> > hook which gets executed after the user input parsing and NAND chip
> > discovery.
> >
> > Fixes: d7157ff49a5b ("mtd: rawnand: Use the ECC framework user > input parsing bits")
> > Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> >
> > diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
> > index 3bd847ccc3f3..6feab847f5e0 100644
> > --- a/drivers/mtd/nand/raw/gpio.c
> > +++ b/drivers/mtd/nand/raw/gpio.c
> > @@ -161,8 +161,15 @@ static int gpio_nand_exec_op(struct nand_chip *chip,
> > return ret;
> > }
> >
> > +static int gpio_nand_attach_chip(struct nand_chip *chip)
> > +{
> > + chip->ecc.mode = NAND_ECC_SOFT;
> > + chip->ecc.algo = NAND_ECC_HAMMING;
> > +}
> > +
> > static const struct nand_controller_ops gpio_nand_ops = {
> > .exec_op = gpio_nand_exec_op,
> > + .attach_chip = gpio_nand_attach_chip,
> > };
> >
> > #ifdef CONFIG_OF
> > @@ -342,8 +349,6 @@ static int gpio_nand_probe(struct platform_device *pd
> ev)
> > gpiomtd->base.ops = &gpio_nand_ops;
> >
> > nand_set_flash_node(chip, pdev->dev.of_node);
> > - chip->ecc.mode = NAND_ECC_SOFT;
> > - chip->ecc.algo = NAND_ECC_HAMMING;
> > chip->options = gpiomtd->plat.options;
> > chip->controller = &gpiomtd->base;
>
>
> Works with the following:
>
> diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
> index 4ec0a1e10867..66d3f1eb788c 100644
> --- a/drivers/mtd/nand/raw/gpio.c
> +++ b/drivers/mtd/nand/raw/gpio.c
> @@ -161,8 +161,17 @@ static int gpio_nand_exec_op(struct nand_chip *chip,
> return ret;
> }
>
> +static int gpio_nand_attach_chip(struct nand_chip *chip)
> +{
> + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
> + chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
> +
> + return 0;
> +}
Yup indeed it was not even compile tested. Good to know, I'll check
the other drivers and send the patch soon.
Thanks,
Miquèl
^ permalink raw reply
* [Bug 209869] Kernel 5.10-rc1 fails to boot on a PowerMac G4 3,6 at an early stage
From: bugzilla-daemon @ 2020-11-05 9:26 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-209869-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=209869
--- Comment #9 from Christophe Leroy (christophe.leroy@csgroup.eu) ---
Ok, what about 5.10-rc1 + KASAN without reverting the patch ?
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* Re: Kernel panic from malloc() on SUSE 15.1?
From: Michael Ellerman @ 2020-11-05 10:19 UTC (permalink / raw)
To: Carl Jacobsen; +Cc: linuxppc-dev
In-Reply-To: <CAKkwB_RD0_=9SSwyYn-8Vo2dr2Li4X-v_KJ4qBWZRgxZuGUeRw@mail.gmail.com>
Carl Jacobsen <cjacobsen@storix.com> writes:
> The panic (on a call to malloc from static linked libcrypto) looks like
> this:
Thanks.
This doesn't make a lot of sense.
> Bad kernel stack pointer 7fffffffeac0 at 700
"at 700" is the regs->nip value, and suggests we're trying to handle a
program check, which is either a trap or BUG or WARN, or illegal
instruction or several other things.
> Oops: Bad kernel stack pointer, sig: 6 [#1]
> SMP NR_CPUS=2048 NUMA pSeries
> Modules linked in: scsi_transport_iscsi af_packet xt_tcpudp ip6t_rpfilter
> ip6t_REJECT ipt_REJECT xt_conntrack ip_set nfnetlink ebtable_nat
> ebtable_broute br_netfilter bridge stp llc ip6table_nat nf_conntrack_ipv6
> nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_raw ip6table_security
> iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat
> nf_conntrack libcrc32c iptable_mangle iptable_raw iptable_security
> ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables
> x_tables ibmveth(X) vmx_crypto gf128mul crct10dif_vpmsum rtc_generic btrfs
> xor zstd_decompress zstd_compress xxhash raid6_pq sr_mod cdrom sd_mod
> ibmvscsi(X) scsi_transport_srp crc32c_vpmsum sg dm_multipath dm_mod
> scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4
> Supported: Yes, External
> CPU: 0 PID: 14144 Comm: rand_test_no_pt Tainted: G 4.12.14-197.18-default #1 SLE15-SP1
> task: c00000002fa23b80 task.stack: c000000032824000
> NIP: 0000000000000700 LR: 0000000010004ad0 CTR: 0000000000000000
> REGS: c00000001ec2fd40 TRAP: 0300 Tainted: G (4.12.14-197.18-default)
But then here it says TRAP = 0x300, which is != 0x700.
The trap number is hardcoded in the bad stack handling code, and I don't
see how we can end up with nip == 0x700 but the trap value == 0x300.
> MSR: 8000000000001000 <SF,ME> CR: 44000844 XER: 20000000
And here the MSR says you were in big endian mode, but you said before
your machine was ppc64le.
> CFAR: 00000000000010f0 DAR: ffffffffffffb27a DSISR: 40000000 SOFTE: 0
> GPR00: 0000000020000000 00007fffffffeac0 00000000102af788 fffffffffffffffd
> GPR04: 0000000000000020 0000000000000030 00000000102b0550 0000000000000001
> GPR08: 0000000000000000 00007fffb7dacc00 00000000102b0520 800000010280f033
> GPR12: 0000000000004000 00007fffb7ffa100 0000000000000000 0000000000000000
> GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> GPR24: 0000000000000000 0000000000000000 0000000000000000 00007fffb7fef4b8
> GPR28: 00007fffb7ff0000 0000000000000000 0000000000000000 00007fffffffeac0
The rest of the regs look like user space values, not kernel.
> NIP [0000000000000700] 0x700
> LR [0000000010004ad0] 0x10004ad0
> Call Trace:
> Instruction dump:
> 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
> 00000000 00000000 00000000 00000000 7db243a6 7db142a6 f92d0080 7d20e2a6
> ---[ end trace cc04515f274cfbf6 ]---
What hardware is this on?
Can you try booting with ppc_tm=off on the kernel command line, and see
if that changes anything?
Can you put your compiled test program up somewhere we can download it
and look at? Or post the disassembly?
cheers
^ permalink raw reply
* Re: [PATCH v1 4/4] powernv/memtrace: don't abuse memory hot(un)plug infrastructure for memory allocations
From: Michael Ellerman @ 2020-11-05 10:47 UTC (permalink / raw)
To: David Hildenbrand
Cc: Michal Hocko, Wei Yang, David Hildenbrand, linux-kernel,
Michal Hocko, linux-mm, Paul Mackerras, Rashmica Gupta,
linuxppc-dev, Andrew Morton, Mike Rapoport, Oscar Salvador
In-Reply-To: <1D39DC0E-C07A-4B9E-B811-67684A4A0FE9@redhat.com>
David Hildenbrand <david@redhat.com> writes:
>> Am 05.11.2020 um 03:53 schrieb Michael Ellerman <mpe@ellerman.id.au>:
>>
>> David Hildenbrand <david@redhat.com> writes:
>>> Let's use alloc_contig_pages() for allocating memory and remove the
>>> linear mapping manually via arch_remove_linear_mapping(). Mark all pages
>>> PG_offline, such that they will definitely not get touched - e.g.,
>>> when hibernating. When freeing memory, try to revert what we did.
>>> The original idea was discussed in:
>>> https://lkml.kernel.org/r/48340e96-7e6b-736f-9e23-d3111b915b6e@redhat.com
>>> This is similar to CONFIG_DEBUG_PAGEALLOC handling on other
>>> architectures, whereby only single pages are unmapped from the linear
>>> mapping. Let's mimic what memory hot(un)plug would do with the linear
>>> mapping.
>>> We now need MEMORY_HOTPLUG and CONTIG_ALLOC as dependencies.
>>> Simple test under QEMU TCG (10GB RAM, single NUMA node):
>>> sh-5.0# mount -t debugfs none /sys/kernel/debug/
>>> sh-5.0# cat /sys/devices/system/memory/block_size_bytes
>>> 40000000
>>> sh-5.0# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
>>> [ 71.052836][ T356] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
>>> sh-5.0# echo 0x80000000 > /sys/kernel/debug/powerpc/memtrace/enable
>>> [ 75.424302][ T356] radix-mmu: Mapped 0x0000000080000000-0x00000000c0000000 with 64.0 KiB pages
>>> [ 75.430549][ T356] memtrace: Freed trace memory back on node 0
>>> [ 75.604520][ T356] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
>>> sh-5.0# echo 0x100000000 > /sys/kernel/debug/powerpc/memtrace/enable
>>> [ 80.418835][ T356] radix-mmu: Mapped 0x0000000080000000-0x0000000100000000 with 64.0 KiB pages
>>> [ 80.430493][ T356] memtrace: Freed trace memory back on node 0
>>> [ 80.433882][ T356] memtrace: Failed to allocate trace memory on node 0
>>> sh-5.0# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
>>> [ 91.920158][ T356] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
>>
>> I gave this a quick spin on a real machine, seems to work OK.
>>
>> I don't have the actual memtrace tools setup to do an actual trace, will
>> try and get someone to test that also.
>>
>> One observation is that previously the memory was zeroed when enabling
>> the memtrace, whereas now it's not.
>>
>> eg, before:
>>
>> # hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace
>> 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
>> *
>> 10000000
>>
>> whereas after:
>>
>> # hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace
>> 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
>> *
>> 00000080 e0 fd 43 00 00 00 00 00 e0 fd 43 00 00 00 00 00 |..C.......C.....|
>> 00000090 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
>> *
>> 00000830 98 bf 39 00 00 00 00 00 98 bf 39 00 00 00 00 00 |..9.......9.....|
>> 00000840 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
>> *
>> 000008a0 b0 c8 47 00 00 00 00 00 b0 c8 47 00 00 00 00 00 |..G.......G.....|
>> 000008b0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
>> ...
>> 0fffff70 78 53 49 7d 00 00 29 2e 88 00 92 41 01 00 49 39 |xSI}..)....A..I9|
>> 0fffff80 b4 07 4a 7d 28 f8 00 7d 00 48 08 7c 0c 00 c2 40 |..J}(..}.H.|...@|
>> 0fffff90 2d f9 40 7d f0 ff c2 40 b4 07 0a 7d 00 48 8a 7f |-.@}...@...}.H..|
>> 0fffffa0 70 fe 9e 41 cc ff ff 4b 00 00 00 60 00 00 00 60 |p..A...K...`...`|
>> 0fffffb0 01 00 00 48 00 00 00 60 00 00 a3 2f 0c fd 9e 40 |...H...`.../...@|
>> 0fffffc0 00 00 a2 3c 00 00 a5 e8 00 00 62 3c 00 00 63 e8 |...<......b<..c.|
>> 0fffffd0 01 00 20 39 83 02 80 38 00 00 3c 99 01 00 00 48 |.. 9...8..<....H|
>> 0fffffe0 00 00 00 60 e4 fc ff 4b 00 00 80 38 78 fb e3 7f |...`...K...8x...|
>> 0ffffff0 01 00 00 48 00 00 00 60 2c fe ff 4b 00 00 00 60 |...H...`,..K...`|
>> 10000000
>>
>>
>> That's a nice way for root to read kernel memory, so we should probably
>> add a __GFP_ZERO or memset in there somewhere.
>
> Thanks for catching that! Will have a look on Monday if
> alloc_contig_pages() already properly handled __GFP_ZERO so we can use
> it, otherwise I‘ll fix that.
I had a quick look and didn't see it, but maybe it is in there somewhere.
> I don‘t recall that memory hotunplug does any zeroing - that‘s why I
> didn‘t add any explicit zeroing. Could be you were just lucky in your
> experiment - I assume we‘ll leak kernel memory already.
Hmm yeah good point. I did try it multiple times, and I never get
anything non-zero with the existing code.
I guess it's just that the new method is more likely to give us memory
that's already been used for something.
So I guess that's not actually a problem with your patch, it's just
exposing an existing issue.
cheers
^ permalink raw reply
* Re: Kernel panic from malloc() on SUSE 15.1?
From: Segher Boessenkool @ 2020-11-05 11:00 UTC (permalink / raw)
To: Michael Ellerman; +Cc: Carl Jacobsen, linuxppc-dev
In-Reply-To: <87lffgt8b9.fsf@mpe.ellerman.id.au>
On Thu, Nov 05, 2020 at 09:19:22PM +1100, Michael Ellerman wrote:
> Carl Jacobsen <cjacobsen@storix.com> writes:
> This doesn't make a lot of sense.
>
> > Bad kernel stack pointer 7fffffffeac0 at 700
>
> "at 700" is the regs->nip value, and suggests we're trying to handle a
> program check, which is either a trap or BUG or WARN, or illegal
> instruction or several other things.
> > REGS: c00000001ec2fd40 TRAP: 0300 Tainted: G (4.12.14-197.18-default)
>
> But then here it says TRAP = 0x300, which is != 0x700.
>
> The trap number is hardcoded in the bad stack handling code, and I don't
> see how we can end up with nip == 0x700 but the trap value == 0x300.
>
> > MSR: 8000000000001000 <SF,ME> CR: 44000844 XER: 20000000
>
> And here the MSR says you were in big endian mode, but you said before
> your machine was ppc64le.
It looks like you got a DSI (the 300), but for some reason that
interrupt was not taken in LE mode, so the instruction at 300 was read
as a lot of gobbledygook, not a valid insn, and the processor took a
program interrupt (the 700).
(MSR[RI]=0, but there can be other causes for that of course.)
Segher
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox