LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 02/10] powerpc/signal: Add unsafe_copy_{vsx, fpr}_from_user()
From: Christopher M. Riedl @ 2021-01-28  4:04 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20210128040424.12720-1-cmr@codefail.de>

Reuse the "safe" implementation from signal.c except for calling
unsafe_copy_from_user() to copy into a local buffer.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 2559a681536e..c18402d625f1 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -53,6 +53,33 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
 				&buf[i], label);\
 } while (0)
 
+#define unsafe_copy_fpr_from_user(task, from, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *__f = (u64 __user *)from;				\
+	u64 buf[ELF_NFPREG];						\
+	int i;								\
+									\
+	unsafe_copy_from_user(buf, __f, ELF_NFPREG * sizeof(double),	\
+				label);					\
+	for (i = 0; i < ELF_NFPREG - 1; i++)				\
+		__t->thread.TS_FPR(i) = buf[i];				\
+	__t->thread.fp_state.fpscr = buf[i];				\
+} while (0)
+
+#define unsafe_copy_vsx_from_user(task, from, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *__f = (u64 __user *)from;				\
+	u64 buf[ELF_NVSRHALFREG];					\
+	int i;								\
+									\
+	unsafe_copy_from_user(buf, __f,					\
+				ELF_NVSRHALFREG * sizeof(double),	\
+				label);					\
+	for (i = 0; i < ELF_NVSRHALFREG ; i++)				\
+		__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];	\
+} while (0)
+
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 #define unsafe_copy_ckfpr_to_user(to, task, label)	do {		\
 	struct task_struct *__t = task;					\
@@ -80,6 +107,10 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
 	unsafe_copy_to_user(to, (task)->thread.fp_state.fpr,	\
 			    ELF_NFPREG * sizeof(double), label)
 
+#define unsafe_copy_fpr_from_user(task, from, label)			\
+	unsafe_copy_from_user((task)->thread.fp_state.fpr, from,	\
+			    ELF_NFPREG * sizeof(double), label)
+
 static inline unsigned long
 copy_fpr_to_user(void __user *to, struct task_struct *task)
 {
@@ -115,6 +146,8 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
 #else
 #define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
 
+#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
+
 static inline unsigned long
 copy_fpr_to_user(void __user *to, struct task_struct *task)
 {
-- 
2.26.1


^ permalink raw reply related

* [PATCH v4 01/10] powerpc/uaccess: Add unsafe_copy_from_user
From: Christopher M. Riedl @ 2021-01-28  4:04 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20210128040424.12720-1-cmr@codefail.de>

Just wrap __copy_tofrom_user() for the usual 'unsafe' pattern which
takes in a label to goto on error.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/include/asm/uaccess.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 501c9a79038c..036e82eefac9 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -542,6 +542,9 @@ user_write_access_begin(const void __user *ptr, size_t len)
 #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
 #define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
 
+#define unsafe_copy_from_user(d, s, l, e) \
+	unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
+
 #define unsafe_copy_to_user(d, s, l, e) \
 do {									\
 	u8 __user *_dst = (u8 __user *)(d);				\
-- 
2.26.1


^ permalink raw reply related

* [PATCH v4 00/10] Improve signal performance on PPC64 with KUAP
From: Christopher M. Riedl @ 2021-01-28  4:04 UTC (permalink / raw)
  To: linuxppc-dev

As reported by Anton, there is a large penalty to signal handling
performance on radix systems using KUAP. The signal handling code
performs many user access operations, each of which needs to switch the
KUAP permissions bit to open and then close user access. This involves a
costly 'mtspr' operation [0].

There is existing work done on x86 and by Christopher Leroy for PPC32 to
instead open up user access in "blocks" using user_*_access_{begin,end}.
We can do the same in PPC64 to bring performance back up on KUAP-enabled
radix and now also hash MMU systems [1].

Hash MMU KUAP support along with uaccess flush has landed in linuxppc/next
since the last revision. This series also provides a large benefit on hash
with KUAP. However, in the hash implementation of KUAP the user AMR is
always restored during system_call_exception() which cannot be avoided.
Fewer user access switches naturally also result in less uaccess flushing.

The first two patches add some needed 'unsafe' versions of copy-from
functions. While these do not make use of asm-goto they still allow for
avoiding the repeated uaccess switches.

The third patch moves functions called by setup_sigcontext() into a new
prepare_setup_sigcontext() to simplify converting setup_sigcontext()
into an 'unsafe' version which assumes an open uaccess window later.

The fourth and fifths patches clean-up some of the Transactional Memory
ifdef stuff to simplify using uaccess blocks later.

The next two patches rewrite some of the signal64 helper functions to
be 'unsafe'. Finally, the last three patches update the main signal
handling functions to make use of the new 'unsafe' helpers and eliminate
some additional uaccess switching.

I used the will-it-scale signal1 benchmark to measure and compare
performance [2]. The below results are from running a minimal
kernel+initramfs QEMU/KVM guest on a POWER9 Blackbird:

	signal1_threads -t1 -s10

	|                             | hash   | radix  |
	| --------------------------- | ------ | ------ |
	| linuxppc/next               | 118693 | 133296 |
	| linuxppc/next w/o KUAP+KUEP | 228911 | 228654 |
	| unsafe-signal64             | 199443 | 234716 |

[0]: https://github.com/linuxppc/issues/issues/277
[1]: https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=196278
[2]: https://github.com/antonblanchard/will-it-scale/blob/master/tests/signal1.c

v4:	* Fix issues identified by Christophe Leroy (thanks for review)
	* Use __get_user() directly to copy the 8B sigset_t

v3:	* Rebase on latest linuxppc/next
	* Reword confusing commit messages
	* Add missing comma in macro in signal.h which broke compiles without
	  CONFIG_ALTIVEC
	* Validate hash KUAP signal performance improvements

v2:	* Rebase on latest linuxppc/next + Christophe Leroy's PPC32
	  signal series
	* Simplify/remove TM ifdefery similar to PPC32 series and clean
	  up the uaccess begin/end calls
	* Isolate non-inline functions so they are not called when
	  uaccess window is open

Christopher M. Riedl (8):
  powerpc/uaccess: Add unsafe_copy_from_user
  powerpc/signal: Add unsafe_copy_{vsx,fpr}_from_user()
  powerpc/signal64: Move non-inline functions out of setup_sigcontext()
  powerpc: Reference param in MSR_TM_ACTIVE() macro
  powerpc/signal64: Remove TM ifdefery in middle of if/else block
  powerpc/signal64: Replace setup_sigcontext() w/
    unsafe_setup_sigcontext()
  powerpc/signal64: Replace restore_sigcontext() w/
    unsafe_restore_sigcontext()
  powerpc/signal64: Use __get_user() to copy sigset_t

Daniel Axtens (2):
  powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess
    switches
  powerpc/signal64: Rewrite rt_sigreturn() to minimise uaccess switches

 arch/powerpc/include/asm/reg.h     |   2 +-
 arch/powerpc/include/asm/uaccess.h |   3 +
 arch/powerpc/kernel/signal.h       |  33 ++++
 arch/powerpc/kernel/signal_64.c    | 251 ++++++++++++++++++-----------
 4 files changed, 196 insertions(+), 93 deletions(-)

-- 
2.26.1


^ permalink raw reply

* [PATCH v4 05/10] powerpc/signal64: Remove TM ifdefery in middle of if/else block
From: Christopher M. Riedl @ 2021-01-28  4:04 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20210128040424.12720-1-cmr@codefail.de>

Rework the messy ifdef breaking up the if-else for TM similar to
commit f1cf4f93de2f ("powerpc/signal32: Remove ifdefery in middle of if/else").

Unlike that commit for ppc32, the ifdef can't be removed entirely since
uc_transact in sigframe depends on CONFIG_PPC_TRANSACTIONAL_MEM.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal_64.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index b211a8ea4f6e..8e1d804ce552 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -710,9 +710,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	struct pt_regs *regs = current_pt_regs();
 	struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
 	sigset_t set;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	unsigned long msr;
-#endif
 
 	/* Always make any pending restarted system calls return -EINTR */
 	current->restart_block.fn = do_no_restart_syscall;
@@ -765,7 +763,10 @@ SYSCALL_DEFINE0(rt_sigreturn)
 
 	if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
 		goto badframe;
+#endif
+
 	if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 		/* We recheckpoint on return. */
 		struct ucontext __user *uc_transact;
 
@@ -778,9 +779,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
 		if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
 					   &uc_transact->uc_mcontext))
 			goto badframe;
-	} else
 #endif
-	{
+	} else {
 		/*
 		 * Fall through, for non-TM restore
 		 *
@@ -818,10 +818,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	unsigned long newsp = 0;
 	long err = 0;
 	struct pt_regs *regs = tsk->thread.regs;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	/* Save the thread's msr before get_tm_stackpointer() changes it */
 	unsigned long msr = regs->msr;
-#endif
 
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
 	if (!access_ok(frame, sizeof(*frame)))
@@ -836,8 +834,9 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+
 	if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 		/* The ucontext_t passed to userland points to the second
 		 * ucontext_t (for transactional state) with its uc_link ptr.
 		 */
@@ -847,9 +846,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 					    tsk, ksig->sig, NULL,
 					    (unsigned long)ksig->ka.sa.sa_handler,
 					    msr);
-	} else
 #endif
-	{
+	} else {
 		err |= __put_user(0, &frame->uc.uc_link);
 		prepare_setup_sigcontext(tsk, 1);
 		err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
-- 
2.26.1


^ permalink raw reply related

* [PATCH v4 06/10] powerpc/signal64: Replace setup_sigcontext() w/ unsafe_setup_sigcontext()
From: Christopher M. Riedl @ 2021-01-28  4:04 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20210128040424.12720-1-cmr@codefail.de>

Previously setup_sigcontext() performed a costly KUAP switch on every
uaccess operation. These repeated uaccess switches cause a significant
drop in signal handling performance.

Rewrite setup_sigcontext() to assume that a userspace write access window
is open. Replace all uaccess functions with their 'unsafe' versions
which avoid the repeated uaccess switches.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal_64.c | 70 ++++++++++++++++++++-------------
 1 file changed, 43 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 8e1d804ce552..4248e4489ff1 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -101,9 +101,13 @@ static void prepare_setup_sigcontext(struct task_struct *tsk, int ctx_has_vsx_re
  * Set up the sigcontext for the signal frame.
  */
 
-static long setup_sigcontext(struct sigcontext __user *sc,
-		struct task_struct *tsk, int signr, sigset_t *set,
-		unsigned long handler, int ctx_has_vsx_region)
+#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler,		\
+				ctx_has_vsx_region, e)			\
+	unsafe_op_wrap(__unsafe_setup_sigcontext(sc, tsk, signr, set,	\
+			handler, ctx_has_vsx_region), e)
+static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc,
+					struct task_struct *tsk, int signr, sigset_t *set,
+					unsigned long handler, int ctx_has_vsx_region)
 {
 	/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
 	 * process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -118,20 +122,19 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 #endif
 	struct pt_regs *regs = tsk->thread.regs;
 	unsigned long msr = regs->msr;
-	long err = 0;
 	/* Force usr to alway see softe as 1 (interrupts enabled) */
 	unsigned long softe = 0x1;
 
 	BUG_ON(tsk != current);
 
 #ifdef CONFIG_ALTIVEC
-	err |= __put_user(v_regs, &sc->v_regs);
+	unsafe_put_user(v_regs, &sc->v_regs, efault_out);
 
 	/* save altivec registers */
 	if (tsk->thread.used_vr) {
 		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
-		err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
-				      33 * sizeof(vector128));
+		unsafe_copy_to_user(v_regs, &tsk->thread.vr_state,
+				    33 * sizeof(vector128), efault_out);
 		/* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)
 		 * contains valid data.
 		 */
@@ -140,12 +143,12 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	/* We always copy to/from vrsave, it's 0 if we don't have or don't
 	 * use altivec.
 	 */
-	err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
+	unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
 #else /* CONFIG_ALTIVEC */
-	err |= __put_user(0, &sc->v_regs);
+	unsafe_put_user(0, &sc->v_regs, efault_out);
 #endif /* CONFIG_ALTIVEC */
 	/* copy fpr regs and fpscr */
-	err |= copy_fpr_to_user(&sc->fp_regs, tsk);
+	unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out);
 
 	/*
 	 * Clear the MSR VSX bit to indicate there is no valid state attached
@@ -160,24 +163,27 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	 */
 	if (tsk->thread.used_vsr && ctx_has_vsx_region) {
 		v_regs += ELF_NVRREG;
-		err |= copy_vsx_to_user(v_regs, tsk);
+		unsafe_copy_vsx_to_user(v_regs, tsk, efault_out);
 		/* set MSR_VSX in the MSR value in the frame to
 		 * indicate that sc->vs_reg) contains valid data.
 		 */
 		msr |= MSR_VSX;
 	}
 #endif /* CONFIG_VSX */
-	err |= __put_user(&sc->gp_regs, &sc->regs);
+	unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out);
 	WARN_ON(!FULL_REGS(regs));
-	err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
-	err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
-	err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
-	err |= __put_user(signr, &sc->signal);
-	err |= __put_user(handler, &sc->handler);
+	unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out);
+	unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out);
+	unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out);
+	unsafe_put_user(signr, &sc->signal, efault_out);
+	unsafe_put_user(handler, &sc->handler, efault_out);
 	if (set != NULL)
-		err |=  __put_user(set->sig[0], &sc->oldmask);
+		unsafe_put_user(set->sig[0], &sc->oldmask, efault_out);
 
-	return err;
+	return 0;
+
+efault_out:
+	return -EFAULT;
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -664,12 +670,15 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 
 	if (old_ctx != NULL) {
 		prepare_setup_sigcontext(current, ctx_has_vsx_region);
-		if (!access_ok(old_ctx, ctx_size)
-		    || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
-					ctx_has_vsx_region)
-		    || __copy_to_user(&old_ctx->uc_sigmask,
-				      &current->blocked, sizeof(sigset_t)))
+		if (!user_write_access_begin(old_ctx, ctx_size))
 			return -EFAULT;
+
+		unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
+					0, ctx_has_vsx_region, efault_out);
+		unsafe_copy_to_user(&old_ctx->uc_sigmask, &current->blocked,
+				    sizeof(sigset_t), efault_out);
+
+		user_write_access_end();
 	}
 	if (new_ctx == NULL)
 		return 0;
@@ -698,6 +707,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	/* This returns like rt_sigreturn */
 	set_thread_flag(TIF_RESTOREALL);
 	return 0;
+
+efault_out:
+	user_write_access_end();
+	return -EFAULT;
 }
 
 
@@ -850,9 +863,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	} else {
 		err |= __put_user(0, &frame->uc.uc_link);
 		prepare_setup_sigcontext(tsk, 1);
-		err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
-					NULL, (unsigned long)ksig->ka.sa.sa_handler,
-					1);
+		if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
+			return -EFAULT;
+		err |= __unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk,
+						ksig->sig, NULL,
+						(unsigned long)ksig->ka.sa.sa_handler, 1);
+		user_write_access_end();
 	}
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	if (err)
-- 
2.26.1


^ permalink raw reply related

* [PATCH v4 08/10] powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess switches
From: Christopher M. Riedl @ 2021-01-28  4:04 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Daniel Axtens
In-Reply-To: <20210128040424.12720-1-cmr@codefail.de>

From: Daniel Axtens <dja@axtens.net>

Add uaccess blocks and use the 'unsafe' versions of functions doing user
access where possible to reduce the number of times uaccess has to be
opened/closed.

There is no 'unsafe' version of copy_siginfo_to_user, so move it
slightly to allow for a "longer" uaccess block.

Signed-off-by: Daniel Axtens <dja@axtens.net>
Co-developed-by: Christopher M. Riedl <cmr@codefail.de>
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal_64.c | 54 +++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d668f8af18fe..a471e97589a8 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -849,44 +849,51 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	unsigned long msr = regs->msr;
 
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
-	if (!access_ok(frame, sizeof(*frame)))
-		goto badframe;
 
-	err |= __put_user(&frame->info, &frame->pinfo);
-	err |= __put_user(&frame->uc, &frame->puc);
-	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
-	if (err)
+	/* This only applies when calling unsafe_setup_sigcontext() and must be
+	 * called before opening the uaccess window.
+	 */
+	if (!MSR_TM_ACTIVE(msr))
+		prepare_setup_sigcontext(tsk, 1);
+
+	if (!user_write_access_begin(frame, sizeof(*frame)))
 		goto badframe;
 
+	unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
+	unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
+
 	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
+	unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
+	unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
 
 	if (MSR_TM_ACTIVE(msr)) {
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 		/* The ucontext_t passed to userland points to the second
 		 * ucontext_t (for transactional state) with its uc_link ptr.
 		 */
-		err |= __put_user(&frame->uc_transact, &frame->uc.uc_link);
+		unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
+
+		user_write_access_end();
+
 		err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
 					    &frame->uc_transact.uc_mcontext,
 					    tsk, ksig->sig, NULL,
 					    (unsigned long)ksig->ka.sa.sa_handler,
 					    msr);
+
+		if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
+			goto badframe;
+
 #endif
 	} else {
-		err |= __put_user(0, &frame->uc.uc_link);
-		prepare_setup_sigcontext(tsk, 1);
-		if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
-			return -EFAULT;
-		err |= __unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk,
-						ksig->sig, NULL,
-						(unsigned long)ksig->ka.sa.sa_handler, 1);
-		user_write_access_end();
+		unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
+		unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
+					NULL, (unsigned long)ksig->ka.sa.sa_handler,
+					1, badframe_block);
 	}
-	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-	if (err)
-		goto badframe;
+
+	unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
+	user_write_access_end();
 
 	/* Make sure signal handler doesn't get spurious FP exceptions */
 	tsk->thread.fp_state.fpscr = 0;
@@ -901,6 +908,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		regs->nip = (unsigned long) &frame->tramp[0];
 	}
 
+
+	/* Save the siginfo outside of the unsafe block. */
+	if (copy_siginfo_to_user(&frame->info, &ksig->info))
+		goto badframe;
+
 	/* Allocate a dummy caller frame for the signal handler. */
 	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
 	err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
@@ -940,6 +952,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
 	return 0;
 
+badframe_block:
+	user_write_access_end();
 badframe:
 	signal_fault(current, regs, "handle_rt_signal64", frame);
 
-- 
2.26.1


^ permalink raw reply related

* [PATCH v4 03/10] powerpc/signal64: Move non-inline functions out of setup_sigcontext()
From: Christopher M. Riedl @ 2021-01-28  4:04 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20210128040424.12720-1-cmr@codefail.de>

There are non-inline functions which get called in setup_sigcontext() to
save register state to the thread struct. Move these functions into a
separate prepare_setup_sigcontext() function so that
setup_sigcontext() can be refactored later into an "unsafe" version
which assumes an open uaccess window. Non-inline functions should be
avoided when uaccess is open.

The majority of setup_sigcontext() can be refactored to execute in an
"unsafe" context (uaccess window is opened) except for some non-inline
functions. Move these out into a separate prepare_setup_sigcontext()
function which must be called first and before opening up a uaccess
window. A follow-up commit converts setup_sigcontext() to be "unsafe".

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal_64.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index f9e4a1ac440f..b211a8ea4f6e 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -79,6 +79,24 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
 }
 #endif
 
+static void prepare_setup_sigcontext(struct task_struct *tsk, int ctx_has_vsx_region)
+{
+#ifdef CONFIG_ALTIVEC
+	/* save altivec registers */
+	if (tsk->thread.used_vr)
+		flush_altivec_to_thread(tsk);
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		tsk->thread.vrsave = mfspr(SPRN_VRSAVE);
+#endif /* CONFIG_ALTIVEC */
+
+	flush_fp_to_thread(tsk);
+
+#ifdef CONFIG_VSX
+	if (tsk->thread.used_vsr && ctx_has_vsx_region)
+		flush_vsx_to_thread(tsk);
+#endif /* CONFIG_VSX */
+}
+
 /*
  * Set up the sigcontext for the signal frame.
  */
@@ -97,7 +115,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	 */
 #ifdef CONFIG_ALTIVEC
 	elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
-	unsigned long vrsave;
 #endif
 	struct pt_regs *regs = tsk->thread.regs;
 	unsigned long msr = regs->msr;
@@ -112,7 +129,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 
 	/* save altivec registers */
 	if (tsk->thread.used_vr) {
-		flush_altivec_to_thread(tsk);
 		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
 		err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
 				      33 * sizeof(vector128));
@@ -124,17 +140,10 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	/* We always copy to/from vrsave, it's 0 if we don't have or don't
 	 * use altivec.
 	 */
-	vrsave = 0;
-	if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
-		vrsave = mfspr(SPRN_VRSAVE);
-		tsk->thread.vrsave = vrsave;
-	}
-
-	err |= __put_user(vrsave, (u32 __user *)&v_regs[33]);
+	err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
 #else /* CONFIG_ALTIVEC */
 	err |= __put_user(0, &sc->v_regs);
 #endif /* CONFIG_ALTIVEC */
-	flush_fp_to_thread(tsk);
 	/* copy fpr regs and fpscr */
 	err |= copy_fpr_to_user(&sc->fp_regs, tsk);
 
@@ -150,7 +159,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	 * VMX data.
 	 */
 	if (tsk->thread.used_vsr && ctx_has_vsx_region) {
-		flush_vsx_to_thread(tsk);
 		v_regs += ELF_NVRREG;
 		err |= copy_vsx_to_user(v_regs, tsk);
 		/* set MSR_VSX in the MSR value in the frame to
@@ -655,6 +663,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 		ctx_has_vsx_region = 1;
 
 	if (old_ctx != NULL) {
+		prepare_setup_sigcontext(current, ctx_has_vsx_region);
 		if (!access_ok(old_ctx, ctx_size)
 		    || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
 					ctx_has_vsx_region)
@@ -842,6 +851,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 #endif
 	{
 		err |= __put_user(0, &frame->uc.uc_link);
+		prepare_setup_sigcontext(tsk, 1);
 		err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
 					NULL, (unsigned long)ksig->ka.sa.sa_handler,
 					1);
-- 
2.26.1


^ permalink raw reply related

* [PATCH v4 07/10] powerpc/signal64: Replace restore_sigcontext() w/ unsafe_restore_sigcontext()
From: Christopher M. Riedl @ 2021-01-28  4:04 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20210128040424.12720-1-cmr@codefail.de>

Previously restore_sigcontext() performed a costly KUAP switch on every
uaccess operation. These repeated uaccess switches cause a significant
drop in signal handling performance.

Rewrite restore_sigcontext() to assume that a userspace read access
window is open. Replace all uaccess functions with their 'unsafe'
versions which avoid the repeated uaccess switches.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal_64.c | 68 ++++++++++++++++++++-------------
 1 file changed, 41 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 4248e4489ff1..d668f8af18fe 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -326,14 +326,14 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 /*
  * Restore the sigcontext from the signal frame.
  */
-
-static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
-			      struct sigcontext __user *sc)
+#define unsafe_restore_sigcontext(tsk, set, sig, sc, e) \
+	unsafe_op_wrap(__unsafe_restore_sigcontext(tsk, set, sig, sc), e)
+static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_t *set,
+						int sig, struct sigcontext __user *sc)
 {
 #ifdef CONFIG_ALTIVEC
 	elf_vrreg_t __user *v_regs;
 #endif
-	unsigned long err = 0;
 	unsigned long save_r13 = 0;
 	unsigned long msr;
 	struct pt_regs *regs = tsk->thread.regs;
@@ -348,27 +348,28 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
 		save_r13 = regs->gpr[13];
 
 	/* copy the GPRs */
-	err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr));
-	err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]);
+	unsafe_copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr),
+			      efault_out);
+	unsafe_get_user(regs->nip, &sc->gp_regs[PT_NIP], efault_out);
 	/* get MSR separately, transfer the LE bit if doing signal return */
-	err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+	unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out);
 	if (sig)
 		regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
-	err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]);
-	err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]);
-	err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]);
-	err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]);
-	err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]);
+	unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out);
+	unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out);
+	unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out);
+	unsafe_get_user(regs->xer, &sc->gp_regs[PT_XER], efault_out);
+	unsafe_get_user(regs->ccr, &sc->gp_regs[PT_CCR], efault_out);
 	/* Don't allow userspace to set SOFTE */
 	set_trap_norestart(regs);
-	err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
-	err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
-	err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
+	unsafe_get_user(regs->dar, &sc->gp_regs[PT_DAR], efault_out);
+	unsafe_get_user(regs->dsisr, &sc->gp_regs[PT_DSISR], efault_out);
+	unsafe_get_user(regs->result, &sc->gp_regs[PT_RESULT], efault_out);
 
 	if (!sig)
 		regs->gpr[13] = save_r13;
 	if (set != NULL)
-		err |=  __get_user(set->sig[0], &sc->oldmask);
+		unsafe_get_user(set->sig[0], &sc->oldmask, efault_out);
 
 	/*
 	 * Force reload of FP/VEC.
@@ -378,29 +379,28 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
 	regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
 
 #ifdef CONFIG_ALTIVEC
-	err |= __get_user(v_regs, &sc->v_regs);
-	if (err)
-		return err;
+	unsafe_get_user(v_regs, &sc->v_regs, efault_out);
 	if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
 		return -EFAULT;
 	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
 	if (v_regs != NULL && (msr & MSR_VEC) != 0) {
-		err |= __copy_from_user(&tsk->thread.vr_state, v_regs,
-					33 * sizeof(vector128));
+		unsafe_copy_from_user(&tsk->thread.vr_state, v_regs,
+				      33 * sizeof(vector128), efault_out);
 		tsk->thread.used_vr = true;
 	} else if (tsk->thread.used_vr) {
 		memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
 	}
 	/* Always get VRSAVE back */
 	if (v_regs != NULL)
-		err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
+		unsafe_get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33],
+				efault_out);
 	else
 		tsk->thread.vrsave = 0;
 	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 		mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
 #endif /* CONFIG_ALTIVEC */
 	/* restore floating point */
-	err |= copy_fpr_from_user(tsk, &sc->fp_regs);
+	unsafe_copy_fpr_from_user(tsk, &sc->fp_regs, efault_out);
 #ifdef CONFIG_VSX
 	/*
 	 * Get additional VSX data. Update v_regs to point after the
@@ -409,14 +409,17 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
 	 */
 	v_regs += ELF_NVRREG;
 	if ((msr & MSR_VSX) != 0) {
-		err |= copy_vsx_from_user(tsk, v_regs);
+		unsafe_copy_vsx_from_user(tsk, v_regs, efault_out);
 		tsk->thread.used_vsr = true;
 	} else {
 		for (i = 0; i < 32 ; i++)
 			tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
 	}
 #endif
-	return err;
+	return 0;
+
+efault_out:
+	return -EFAULT;
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -701,8 +704,14 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
 		do_exit(SIGSEGV);
 	set_current_blocked(&set);
-	if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext))
+
+	if (!user_read_access_begin(new_ctx, ctx_size))
+		return -EFAULT;
+	if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
+		user_read_access_end();
 		do_exit(SIGSEGV);
+	}
+	user_read_access_end();
 
 	/* This returns like rt_sigreturn */
 	set_thread_flag(TIF_RESTOREALL);
@@ -807,8 +816,13 @@ SYSCALL_DEFINE0(rt_sigreturn)
 		 * causing a TM bad thing.
 		 */
 		current->thread.regs->msr &= ~MSR_TS_MASK;
-		if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext))
+		if (!user_read_access_begin(uc, sizeof(*uc)))
+			return -EFAULT;
+		if (__unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) {
+			user_read_access_end();
 			goto badframe;
+		}
+		user_read_access_end();
 	}
 
 	if (restore_altstack(&uc->uc_stack))
-- 
2.26.1


^ permalink raw reply related

* Re: [PATCH v15 10/10] arm64: Add IMA log information in kimage used for kexec
From: Lakshmi Ramasubramanian @ 2021-01-28  4:05 UTC (permalink / raw)
  To: Will Deacon, Mimi Zohar
  Cc: mark.rutland, bhsharma, tao.li, paulus, vincenzo.frascino,
	frowand.list, sashal, robh, masahiroy, jmorris, takahiro.akashi,
	linux-arm-kernel, catalin.marinas, serge, devicetree,
	pasha.tatashin, prsriva, hsinyi, allison, christophe.leroy,
	mbrugger, balajib, gregkh, linux-kernel, james.morse,
	dmitry.kasatkin, linux-integrity, linuxppc-dev, bauerman
In-Reply-To: <20210127231334.GB1016@willie-the-truck>

On 1/27/21 3:13 PM, Will Deacon wrote:
> On Wed, Jan 27, 2021 at 01:31:02PM -0500, Mimi Zohar wrote:
>> On Wed, 2021-01-27 at 10:24 -0800, Lakshmi Ramasubramanian wrote:
>>> On 1/27/21 10:02 AM, Will Deacon wrote:
>>>> On Wed, Jan 27, 2021 at 09:56:53AM -0800, Lakshmi Ramasubramanian wrote:
>>>>> On 1/27/21 8:54 AM, Will Deacon wrote:
>>>>>> On Fri, Jan 15, 2021 at 09:30:17AM -0800, Lakshmi Ramasubramanian wrote:
>>>>>>> Address and size of the buffer containing the IMA measurement log need
>>>>>>> to be passed from the current kernel to the next kernel on kexec.
>>>>>>>
>>>>>>> Add address and size fields to "struct kimage_arch" for ARM64 platform
>>>>>>> to hold the address and size of the IMA measurement log buffer.
>>>>>>>
>>>>>>> Update CONFIG_KEXEC_FILE to select CONFIG_HAVE_IMA_KEXEC, if CONFIG_IMA
>>>>>>> is enabled, to indicate that the IMA measurement log information is
>>>>>>> present in the device tree for ARM64.
>>>>>>>
>>>>>>> Co-developed-by: Prakhar Srivastava <prsriva@linux.microsoft.com>
>>>>>>> Signed-off-by: Prakhar Srivastava <prsriva@linux.microsoft.com>
>>>>>>> Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
>>>>>>> Reviewed-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
>>>>>>> ---
>>>>>>>     arch/arm64/Kconfig             | 1 +
>>>>>>>     arch/arm64/include/asm/kexec.h | 5 +++++
>>>>>>>     2 files changed, 6 insertions(+)
>>>>>>>
>>>>>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>>>>>>> index 1d466addb078..ea7f7fe3dccd 100644
>>>>>>> --- a/arch/arm64/Kconfig
>>>>>>> +++ b/arch/arm64/Kconfig
>>>>>>> @@ -1094,6 +1094,7 @@ config KEXEC
>>>>>>>     config KEXEC_FILE
>>>>>>>     	bool "kexec file based system call"
>>>>>>>     	select KEXEC_CORE
>>>>>>> +	select HAVE_IMA_KEXEC if IMA
>>>>>>>     	help
>>>>>>>     	  This is new version of kexec system call. This system call is
>>>>>>>     	  file based and takes file descriptors as system call argument
>>>>>>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>>>>>>> index d24b527e8c00..2bd19ccb6c43 100644
>>>>>>> --- a/arch/arm64/include/asm/kexec.h
>>>>>>> +++ b/arch/arm64/include/asm/kexec.h
>>>>>>> @@ -100,6 +100,11 @@ struct kimage_arch {
>>>>>>>     	void *elf_headers;
>>>>>>>     	unsigned long elf_headers_mem;
>>>>>>>     	unsigned long elf_headers_sz;
>>>>>>> +
>>>>>>> +#ifdef CONFIG_IMA_KEXEC
>>>>>>> +	phys_addr_t ima_buffer_addr;
>>>>>>> +	size_t ima_buffer_size;
>>>>>>> +#endif
>>>>>>
>>>>>> Why do these need to be in the arch structure instead of 'struct kimage'?
>>>>>>
>>>>>
>>>>> Currently, only powerpc and, with this patch set, arm64 have support for
>>>>> carrying forward IMA measurement list across kexec system call. The above
>>>>> fields are used for tracking IMA measurement list.
>>>>>
>>>>> Do you see a reason to move these fields to "struct kimage"?
>>>>
>>>> If they're gated on CONFIG_IMA_KEXEC, then it seems harmless for them to
>>>> be added to the shared structure. Or are you saying that there are
>>>> architectures which have CONFIG_IMA_KEXEC but do not want these fields?
>>>>
>>>
>>> As far as I know, there are no other architectures that define
>>> CONFIG_IMA_KEXEC, but do not use these fields.
>>
>> Yes, CONFIG_IMA_KEXEC enables "carrying the IMA measurement list across
>> a soft boot".   The only arch that currently carries the IMA
>> measurement across kexec is powerpc.
> 
> Ok, in which case this sounds like it should be in the shared structure, no?
> 

Ok - I'll move the IMA kexec buffer fields from "struct kimage_arch" to 
"struct kimage" for both powerpc and arm64.

thanks,
  -lakshmi



^ permalink raw reply

* Re: [PATCH 1/2] PCI/AER: Disable AER interrupt during suspend
From: Kai-Heng Feng @ 2021-01-28  4:09 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Joerg Roedel,
	open list:PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC,
	open list:PCI SUBSYSTEM, open list, Lalithambika Krishnakumar,
	Oliver O'Halloran, Bjorn Helgaas, Mika Westerberg, Lu Baolu
In-Reply-To: <20210127205053.GA3049358@bjorn-Precision-5520>

On Thu, Jan 28, 2021 at 4:51 AM Bjorn Helgaas <helgaas@kernel.org> wrote:
>
> On Thu, Jan 28, 2021 at 01:31:00AM +0800, Kai-Heng Feng wrote:
> > Commit 50310600ebda ("iommu/vt-d: Enable PCI ACS for platform opt in
> > hint") enables ACS, and some platforms lose its NVMe after resume from
> > firmware:
> > [   50.947816] pcieport 0000:00:1b.0: DPC: containment event, status:0x1f01 source:0x0000
> > [   50.947817] pcieport 0000:00:1b.0: DPC: unmasked uncorrectable error detected
> > [   50.947829] pcieport 0000:00:1b.0: PCIe Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, (Receiver ID)
> > [   50.947830] pcieport 0000:00:1b.0:   device [8086:06ac] error status/mask=00200000/00010000
> > [   50.947831] pcieport 0000:00:1b.0:    [21] ACSViol                (First)
> > [   50.947841] pcieport 0000:00:1b.0: AER: broadcast error_detected message
> > [   50.947843] nvme nvme0: frozen state error detected, reset controller
> >
> > It happens right after ACS gets enabled during resume.
> >
> > To prevent that from happening, disable AER interrupt and enable it on
> > system suspend and resume, respectively.
>
> Lots of questions here.  Maybe this is what we'll end up doing, but I
> am curious about why the error is reported in the first place.
>
> Is this a consequence of the link going down and back up?

Could be. From the observations, it only happens when firmware suspend
(S3) is used.
Maybe it happens when it's gets powered up, but I don't have equipment
to debug at hardware level.

If we use non-firmware suspend method, enabling ACS after resume won't
trip AER and DPC.

>
> Is it consequence of the device doing a DMA when it shouldn't?

If it's doing DMA while suspending, the same error should also happen
after NVMe is suspended and before PCIe port suspending.
Furthermore, if non-firmware suspend method is used, there's so such
issue, so less likely to be any DMA operation.

>
> Are we doing something in the wrong order during suspend?  Or maybe
> resume, since I assume the error is reported during resume?

Yes the error is reported during resume. The suspend/resume order
seems fine as non-firmware suspend doesn't have this issue.

>
> If we *do* take the error, why doesn't DPC recovery work?

It works for the root port, but not for the NVMe drive:
[   50.947816] pcieport 0000:00:1b.0: DPC: containment event,
status:0x1f01 source:0x0000
[   50.947817] pcieport 0000:00:1b.0: DPC: unmasked uncorrectable error detected
[   50.947829] pcieport 0000:00:1b.0: PCIe Bus Error:
severity=Uncorrected (Non-Fatal), type=Transaction Layer, (Receiver
ID)
[   50.947830] pcieport 0000:00:1b.0:   device [8086:06ac] error
status/mask=00200000/00010000
[   50.947831] pcieport 0000:00:1b.0:    [21] ACSViol                (First)
[   50.947841] pcieport 0000:00:1b.0: AER: broadcast error_detected message
[   50.947843] nvme nvme0: frozen state error detected, reset controller
[   50.948400] ACPI: EC: event unblocked
[   50.948432] xhci_hcd 0000:00:14.0: PME# disabled
[   50.948444] xhci_hcd 0000:00:14.0: enabling bus mastering
[   50.949056] pcieport 0000:00:1b.0: PME# disabled
[   50.949068] pcieport 0000:00:1c.0: PME# disabled
[   50.949416] e1000e 0000:00:1f.6: PME# disabled
[   50.949463] e1000e 0000:00:1f.6: enabling bus mastering
[   50.951606] sd 0:0:0:0: [sda] Starting disk
[   50.951610] nvme 0000:01:00.0: can't change power state from D3hot
to D0 (config space inaccessible)
[   50.951730] nvme nvme0: Removing after probe failure status: -19
[   50.952360] nvme nvme0: failed to set APST feature (-19)
[   50.971136] snd_hda_intel 0000:00:1f.3: PME# disabled
[   51.089330] pcieport 0000:00:1b.0: AER: broadcast resume message
[   51.089345] pcieport 0000:00:1b.0: AER: device recovery successful

But I think why recovery doesn't work for NVMe is for another discussion...

Kai-Heng

>
> > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=209149
> > Fixes: 50310600ebda ("iommu/vt-d: Enable PCI ACS for platform opt in hint")
> > Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> > ---
> >  drivers/pci/pcie/aer.c | 18 ++++++++++++++++++
> >  1 file changed, 18 insertions(+)
> >
> > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> > index 77b0f2c45bc0..0e9a85530ae6 100644
> > --- a/drivers/pci/pcie/aer.c
> > +++ b/drivers/pci/pcie/aer.c
> > @@ -1365,6 +1365,22 @@ static int aer_probe(struct pcie_device *dev)
> >       return 0;
> >  }
> >
> > +static int aer_suspend(struct pcie_device *dev)
> > +{
> > +     struct aer_rpc *rpc = get_service_data(dev);
> > +
> > +     aer_disable_rootport(rpc);
> > +     return 0;
> > +}
> > +
> > +static int aer_resume(struct pcie_device *dev)
> > +{
> > +     struct aer_rpc *rpc = get_service_data(dev);
> > +
> > +     aer_enable_rootport(rpc);
> > +     return 0;
> > +}
> > +
> >  /**
> >   * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP
> >   * @dev: pointer to Root Port, RCEC, or RCiEP
> > @@ -1437,6 +1453,8 @@ static struct pcie_port_service_driver aerdriver = {
> >       .service        = PCIE_PORT_SERVICE_AER,
> >
> >       .probe          = aer_probe,
> > +     .suspend        = aer_suspend,
> > +     .resume         = aer_resume,
> >       .remove         = aer_remove,
> >  };
> >
> > --
> > 2.29.2
> >

^ permalink raw reply

* Re: [PATCH v15 09/10] arm64: Call kmalloc() to allocate DTB buffer
From: Thiago Jung Bauermann @ 2021-01-28  4:14 UTC (permalink / raw)
  To: Lakshmi Ramasubramanian
  Cc: mark.rutland, bhsharma, tao.li, zohar, paulus, vincenzo.frascino,
	frowand.list, sashal, robh, masahiroy, jmorris, takahiro.akashi,
	linux-arm-kernel, catalin.marinas, serge, devicetree,
	pasha.tatashin, Will Deacon, prsriva, hsinyi, allison,
	christophe.leroy, mbrugger, balajib, dmitry.kasatkin,
	linux-kernel, james.morse, gregkh, linux-integrity, linuxppc-dev
In-Reply-To: <58d3ffbf-4d80-c893-34d6-366ebfac55bd@linux.microsoft.com>


Lakshmi Ramasubramanian <nramas@linux.microsoft.com> writes:

> On 1/27/21 7:52 PM, Thiago Jung Bauermann wrote:
>> Will Deacon <will@kernel.org> writes:
>> 
>>> On Wed, Jan 27, 2021 at 09:59:38AM -0800, Lakshmi Ramasubramanian wrote:
>>>> On 1/27/21 8:52 AM, Will Deacon wrote:
>>>>
>>>> Hi Will,
>>>>
>>>>> On Fri, Jan 15, 2021 at 09:30:16AM -0800, Lakshmi Ramasubramanian wrote:
>>>>>> create_dtb() function allocates kernel virtual memory for
>>>>>> the device tree blob (DTB).  This is not consistent with other
>>>>>> architectures, such as powerpc, which calls kmalloc() for allocating
>>>>>> memory for the DTB.
>>>>>>
>>>>>> Call kmalloc() to allocate memory for the DTB, and kfree() to free
>>>>>> the allocated memory.
>>>>>>
>>>>>> Co-developed-by: Prakhar Srivastava <prsriva@linux.microsoft.com>
>>>>>> Signed-off-by: Prakhar Srivastava <prsriva@linux.microsoft.com>
>>>>>> Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
>>>>>> ---
>>>>>>    arch/arm64/kernel/machine_kexec_file.c | 12 +++++++-----
>>>>>>    1 file changed, 7 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
>>>>>> index 7de9c47dee7c..51c40143d6fa 100644
>>>>>> --- a/arch/arm64/kernel/machine_kexec_file.c
>>>>>> +++ b/arch/arm64/kernel/machine_kexec_file.c
>>>>>> @@ -29,7 +29,7 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
>>>>>>    int arch_kimage_file_post_load_cleanup(struct kimage *image)
>>>>>>    {
>>>>>> -	vfree(image->arch.dtb);
>>>>>> +	kfree(image->arch.dtb);
>>>>>>    	image->arch.dtb = NULL;
>>>>>>    	vfree(image->arch.elf_headers);
>>>>>> @@ -59,19 +59,21 @@ static int create_dtb(struct kimage *image,
>>>>>>    			+ cmdline_len + DTB_EXTRA_SPACE;
>>>>>>    	for (;;) {
>>>>>> -		buf = vmalloc(buf_size);
>>>>>> +		buf = kmalloc(buf_size, GFP_KERNEL);
>>>>>
>>>>> Is there a functional need for this patch? I build the 'dtbs' target just
>>>>> now and sdm845-db845c.dtb is approaching 100K, which feels quite large
>>>>> for kmalloc().
>>>>
>>>> Changing the allocation from vmalloc() to kmalloc() would help us further
>>>> consolidate the DTB setup code for powerpc and arm64.
>>>
>>> Ok, but at the risk of allocation failure. Can powerpc use vmalloc()
>>> instead?
>> I believe this patch stems from this suggestion by Rob Herring:
>> 
>>> This could be taken a step further and do the allocation of the new
>>> FDT. The difference is arm64 uses vmalloc and powerpc uses kmalloc. The
>>> arm64 version also retries with a bigger allocation. That seems
>>> unnecessary.
>> in
>> https://lore.kernel.org/linux-integrity/20201211221006.1052453-3-robh@kernel.org/
>> The problem is that this patch implements only part of the suggestion,
>> which isn't useful in itself. So the patch series should either drop
>> this patch or consolidate the FDT allocation between the arches.
>> I just tested on powernv and pseries platforms and powerpc can use
>> vmalloc for the FDT buffer.
>> 
>
> Thanks for verifying on powerpc platform Thiago.
>
> I'll update the patch to do the following:
>
> => Use vmalloc for FDT buffer allocation on powerpc
> => Keep vmalloc for arm64, but remove the retry on allocation.
> => Also, there was a memory leak of FDT buffer in the error code path on arm64,
> which I'll fix as well.
>
> Did I miss anything?

Yes, you missed the second part of Rob's suggestion I was mentioning,
which is factoring out the code which allocates the new FDT from both
arm64 and powerpc.

-- 
Thiago Jung Bauermann
IBM Linux Technology Center

^ permalink raw reply

* Re: [PATCH v15 09/10] arm64: Call kmalloc() to allocate DTB buffer
From: Lakshmi Ramasubramanian @ 2021-01-28  4:33 UTC (permalink / raw)
  To: Thiago Jung Bauermann
  Cc: mark.rutland, bhsharma, tao.li, zohar, paulus, vincenzo.frascino,
	frowand.list, sashal, robh, masahiroy, jmorris, takahiro.akashi,
	linux-arm-kernel, catalin.marinas, serge, devicetree,
	pasha.tatashin, Will Deacon, prsriva, hsinyi, allison,
	christophe.leroy, mbrugger, balajib, dmitry.kasatkin,
	linux-kernel, james.morse, gregkh, linux-integrity, linuxppc-dev
In-Reply-To: <87y2gdr93p.fsf@manicouagan.localdomain>

On 1/27/21 8:14 PM, Thiago Jung Bauermann wrote:
> 
> Lakshmi Ramasubramanian <nramas@linux.microsoft.com> writes:
> 
>> On 1/27/21 7:52 PM, Thiago Jung Bauermann wrote:
>>> Will Deacon <will@kernel.org> writes:
>>>
>>>> On Wed, Jan 27, 2021 at 09:59:38AM -0800, Lakshmi Ramasubramanian wrote:
>>>>> On 1/27/21 8:52 AM, Will Deacon wrote:
>>>>>
>>>>> Hi Will,
>>>>>
>>>>>> On Fri, Jan 15, 2021 at 09:30:16AM -0800, Lakshmi Ramasubramanian wrote:
>>>>>>> create_dtb() function allocates kernel virtual memory for
>>>>>>> the device tree blob (DTB).  This is not consistent with other
>>>>>>> architectures, such as powerpc, which calls kmalloc() for allocating
>>>>>>> memory for the DTB.
>>>>>>>
>>>>>>> Call kmalloc() to allocate memory for the DTB, and kfree() to free
>>>>>>> the allocated memory.
>>>>>>>
>>>>>>> Co-developed-by: Prakhar Srivastava <prsriva@linux.microsoft.com>
>>>>>>> Signed-off-by: Prakhar Srivastava <prsriva@linux.microsoft.com>
>>>>>>> Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
>>>>>>> ---
>>>>>>>     arch/arm64/kernel/machine_kexec_file.c | 12 +++++++-----
>>>>>>>     1 file changed, 7 insertions(+), 5 deletions(-)
>>>>>>>
>>>>>>> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
>>>>>>> index 7de9c47dee7c..51c40143d6fa 100644
>>>>>>> --- a/arch/arm64/kernel/machine_kexec_file.c
>>>>>>> +++ b/arch/arm64/kernel/machine_kexec_file.c
>>>>>>> @@ -29,7 +29,7 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
>>>>>>>     int arch_kimage_file_post_load_cleanup(struct kimage *image)
>>>>>>>     {
>>>>>>> -	vfree(image->arch.dtb);
>>>>>>> +	kfree(image->arch.dtb);
>>>>>>>     	image->arch.dtb = NULL;
>>>>>>>     	vfree(image->arch.elf_headers);
>>>>>>> @@ -59,19 +59,21 @@ static int create_dtb(struct kimage *image,
>>>>>>>     			+ cmdline_len + DTB_EXTRA_SPACE;
>>>>>>>     	for (;;) {
>>>>>>> -		buf = vmalloc(buf_size);
>>>>>>> +		buf = kmalloc(buf_size, GFP_KERNEL);
>>>>>>
>>>>>> Is there a functional need for this patch? I build the 'dtbs' target just
>>>>>> now and sdm845-db845c.dtb is approaching 100K, which feels quite large
>>>>>> for kmalloc().
>>>>>
>>>>> Changing the allocation from vmalloc() to kmalloc() would help us further
>>>>> consolidate the DTB setup code for powerpc and arm64.
>>>>
>>>> Ok, but at the risk of allocation failure. Can powerpc use vmalloc()
>>>> instead?
>>> I believe this patch stems from this suggestion by Rob Herring:
>>>
>>>> This could be taken a step further and do the allocation of the new
>>>> FDT. The difference is arm64 uses vmalloc and powerpc uses kmalloc. The
>>>> arm64 version also retries with a bigger allocation. That seems
>>>> unnecessary.
>>> in
>>> https://lore.kernel.org/linux-integrity/20201211221006.1052453-3-robh@kernel.org/
>>> The problem is that this patch implements only part of the suggestion,
>>> which isn't useful in itself. So the patch series should either drop
>>> this patch or consolidate the FDT allocation between the arches.
>>> I just tested on powernv and pseries platforms and powerpc can use
>>> vmalloc for the FDT buffer.
>>>
>>
>> Thanks for verifying on powerpc platform Thiago.
>>
>> I'll update the patch to do the following:
>>
>> => Use vmalloc for FDT buffer allocation on powerpc
>> => Keep vmalloc for arm64, but remove the retry on allocation.
>> => Also, there was a memory leak of FDT buffer in the error code path on arm64,
>> which I'll fix as well.
>>
>> Did I miss anything?
> 
> Yes, you missed the second part of Rob's suggestion I was mentioning,
> which is factoring out the code which allocates the new FDT from both
> arm64 and powerpc.
> 

Sure - I'll address that.

thanks,
  -lakshmi


^ permalink raw reply

* Re: [PATCH v15 09/10] arm64: Call kmalloc() to allocate DTB buffer
From: Joe Perches @ 2021-01-28  4:57 UTC (permalink / raw)
  To: Thiago Jung Bauermann, Will Deacon
  Cc: mark.rutland, bhsharma, tao.li, zohar, paulus, vincenzo.frascino,
	frowand.list, sashal, robh, Lakshmi Ramasubramanian, masahiroy,
	jmorris, takahiro.akashi, linux-arm-kernel, catalin.marinas,
	serge, devicetree, pasha.tatashin, prsriva, hsinyi, allison,
	christophe.leroy, mbrugger, balajib, dmitry.kasatkin,
	linux-kernel, james.morse, gregkh, linux-integrity, linuxppc-dev
In-Reply-To: <871re5soof.fsf@manicouagan.localdomain>

On Thu, 2021-01-28 at 00:52 -0300, Thiago Jung Bauermann wrote:
> The problem is that this patch implements only part of the suggestion,
> which isn't useful in itself. So the patch series should either drop
> this patch or consolidate the FDT allocation between the arches.
> 
> I just tested on powernv and pseries platforms and powerpc can use
> vmalloc for the FDT buffer.

Perhaps more sensible to use kvmalloc/kvfree.




^ permalink raw reply

* Re: [PATCH] powerpc64: Workaround sigtramp vdso return call.
From: Nicholas Piggin @ 2021-01-28  5:38 UTC (permalink / raw)
  To: Paul E Murphy, Raoni Fassina Firmino
  Cc: Florian Weimer, linuxppc-dev, libc-alpha
In-Reply-To: <20210127162140.wtetd4ob2iynvvvt@work-tp>

+linuxppc-dev

Excerpts from Raoni Fassina Firmino's message of January 28, 2021 2:21 am:
> On Tue, Jan 26, 2021 at 08:45:00AM -0600, AL glibc-alpha wrote:
>> 
>> 
>> On 1/26/21 8:12 AM, Florian Weimer via Libc-alpha wrote:
>> > * Raoni Fassina Firmino:
>> > 
>> > > A not so recent kernel change[1] changed how the trampoline
>> > > `__kernel_sigtramp_rt64` is used to call signal handlers.
>> > > 
>> > > This was exposed on the test misc/tst-sigcontext-get_pc
>> > > 
>> > > Before kernel 5.9, the kernel set LR to the trampoline address and
>> > > jumped directly to the signal handler, and at the end the signal
>> > > handler, as any other function, would `blr` to the address set.  In
>> > > other words, the trampoline was executed just at the end of the signal
>> > > handler and the only thing it did was call sigreturn.  But since
>> > > kernel 5.9 the kernel set CTRL to the signal handler and calls to the
>> > > trampoline code, the trampoline then `bctrl` to the address in CTRL,
>> > > setting the LR to the next instruction in the middle of the
>> > > trampoline, when the signal handler returns, the rest of the
>> > > trampoline code executes the same code as before.
>> > 
>> > Thanks for the patch, byt:
>> > 
>> > No one has explained so far why the original blr instruction couldn't be
>> > augmented with the appropriate branch predictor hint.  The 2.07 ISA
>> > manual suggests that it's possible, but maybe I'm reading it wrong.
>> 
>> bctrl is the preferred form of making indirect calls.  You can add hint 0b01
>> to bclr (blr) to get similar behavior on power8/9, but as noted in the ISA,
>> it is optional.
> 
> What branch prediction we are talking about? I think there is only one
> blr that is relevant, the one returning from the signal handler to the
> trampoline. In this case it if it is a simple blr is already hinted
> correctly with 0b00 (I think it is the default BH for blr), that it is a
> subroutine return. We don't have control over the blr from the signal
> handler to change the hint to 0b01 anyway. So IIUC, the return address
> predictor failed before because the signal handler don't go back from
> the same place (+4) it was called, and it changes with the added bctrl.
> 
> I am CC'ing Nicholas and maybe he has more insight.

Prior to the kernel patch, if the signal handler code used blr BH=0b01
for returns that would indeed prevent the unbalance on processors which
implement it.

But you are right, as explained in Linux commit 0138ba5783ae, the blr is 
in the signal handler function so we can't change that.

> (I know that now this discussion is split in two places, the original
> thread Florian started and this on for the patch. Not sure where best to
> continue this)

linuxppc-dev doesn't mind responsible cross posts to other lists,
hopefully libc-alpha is too.

Thanks,
Nick

^ permalink raw reply

* Re: [PATCH 2/3] kbuild: LD_VERSION redenomination
From: Masahiro Yamada @ 2021-01-28  6:38 UTC (permalink / raw)
  To: Linux Kbuild mailing list
  Cc: Thomas Bogendoerfer, Catalin Marinas, Dominique Martinet,
	linuxppc-dev, Linux Kernel Mailing List, Jiaxun Yang, linux-mips,
	Paul Mackerras, Huacai Chen, Will Deacon, linux-arm-kernel
In-Reply-To: <20201212165431.150750-2-masahiroy@kernel.org>

On Sun, Dec 13, 2020 at 1:54 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
>
> Commit ccbef1674a15 ("Kbuild, lto: add ld-version and ld-ifversion
> macros") introduced scripts/ld-version.sh for GCC LTO.
>
> At that time, this script handled 5 version fields because GCC LTO
> needed the downstream binutils. (https://lkml.org/lkml/2014/4/8/272)
>
> The code snippet from the submitted patch was as follows:
>
>     # We need HJ Lu's Linux binutils because mainline binutils does not
>     # support mixing assembler and LTO code in the same ld -r object.
>     # XXX check if the gcc plugin ld is the expected one too
>     # XXX some Fedora binutils should also support it. How to check for that?
>     ifeq ($(call ld-ifversion,-ge,22710001,y),y)
>         ...
>
> However, GCC LTO was not merged into the mainline after all.
> (https://lkml.org/lkml/2014/4/8/272)
>
> So, the 4th and 5th fields were never used, and finally removed by
> commit 0d61ed17dd30 ("ld-version: Drop the 4th and 5th version
> components").
>
> Since then, the last 4-digits returned by this script is always zeros.
>
> Remove the meaningless last 4-digits. This makes the version format
> consistent with GCC_VERSION, CLANG_VERSION, LLD_VERSION.
>
> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
> ---
>

Applied to linux-kbuild.


>  arch/arm64/Kconfig            | 2 +-
>  arch/mips/loongson64/Platform | 2 +-
>  arch/mips/vdso/Kconfig        | 2 +-
>  arch/powerpc/Makefile         | 2 +-
>  arch/powerpc/lib/Makefile     | 2 +-
>  scripts/ld-version.sh         | 2 +-
>  6 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index a6b5b7ef40ae..69d56b21a6ec 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1499,7 +1499,7 @@ config ARM64_PTR_AUTH
>         depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC
>         # Modern compilers insert a .note.gnu.property section note for PAC
>         # which is only understood by binutils starting with version 2.33.1.
> -       depends on LD_IS_LLD || LD_VERSION >= 233010000 || (CC_IS_GCC && GCC_VERSION < 90100)
> +       depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100)
>         depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE
>         depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
>         help
> diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
> index ec42c5085905..cc0b9c87f9ad 100644
> --- a/arch/mips/loongson64/Platform
> +++ b/arch/mips/loongson64/Platform
> @@ -35,7 +35,7 @@ cflags-$(CONFIG_CPU_LOONGSON64)       += $(call as-option,-Wa$(comma)-mno-fix-loongson
>  # can't easily be used safely within the kbuild framework.
>  #
>  ifeq ($(call cc-ifversion, -ge, 0409, y), y)
> -  ifeq ($(call ld-ifversion, -ge, 225000000, y), y)
> +  ifeq ($(call ld-ifversion, -ge, 22500, y), y)
>      cflags-$(CONFIG_CPU_LOONGSON64)  += \
>        $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
>    else
> diff --git a/arch/mips/vdso/Kconfig b/arch/mips/vdso/Kconfig
> index 7aec721398d5..a665f6108cb5 100644
> --- a/arch/mips/vdso/Kconfig
> +++ b/arch/mips/vdso/Kconfig
> @@ -12,7 +12,7 @@
>  # the lack of relocations. As such, we disable the VDSO for microMIPS builds.
>
>  config MIPS_LD_CAN_LINK_VDSO
> -       def_bool LD_VERSION >= 225000000 || LD_IS_LLD
> +       def_bool LD_VERSION >= 22500 || LD_IS_LLD
>
>  config MIPS_DISABLE_VDSO
>         def_bool CPU_MICROMIPS || (!CPU_MIPSR6 && !MIPS_LD_CAN_LINK_VDSO)
> diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
> index 5c8c06215dd4..6a9a852c3d56 100644
> --- a/arch/powerpc/Makefile
> +++ b/arch/powerpc/Makefile
> @@ -65,7 +65,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y))
>  ifdef CONFIG_PPC32
>  KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
>  else
> -ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
> +ifeq ($(call ld-ifversion, -ge, 22500, y),y)
>  # Have the linker provide sfpr if possible.
>  # There is a corresponding test in arch/powerpc/lib/Makefile
>  KBUILD_LDFLAGS_MODULE += --save-restore-funcs
> diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
> index 69a91b571845..d4efc182662a 100644
> --- a/arch/powerpc/lib/Makefile
> +++ b/arch/powerpc/lib/Makefile
> @@ -31,7 +31,7 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION)        += error-inject.o
>  # 64-bit linker creates .sfpr on demand for final link (vmlinux),
>  # so it is only needed for modules, and only for older linkers which
>  # do not support --save-restore-funcs
> -ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
> +ifeq ($(call ld-ifversion, -lt, 22500, y),y)
>  extra-$(CONFIG_PPC64)  += crtsavres.o
>  endif
>
> diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh
> index f2be0ff9a738..0f8a2c0f9502 100755
> --- a/scripts/ld-version.sh
> +++ b/scripts/ld-version.sh
> @@ -6,6 +6,6 @@
>         gsub(".*version ", "");
>         gsub("-.*", "");
>         split($1,a, ".");
> -       print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
> +       print a[1]*10000 + a[2]*100 + a[3];
>         exit
>         }
> --
> 2.27.0
>


-- 
Best Regards
Masahiro Yamada

^ permalink raw reply

* Re: [PATCH 11/27] m68k: add missing FORCE and fix 'targets' to make if_changed work
From: Geert Uytterhoeven @ 2021-01-28  7:58 UTC (permalink / raw)
  To: Masahiro Yamada
  Cc: Linux-Arch, open list:TENSILICA XTENSA PORT (xtensa),
	linux-ia64@vger.kernel.org, Parisc List, linux-kbuild,
	Linux-sh list, the arch/x86 maintainers, linux-um,
	Linux Kernel Mailing List, open list:BROADCOM NVRAM DRIVER,
	linux-m68k, alpha, sparclinux, linuxppc-dev, Linux ARM
In-Reply-To: <20210128005110.2613902-12-masahiroy@kernel.org>

On Thu, Jan 28, 2021 at 1:54 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
> The rules in this Makefile cannot detect the command line change because
> the prerequisite 'FORCE' is missing.
>
> Adding 'FORCE' will result in the headers being rebuilt every time
> because the 'targets' addition is also wrong; the file paths in
> 'targets' must be relative to the current Makefile.
>
> Fix all of them so the if_changed rules work correctly.
>
> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>

Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH 12/27] m68k: syscalls: switch to generic syscalltbl.sh
From: Geert Uytterhoeven @ 2021-01-28  8:17 UTC (permalink / raw)
  To: Masahiro Yamada
  Cc: Linux-Arch, open list:TENSILICA XTENSA PORT (xtensa),
	linux-ia64@vger.kernel.org, Parisc List, linux-kbuild,
	Linux-sh list, the arch/x86 maintainers, linux-um,
	Linux Kernel Mailing List, open list:BROADCOM NVRAM DRIVER,
	linux-m68k, alpha, sparclinux, linuxppc-dev, Linux ARM
In-Reply-To: <20210128005110.2613902-13-masahiroy@kernel.org>

Hi Yamada-san,

On Thu, Jan 28, 2021 at 1:54 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
> As of v5.11-rc1, 12 architectures duplicate similar shell scripts in
> order to generate syscall table headers. My goal is to unify them into
> the single scripts/syscalltbl.sh.
>
> This commit converts m68k to use scripts/syscalltbl.sh.
>
> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>

Thanks a lot!

Tested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH 02/27] x86/syscalls: fix -Wmissing-prototypes warnings from COND_SYSCALL()
From: Sergei Shtylyov @ 2021-01-28  8:00 UTC (permalink / raw)
  To: Masahiro Yamada, linux-arch, x86
  Cc: linux-xtensa, linux-ia64, linux-parisc, linux-kbuild, linux-sh,
	linux-um, linux-kernel, linux-mips, linux-m68k, linux-alpha,
	sparclinux, linuxppc-dev, linux-arm-kernel
In-Reply-To: <dd37a7f2-55e1-2e96-0c93-4a40980b8ef2@gmail.com>

On 28.01.2021 10:59, Sergei Shtylyov wrote:

[...]
>> Building kernel/sys_ni.c with W=1 omits tons of -Wmissing-prototypes
> 
>     Emits?
> 
>> warnings.
>>
>> $ make W=1 kernel/sys_ni.o
>>    [ snip ]
>>    CC      kernel/sys_ni.o
>> In file included from kernel/sys_ni.c:10:
>> ./arch/x86/include/asm/syscall_wrapper.h:83:14: warning: no previous 
>> prototype for '__x64_sys_io_setup' [-Wmissing-prototypes]
>>     83 |  __weak long __##abi##_##name(const struct pt_regs *__unused) \
>>        |              ^~
>> ./arch/x86/include/asm/syscall_wrapper.h:100:2: note: in expansion of macro 
>> '__COND_SYSCALL'
>>    100 |  __COND_SYSCALL(x64, sys_##name)
>>        |  ^~~~~~~~~~~~~~
>> ./arch/x86/include/asm/syscall_wrapper.h:256:2: note: in expansion of macro 
>> '__X64_COND_SYSCALL'
>>    256 |  __X64_COND_SYSCALL(name)     \
>>        |  ^~~~~~~~~~~~~~~~~~
>> kernel/sys_ni.c:39:1: note: in expansion of macro 'COND_SYSCALL'
>>     39 | COND_SYSCALL(io_setup);
>>        | ^~~~~~~~~~~~
>> ./arch/x86/include/asm/syscall_wrapper.h:83:14: warning: no previous 
>> prototype for '__ia32_sys_io_setup' [-Wmissing-prototypes]
>>     83 |  __weak long __##abi##_##name(const struct pt_regs *__unused) \
>>        |              ^~
>> ./arch/x86/include/asm/syscall_wrapper.h:120:2: note: in expansion of macro 
>> '__COND_SYSCALL'
>>    120 |  __COND_SYSCALL(ia32, sys_##name)
>>        |  ^~~~~~~~~~~~~~
>> ./arch/x86/include/asm/syscall_wrapper.h:257:2: note: in expansion of macro 
>> '__IA32_COND_SYSCALL'
>>    257 |  __IA32_COND_SYSCALL(name)
>>        |  ^~~~~~~~~~~~~~~~~~~
>> kernel/sys_ni.c:39:1: note: in expansion of macro 'COND_SYSCALL'
>>     39 | COND_SYSCALL(io_setup);
>>        | ^~~~~~~~~~~~
>>    ...
>>
>> __SYS_STUB0() and __SYS_STUBx() defined a few lines above have forward
>> declarations. Let's do likewise for __COND_SYSCALL() to fix the
>> warnings.
>>
>> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
>> ---
>>
>>   arch/x86/include/asm/syscall_wrapper.h | 1 +
>>   1 file changed, 1 insertion(+)
>>
>> diff --git a/arch/x86/include/asm/syscall_wrapper.h 
>> b/arch/x86/include/asm/syscall_wrapper.h
>> index a84333adeef2..80c08c7d5e72 100644
>> --- a/arch/x86/include/asm/syscall_wrapper.h
>> +++ b/arch/x86/include/asm/syscall_wrapper.h
>> @@ -80,6 +80,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs 
>> *regs);
>>       }
>>   #define __COND_SYSCALL(abi, name)                    \
>> +    __weak long __##abi##_##name(const struct pt_regs *__unused);    \
>>       __weak long __##abi##_##name(const struct pt_regs *__unused)    \
> 
>     Aren't these two lines identical?

     Ah, got it! :-)

[...]

MBR, Sergei

^ permalink raw reply

* Re: [PATCH 02/27] x86/syscalls: fix -Wmissing-prototypes warnings from COND_SYSCALL()
From: Sergei Shtylyov @ 2021-01-28  7:59 UTC (permalink / raw)
  To: Masahiro Yamada, linux-arch, x86
  Cc: linux-xtensa, linux-ia64, linux-parisc, linux-kbuild, linux-sh,
	linux-um, linux-kernel, linux-mips, linux-m68k, linux-alpha,
	sparclinux, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20210128005110.2613902-3-masahiroy@kernel.org>

Hello!

On 28.01.2021 3:50, Masahiro Yamada wrote:

> Building kernel/sys_ni.c with W=1 omits tons of -Wmissing-prototypes

    Emits?

> warnings.
> 
> $ make W=1 kernel/sys_ni.o
>    [ snip ]
>    CC      kernel/sys_ni.o
> In file included from kernel/sys_ni.c:10:
> ./arch/x86/include/asm/syscall_wrapper.h:83:14: warning: no previous prototype for '__x64_sys_io_setup' [-Wmissing-prototypes]
>     83 |  __weak long __##abi##_##name(const struct pt_regs *__unused) \
>        |              ^~
> ./arch/x86/include/asm/syscall_wrapper.h:100:2: note: in expansion of macro '__COND_SYSCALL'
>    100 |  __COND_SYSCALL(x64, sys_##name)
>        |  ^~~~~~~~~~~~~~
> ./arch/x86/include/asm/syscall_wrapper.h:256:2: note: in expansion of macro '__X64_COND_SYSCALL'
>    256 |  __X64_COND_SYSCALL(name)     \
>        |  ^~~~~~~~~~~~~~~~~~
> kernel/sys_ni.c:39:1: note: in expansion of macro 'COND_SYSCALL'
>     39 | COND_SYSCALL(io_setup);
>        | ^~~~~~~~~~~~
> ./arch/x86/include/asm/syscall_wrapper.h:83:14: warning: no previous prototype for '__ia32_sys_io_setup' [-Wmissing-prototypes]
>     83 |  __weak long __##abi##_##name(const struct pt_regs *__unused) \
>        |              ^~
> ./arch/x86/include/asm/syscall_wrapper.h:120:2: note: in expansion of macro '__COND_SYSCALL'
>    120 |  __COND_SYSCALL(ia32, sys_##name)
>        |  ^~~~~~~~~~~~~~
> ./arch/x86/include/asm/syscall_wrapper.h:257:2: note: in expansion of macro '__IA32_COND_SYSCALL'
>    257 |  __IA32_COND_SYSCALL(name)
>        |  ^~~~~~~~~~~~~~~~~~~
> kernel/sys_ni.c:39:1: note: in expansion of macro 'COND_SYSCALL'
>     39 | COND_SYSCALL(io_setup);
>        | ^~~~~~~~~~~~
>    ...
> 
> __SYS_STUB0() and __SYS_STUBx() defined a few lines above have forward
> declarations. Let's do likewise for __COND_SYSCALL() to fix the
> warnings.
> 
> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
> ---
> 
>   arch/x86/include/asm/syscall_wrapper.h | 1 +
>   1 file changed, 1 insertion(+)
> 
> diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
> index a84333adeef2..80c08c7d5e72 100644
> --- a/arch/x86/include/asm/syscall_wrapper.h
> +++ b/arch/x86/include/asm/syscall_wrapper.h
> @@ -80,6 +80,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
>   	}
>   
>   #define __COND_SYSCALL(abi, name)					\
> +	__weak long __##abi##_##name(const struct pt_regs *__unused);	\
>   	__weak long __##abi##_##name(const struct pt_regs *__unused)	\

    Aren't these two lines identical?

[...]

MBR, Sergei

^ permalink raw reply

* Re: [PATCH] PCI: dwc: layerscape: convert to builtin_platform_driver()
From: Geert Uytterhoeven @ 2021-01-28  9:25 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Roy Zang, Lorenzo Pieralisi, PCI, LKML, Minghuan Lian,
	Michael Walle, linux-arm-kernel, Greg Kroah-Hartman,
	Bjorn Helgaas, linuxppc-dev, Mingkai Hu
In-Reply-To: <CAGETcx_81qOe2LvX-J_PBZWdouykPoPYdf5=yMVhnjgDxAkgaw@mail.gmail.com>

Hi Saravana,

On Wed, Jan 27, 2021 at 6:11 PM Saravana Kannan <saravanak@google.com> wrote:
> On Wed, Jan 27, 2021 at 8:56 AM Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> > On Wed, Jan 27, 2021 at 5:42 PM Saravana Kannan <saravanak@google.com> wrote:
> > > On Tue, Jan 26, 2021 at 11:43 PM Geert Uytterhoeven
> > > <geert@linux-m68k.org> wrote:
> > > > On Wed, Jan 27, 2021 at 1:44 AM Saravana Kannan <saravanak@google.com> wrote:
> > > > > On Tue, Jan 26, 2021 at 12:50 AM Geert Uytterhoeven
> > > > > <geert@linux-m68k.org> wrote:
> > > > > > On Mon, Jan 25, 2021 at 11:42 PM Saravana Kannan <saravanak@google.com> wrote:
> > > > > > > On Mon, Jan 25, 2021 at 11:49 AM Michael Walle <michael@walle.cc> wrote:
> > > > > > > > Am 2021-01-21 12:01, schrieb Geert Uytterhoeven:
> > > > > > > > > On Thu, Jan 21, 2021 at 1:05 AM Saravana Kannan <saravanak@google.com>
> > > > > > > > > wrote:
> > > > > > > > >> On Wed, Jan 20, 2021 at 3:53 PM Michael Walle <michael@walle.cc>
> > > > > > > > >> wrote:
> > > > > > > > >> > Am 2021-01-20 20:47, schrieb Saravana Kannan:
> > > > > > > > >> > > On Wed, Jan 20, 2021 at 11:28 AM Michael Walle <michael@walle.cc>
> > > > > > > > >> > > wrote:
> > > > > > > > >> > >>
> > > > > > > > >> > >> [RESEND, fat-fingered the buttons of my mail client and converted
> > > > > > > > >> > >> all CCs to BCCs :(]
> > > > > > > > >> > >>
> > > > > > > > >> > >> Am 2021-01-20 20:02, schrieb Saravana Kannan:
> > > > > > > > >> > >> > On Wed, Jan 20, 2021 at 6:24 AM Rob Herring <robh@kernel.org> wrote:
> > > > > > > > >> > >> >>
> > > > > > > > >> > >> >> On Wed, Jan 20, 2021 at 4:53 AM Michael Walle <michael@walle.cc>
> > > > > > > > >> > >> >> wrote:
> > > > > > > > >> > >> >> >
> > > > > > > > >> > >> >> > fw_devlink will defer the probe until all suppliers are ready. We can't
> > > > > > > > >> > >> >> > use builtin_platform_driver_probe() because it doesn't retry after probe
> > > > > > > > >> > >> >> > deferral. Convert it to builtin_platform_driver().
> > > > > > > > >> > >> >>
> > > > > > > > >> > >> >> If builtin_platform_driver_probe() doesn't work with fw_devlink, then
> > > > > > > > >> > >> >> shouldn't it be fixed or removed?
> > > > > > > > >> > >> >
> > > > > > > > >> > >> > I was actually thinking about this too. The problem with fixing
> > > > > > > > >> > >> > builtin_platform_driver_probe() to behave like
> > > > > > > > >> > >> > builtin_platform_driver() is that these probe functions could be
> > > > > > > > >> > >> > marked with __init. But there are also only 20 instances of
> > > > > > > > >> > >> > builtin_platform_driver_probe() in the kernel:
> > > > > > > > >> > >> > $ git grep ^builtin_platform_driver_probe | wc -l
> > > > > > > > >> > >> > 20
> > > > > > > > >> > >> >
> > > > > > > > >> > >> > So it might be easier to just fix them to not use
> > > > > > > > >> > >> > builtin_platform_driver_probe().
> > > > > > > > >> > >> >
> > > > > > > > >> > >> > Michael,
> > > > > > > > >> > >> >
> > > > > > > > >> > >> > Any chance you'd be willing to help me by converting all these to
> > > > > > > > >> > >> > builtin_platform_driver() and delete builtin_platform_driver_probe()?
> > > > > > > > >> > >>
> > > > > > > > >> > >> If it just moving the probe function to the _driver struct and
> > > > > > > > >> > >> remove the __init annotations. I could look into that.
> > > > > > > > >> > >
> > > > > > > > >> > > Yup. That's pretty much it AFAICT.
> > > > > > > > >> > >
> > > > > > > > >> > > builtin_platform_driver_probe() also makes sure the driver doesn't ask
> > > > > > > > >> > > for async probe, etc. But I doubt anyone is actually setting async
> > > > > > > > >> > > flags and still using builtin_platform_driver_probe().
> > > > > > > > >> >
> > > > > > > > >> > Hasn't module_platform_driver_probe() the same problem? And there
> > > > > > > > >> > are ~80 drivers which uses that.
> > > > > > > > >>
> > > > > > > > >> Yeah. The biggest problem with all of these is the __init markers.
> > > > > > > > >> Maybe some familiar with coccinelle can help?
> > > > > > > > >
> > > > > > > > > And dropping them will increase memory usage.
> > > > > > > >
> > > > > > > > Although I do have the changes for the builtin_platform_driver_probe()
> > > > > > > > ready, I don't think it makes much sense to send these unless we agree
> > > > > > > > on the increased memory footprint. While there are just a few
> > > > > > > > builtin_platform_driver_probe() and memory increase _might_ be
> > > > > > > > negligible, there are many more module_platform_driver_probe().
> > > > > > >
> > > > > > > While it's good to drop code that'll not be used past kernel init, the
> > > > > > > module_platform_driver_probe() is going even more extreme. It doesn't
> > > > > > > even allow deferred probe (well before kernel init is done). I don't
> > > > > > > think that behavior is right and that's why we should delete it. Also,
> > > > > >
> > > > > > This construct is typically used for builtin hardware for which the
> > > > > > dependencies are registered very early, and thus known to probe at
> > > > > > first try (if present).
> > > > > >
> > > > > > > I doubt if any of these probe functions even take up 4KB of memory.
> > > > > >
> > > > > > How many 4 KiB pages do you have in a system with 10 MiB of SRAM?
> > > > > > How many can you afford to waste?
> > > > >
> > > > > There are only a few instances of this macro in the kernel. How many
> > > >
> > > > $ git grep -lw builtin_platform_driver_probe | wc -l
> > > > 21
> > > > $ git grep -lw module_platform_driver_probe | wc -l
> > > > 86
> > > >
> > > > + the ones that haven't been converted to the above yet:
> > > >
> > > > $ git grep -lw platform_driver_probe | wc -l
> > > > 58
> > > >
> > >
> > > Yeah, this adds up in terms of the number of places we'd need to fix.
> > > But thinking more about it, a couple of points:
> > > 1. Not all builtin_platform_driver_probe() are problems for
> > > fw_devlink. So we can just fix them as we go if we need to.
> > >
> > > 2. The problem with builtin_platform_driver_probe() isn't really with
> > > the use of __init. It's the fact that it doesn't allow deferred
> > > probes. builtin_platform_driver_probe()/platform_driver_probe() could
> > > still be fixed up to allow deferred probe until we get to the point
> > > where we free the __init section (so at least till late_initcall).
> >
> > That's intentional: it is used for cases that will (must) never be deferred.
> > That's why it's safe to use __init.
>
> So was the usage of builtin_platform_driver_probe() wrong in the
> driver Michael fixed? Because, deferring and probing again clearly
> works?

It wasn't.  The regression is that the driver no longer probes at first
try, because its dependencies are now probed later.  The question is:
why are the dependencies now probed later?  How to fix that?

> Also, "must never be deferred" seems like a weird condition to
> enforce. I think the real "rule" is that if it defers, the platform is
> not expected to work. But disallowing a probe reattempt seems weird.
> What is it going to hurt if it's attempted again? At worst it fails
> one more time?

"must never be deferred" is not the real condition, but "must not be
probed after __init is freed" is (one of them, there may be other, cfr.
the last paragraph below).  The simplest way to guarantee that is to
probe the driver immediately, and only once.

> Also, I'd argue that all/most of the "can't defer, but I'm still a
> proper struct device" cases are all just patchwork to deal with the
> fact we were playing initcall chicken when there was no fw_devlink.
> I'm hoping we can move people away from that mindset. And the first

I agree, partially.  Still, how come the dependencies are no longer
probed before their consumers when fw_devlinks are enabled?
I thought fw_devlinks is supposed to avoid exactly that?

> step towards that would be to allow *platform_probe() to allow
> deferred probes until late_initcall().

At which increase of complexity, to keep track of which drivers can and
cannot be reprobed anymore after late_initcall?
Still, many of these drivers use platform_driver_probe() early for a
reason: because they need to initialize the hardware early. Probing them
later may introduce subtle bugs.

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH v6 08/39] powerpc: rearrange do_page_fault error case to be inside exception_enter
From: Christophe Leroy @ 2021-01-28  9:25 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev
In-Reply-To: <20210115165012.1260253-9-npiggin@gmail.com>



Le 15/01/2021 à 17:49, Nicholas Piggin a écrit :
> This keeps the context tracking over the entire interrupt handler which
> helps later with moving context tracking into interrupt wrappers.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/mm/fault.c | 28 ++++++++++++++++------------
>   1 file changed, 16 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index e476d7701413..e4121fd9fcf1 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -544,20 +544,24 @@ NOKPROBE_SYMBOL(__do_page_fault);
>   
>   long do_page_fault(struct pt_regs *regs)
>   {
> -	const struct exception_table_entry *entry;
> -	enum ctx_state prev_state = exception_enter();
> -	int rc = __do_page_fault(regs, regs->dar, regs->dsisr);
> -	exception_exit(prev_state);
> -	if (likely(!rc))
> -		return 0;
> -
> -	entry = search_exception_tables(regs->nip);
> -	if (unlikely(!entry))
> -		return rc;

Could we keep this layout with using a 'goto' to the end of the function, instead of pushing error 
handling to the right ?

Because at the end of the series once all context tracking is gone into helpers, the result looks 
unfriendly.

It would look cleaner as:

static long __do_page_fault(struct pt_regs *regs)
{
	long err;
	const struct exception_table_entry *entry;

	err = ___do_page_fault(regs, regs->dar, regs->dsisr);
	if (likely(!err))
		return 0;

	entry = search_exception_tables(regs->nip);
	if (likely(entry)) {
		instruction_pointer_set(regs, extable_fixup(entry));
		return 0;
	} else if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
		/* 32 and 64e handle this in asm */
		return err;
	}
	__bad_page_fault(regs, err);
	return 0;
}
NOKPROBE_SYMBOL(__do_page_fault);



> +	enum ctx_state prev_state;
> +	long err;
> +
> +	prev_state = exception_enter();
> +	err = __do_page_fault(regs, regs->dar, regs->dsisr);
> +	if (unlikely(err)) {
> +		const struct exception_table_entry *entry;
> +
> +		entry = search_exception_tables(regs->nip);
> +		if (likely(entry)) {
> +			instruction_pointer_set(regs, extable_fixup(entry));
> +			err = 0;
> +		}
> +	}
>   
> -	instruction_pointer_set(regs, extable_fixup(entry));
> +	exception_exit(prev_state);
>   
> -	return 0;
> +	return err;
>   }
>   NOKPROBE_SYMBOL(do_page_fault);
>   
> 

^ permalink raw reply

* Re: [PATCH v4 2/2] powerpc/mce: Remove per cpu variables from MCE handlers
From: Ganesh @ 2021-01-28  9:27 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev, mpe; +Cc: mahesh, npiggin
In-Reply-To: <90f24b44-1747-21f4-3829-7af20cf95e46@csgroup.eu>

On 1/25/21 2:54 PM, Christophe Leroy wrote:

>
>
> Le 22/01/2021 à 13:32, Ganesh Goudar a écrit :
>> Access to per-cpu variables requires translation to be enabled on
>> pseries machine running in hash mmu mode, Since part of MCE handler
>> runs in realmode and part of MCE handling code is shared between ppc
>> architectures pseries and powernv, it becomes difficult to manage
>> these variables differently on different architectures, So have
>> these variables in paca instead of having them as per-cpu variables
>> to avoid complications.
>>
>> Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
>> ---
>> v2: Dynamically allocate memory for machine check event info
>>
>> v3: Remove check for hash mmu lpar, use memblock_alloc_try_nid
>>      to allocate memory.
>>
>> v4: Spliting the patch into two.
>> ---
>>   arch/powerpc/include/asm/mce.h     | 18 +++++++
>>   arch/powerpc/include/asm/paca.h    |  4 ++
>>   arch/powerpc/kernel/mce.c          | 79 ++++++++++++++++++------------
>>   arch/powerpc/kernel/setup-common.c |  2 +-
>>   4 files changed, 70 insertions(+), 33 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/setup-common.c 
>> b/arch/powerpc/kernel/setup-common.c
>> index 71f38e9248be..17dc451f0e45 100644
>> --- a/arch/powerpc/kernel/setup-common.c
>> +++ b/arch/powerpc/kernel/setup-common.c
>> @@ -916,7 +916,6 @@ void __init setup_arch(char **cmdline_p)
>>       /* On BookE, setup per-core TLB data structures. */
>>       setup_tlb_core_data();
>>   #endif
>> -
>
> This line removal is really required for this patch ?
I will correct it, Thanks for catching.
>
>>       /* Print various info about the machine that has been gathered 
>> so far. */
>>       print_system_info();
>>   @@ -938,6 +937,7 @@ void __init setup_arch(char **cmdline_p)
>>       exc_lvl_early_init();
>>       emergency_stack_init();
>>   +    mce_init();
>
> You have to include mce.h to avoid build failure on PPC32.
Sure, thanks
>>       smp_release_cpus();
>>         initmem_init();
>>

^ permalink raw reply

* Re: [PATCH] PCI: dwc: layerscape: convert to builtin_platform_driver()
From: Tony Lindgren @ 2021-01-28 10:00 UTC (permalink / raw)
  To: Michael Walle
  Cc: Roy Zang, Lorenzo Pieralisi, Saravana Kannan, PCI, LKML,
	Kishon Vijay Abraham I, Minghuan Lian, Geert Uytterhoeven,
	Mingkai Hu, Greg Kroah-Hartman, Bjorn Helgaas, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <a24391e62b107040435766fff52bdd31@walle.cc>

Hi,

* Michael Walle <michael@walle.cc> [210125 19:52]:
> Although I do have the changes for the builtin_platform_driver_probe()
> ready, I don't think it makes much sense to send these unless we agree
> on the increased memory footprint. While there are just a few
> builtin_platform_driver_probe() and memory increase _might_ be
> negligible, there are many more module_platform_driver_probe().

I just noticed this thread today and have pretty much come to the same
conclusions. No need to post a patch for pci-dra7xx.c, I already posted
a patch for pci-dra7xx.c yesterday as part of genpd related changes.

For me probing started breaking as the power-domains property got added.

FYI, it's the following patch:

[PATCH 01/15] PCI: pci-dra7xx: Prepare for deferred probe with module_platform_driver

Regards,

Tony



^ permalink raw reply

* Re: [PATCH] PCI: dwc: layerscape: convert to builtin_platform_driver()
From: Tony Lindgren @ 2021-01-28 10:35 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Roy Zang, Lorenzo Pieralisi, Saravana Kannan, PCI, LKML,
	Minghuan Lian, Michael Walle, Mingkai Hu, Greg Kroah-Hartman,
	Bjorn Helgaas, linuxppc-dev, linux-arm-kernel
In-Reply-To: <CAMuHMdVHouzMFiGcUz=0M0_RFL-OBvkRvQiF5h56XKDMZuC7Kg@mail.gmail.com>

* Geert Uytterhoeven <geert@linux-m68k.org> [210128 09:32]:
> It wasn't.  The regression is that the driver no longer probes at first
> try, because its dependencies are now probed later.  The question is:
> why are the dependencies now probed later?  How to fix that?

I'm afraid that may be unfixable.. It depends on things like the bus
driver probe that might get also deferred.

As suggested, I agree it's best to get rid of builtin_platform_driver_probe
where possible at the cost of dropping the __init as needed.

To me it seems we can't even add a warning to __platform_driver_probe()
if there's drv->driver.of_match_table for example. That warning would
show up on all the devices with driver in question built in even if
the device has no such hardware.

Regards,

Tony

^ permalink raw reply

* RE: [PATCH v4 02/10] powerpc/signal: Add unsafe_copy_{vsx, fpr}_from_user()
From: David Laight @ 2021-01-28 10:38 UTC (permalink / raw)
  To: 'Christopher M. Riedl', linuxppc-dev@lists.ozlabs.org
In-Reply-To: <20210128040424.12720-3-cmr@codefail.de>

From: Christopher M. Riedl
> Sent: 28 January 2021 04:04
> 
> Reuse the "safe" implementation from signal.c except for calling
> unsafe_copy_from_user() to copy into a local buffer.
> 
> Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
> ---
>  arch/powerpc/kernel/signal.h | 33 +++++++++++++++++++++++++++++++++
>  1 file changed, 33 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
> index 2559a681536e..c18402d625f1 100644
> --- a/arch/powerpc/kernel/signal.h
> +++ b/arch/powerpc/kernel/signal.h
> @@ -53,6 +53,33 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
>  				&buf[i], label);\
>  } while (0)
> 
> +#define unsafe_copy_fpr_from_user(task, from, label)	do {		\
> +	struct task_struct *__t = task;					\
> +	u64 __user *__f = (u64 __user *)from;				\
> +	u64 buf[ELF_NFPREG];						\

How big is that buffer?
Isn't is likely to be reasonably large compared to a reasonable
kernel stack frame.
Especially since this isn't even a leaf function.

> +	int i;								\
> +									\
> +	unsafe_copy_from_user(buf, __f, ELF_NFPREG * sizeof(double),	\

That really ought to be sizeof(buf).

	David


> +				label);					\
> +	for (i = 0; i < ELF_NFPREG - 1; i++)				\
> +		__t->thread.TS_FPR(i) = buf[i];				\
> +	__t->thread.fp_state.fpscr = buf[i];				\
> +} while (0)
> +
> +#define unsafe_copy_vsx_from_user(task, from, label)	do {		\
> +	struct task_struct *__t = task;					\
> +	u64 __user *__f = (u64 __user *)from;				\
> +	u64 buf[ELF_NVSRHALFREG];					\
> +	int i;								\
> +									\
> +	unsafe_copy_from_user(buf, __f,					\
> +				ELF_NVSRHALFREG * sizeof(double),	\
> +				label);					\
> +	for (i = 0; i < ELF_NVSRHALFREG ; i++)				\
> +		__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];	\
> +} while (0)
> +
> +
>  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>  #define unsafe_copy_ckfpr_to_user(to, task, label)	do {		\
>  	struct task_struct *__t = task;					\
> @@ -80,6 +107,10 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
>  	unsafe_copy_to_user(to, (task)->thread.fp_state.fpr,	\
>  			    ELF_NFPREG * sizeof(double), label)
> 
> +#define unsafe_copy_fpr_from_user(task, from, label)			\
> +	unsafe_copy_from_user((task)->thread.fp_state.fpr, from,	\
> +			    ELF_NFPREG * sizeof(double), label)
> +
>  static inline unsigned long
>  copy_fpr_to_user(void __user *to, struct task_struct *task)
>  {
> @@ -115,6 +146,8 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
>  #else
>  #define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
> 
> +#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
> +
>  static inline unsigned long
>  copy_fpr_to_user(void __user *to, struct task_struct *task)
>  {
> --
> 2.26.1

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox