* [RFC V1 0/6] Generic Entry/Exit support for ppc64
@ 2025-04-28 15:22 Mukesh Kumar Chaurasiya
  2025-04-28 15:22 ` [RFC V1 1/6] powerpc: rename arch_irq_disabled_regs Mukesh Kumar Chaurasiya
                   ` (7 more replies)
  0 siblings, 8 replies; 12+ messages in thread
From: Mukesh Kumar Chaurasiya @ 2025-04-28 15:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: maddy, mpe, npiggin, christophe.leroy, naveen, neeraj.upadhyay,
	vschneid, tglx, frederic, ankur.a.arora, sshegde, bigeasy, kees,
	oleg, peterz, tzimmermann, namcao, kan.liang, mcgrof, rppt,
	atrajeev, anjalik, coltonlewis, linuxppc-dev,
	Mukesh Kumar Chaurasiya

This is a syscall only implementation of generic entry/exit framework
for framework for ppc. IRQ handling is not done in this RFC. 

This will break the ppc32 build as of now which will be fixed along with
IRQ handling.

Below are the performance benchmarks from perf bench basic syscall.
This is for 1,00,00,000 getppid() calls

| Metric     | Without Generic Framework | With Generic Framework |
| ---------- | ------------------------- | ---------------------- |
| Total time | 0.904 [sec]               | 0.856 [sec]            |
| usecs/op   | 0.090403                  | 0.085638               |
| ops/sec    | 1,10,61,579               | 1,16,77,086            |

That's ~5% degradation as of now.
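
For readers new to the generic framework, the shape of the syscall path
this series wires up is roughly the following (a simplified sketch built
from the entry-common.h helpers used in patch 6, not actual powerpc
code):

  /* entry, in system_call_exception() */
  r0 = syscall_enter_from_user_mode(regs, r0);  /* seccomp, ptrace, audit, tracepoints */

  /* ... dispatch and run the syscall handler ... */

  /* exit, in syscall_exit_prepare() */
  syscall_exit_to_user_mode(regs);              /* exit work: signals, resched, audit/trace */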

Mukesh Kumar Chaurasiya (6):
  powerpc: rename arch_irq_disabled_regs
  powerpc: Prepare to build with generic entry/exit framework
  powerpc: introduce arch_enter_from_user_mode
  powerpc: Add flag in paca for register restore state
  powerpc: Introduce syscall exit arch functions
  powerpc: Enable Generic Entry/Exit for syscalls.

 arch/powerpc/Kconfig                    |   1 +
 arch/powerpc/include/asm/entry-common.h | 158 ++++++++++++++++++++++++
 arch/powerpc/include/asm/hw_irq.h       |   4 +-
 arch/powerpc/include/asm/interrupt.h    | 117 +++++++++++++++++-
 arch/powerpc/include/asm/paca.h         |   1 +
 arch/powerpc/include/asm/stacktrace.h   |   8 ++
 arch/powerpc/include/asm/syscall.h      |   5 +
 arch/powerpc/include/asm/thread_info.h  |   1 +
 arch/powerpc/kernel/interrupt.c         | 153 ++++++-----------------
 arch/powerpc/kernel/ptrace/ptrace.c     | 103 ---------------
 arch/powerpc/kernel/signal.c            |   8 ++
 arch/powerpc/kernel/syscall.c           | 117 +-----------------
 arch/powerpc/kernel/traps.c             |   2 +-
 arch/powerpc/kernel/watchdog.c          |   2 +-
 arch/powerpc/perf/core-book3s.c         |   2 +-
 15 files changed, 336 insertions(+), 346 deletions(-)
 create mode 100644 arch/powerpc/include/asm/entry-common.h

-- 
2.49.0




* [RFC V1 1/6] powerpc: rename arch_irq_disabled_regs
  2025-04-28 15:22 [RFC V1 0/6] Generic Entry/Exit support for ppc64 Mukesh Kumar Chaurasiya
@ 2025-04-28 15:22 ` Mukesh Kumar Chaurasiya
  2025-04-28 15:22 ` [RFC V1 2/6] powerpc: Prepare to build with generic entry/exit framework Mukesh Kumar Chaurasiya
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Mukesh Kumar Chaurasiya @ 2025-04-28 15:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: maddy, mpe, npiggin, christophe.leroy, naveen, neeraj.upadhyay,
	vschneid, tglx, frederic, ankur.a.arora, sshegde, bigeasy, kees,
	oleg, peterz, tzimmermann, namcao, kan.liang, mcgrof, rppt,
	atrajeev, anjalik, coltonlewis, linuxppc-dev,
	Mukesh Kumar Chaurasiya

Rename arch_irq_disabled_regs() to regs_irqs_disabled(), the name the
generic entry/exit framework expects, so the same helper can be shared
by the generic code and the powerpc arch code.
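
For context, the generic framework's irqentry exit path (not wired up in
this RFC) already uses a helper of this name, roughly as below. This is a
simplified sketch from memory of the upstream generic entry code, shown
only to explain the choice of name:

  /* sketch: kernel-return handling in the generic irqentry exit */
  if (user_mode(regs)) {
          irqentry_exit_to_user_mode(regs);
  } else if (!regs_irqs_disabled(regs)) {
          /* returning to kernel with IRQs enabled: preemption, tracing */
  }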

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
---
 arch/powerpc/include/asm/hw_irq.h    |  4 ++--
 arch/powerpc/include/asm/interrupt.h | 12 ++++++------
 arch/powerpc/kernel/interrupt.c      |  4 ++--
 arch/powerpc/kernel/syscall.c        |  2 +-
 arch/powerpc/kernel/traps.c          |  2 +-
 arch/powerpc/kernel/watchdog.c       |  2 +-
 arch/powerpc/perf/core-book3s.c      |  2 +-
 7 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 569ac1165b069..2b9cf0380e0e9 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -393,7 +393,7 @@ static inline void do_hard_irq_enable(void)
 	__hard_irq_enable();
 }
 
-static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
+static inline bool regs_irqs_disabled(struct pt_regs *regs)
 {
 	return (regs->softe & IRQS_DISABLED);
 }
@@ -466,7 +466,7 @@ static inline bool arch_irqs_disabled(void)
 
 #define hard_irq_disable()		arch_local_irq_disable()
 
-static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
+static inline bool regs_irqs_disabled(struct pt_regs *regs)
 {
 	return !(regs->msr & MSR_EE);
 }
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 23638d4e73ac0..56bc8113b8cde 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -172,7 +172,7 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs)
 	/* Enable MSR[RI] early, to support kernel SLB and hash faults */
 #endif
 
-	if (!arch_irq_disabled_regs(regs))
+	if (!regs_irqs_disabled(regs))
 		trace_hardirqs_off();
 
 	if (user_mode(regs)) {
@@ -192,10 +192,10 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs)
 			CT_WARN_ON(ct_state() != CT_STATE_KERNEL &&
 				   ct_state() != CT_STATE_IDLE);
 		INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs));
-		INT_SOFT_MASK_BUG_ON(regs, arch_irq_disabled_regs(regs) &&
+		INT_SOFT_MASK_BUG_ON(regs, regs_irqs_disabled(regs) &&
 					   search_kernel_restart_table(regs->nip));
 	}
-	INT_SOFT_MASK_BUG_ON(regs, !arch_irq_disabled_regs(regs) &&
+	INT_SOFT_MASK_BUG_ON(regs, !regs_irqs_disabled(regs) &&
 				   !(regs->msr & MSR_EE));
 
 	booke_restore_dbcr0();
@@ -298,7 +298,7 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
 		 * Adjust regs->softe to be soft-masked if it had not been
 		 * reconcied (e.g., interrupt entry with MSR[EE]=0 but softe
 		 * not yet set disabled), or if it was in an implicit soft
-		 * masked state. This makes arch_irq_disabled_regs(regs)
+		 * masked state. This makes regs_irqs_disabled(regs)
 		 * behave as expected.
 		 */
 		regs->softe = IRQS_ALL_DISABLED;
@@ -372,7 +372,7 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
 
 #ifdef CONFIG_PPC64
 #ifdef CONFIG_PPC_BOOK3S
-	if (arch_irq_disabled_regs(regs)) {
+	if (regs_irqs_disabled(regs)) {
 		unsigned long rst = search_kernel_restart_table(regs->nip);
 		if (rst)
 			regs_set_return_ip(regs, rst);
@@ -661,7 +661,7 @@ void replay_soft_interrupts(void);
 
 static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
 {
-	if (!arch_irq_disabled_regs(regs))
+	if (!regs_irqs_disabled(regs))
 		local_irq_enable();
 }
 
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 8f4acc55407b0..f656192f075fb 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -343,7 +343,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
 	unsigned long ret;
 
 	BUG_ON(regs_is_unrecoverable(regs));
-	BUG_ON(arch_irq_disabled_regs(regs));
+	BUG_ON(regs_irqs_disabled(regs));
 	CT_WARN_ON(ct_state() == CT_STATE_USER);
 
 	/*
@@ -392,7 +392,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
 
 	local_irq_disable();
 
-	if (!arch_irq_disabled_regs(regs)) {
+	if (!regs_irqs_disabled(regs)) {
 		/* Returning to a kernel context with local irqs enabled. */
 		WARN_ON_ONCE(!(regs->msr & MSR_EE));
 again:
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index be159ad4b77bd..9f03a6263fb41 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -32,7 +32,7 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 
 	BUG_ON(regs_is_unrecoverable(regs));
 	BUG_ON(!user_mode(regs));
-	BUG_ON(arch_irq_disabled_regs(regs));
+	BUG_ON(regs_irqs_disabled(regs));
 
 #ifdef CONFIG_PPC_PKEY
 	if (mmu_has_feature(MMU_FTR_PKEY)) {
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index cb8e9357383e9..629f2a2d4780e 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1956,7 +1956,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception)
 	 * prevent hash faults on user addresses when reading callchains (and
 	 * looks better from an irq tracing perspective).
 	 */
-	if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
+	if (IS_ENABLED(CONFIG_PPC64) && unlikely(regs_irqs_disabled(regs)))
 		performance_monitor_exception_nmi(regs);
 	else
 		performance_monitor_exception_async(regs);
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 2429cb1c7baa7..6111cbbde069d 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -373,7 +373,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
 	u64 tb;
 
 	/* should only arrive from kernel, with irqs disabled */
-	WARN_ON_ONCE(!arch_irq_disabled_regs(regs));
+	WARN_ON_ONCE(!regs_irqs_disabled(regs));
 
 	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
 		return 0;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b906d28f74fd4..35f5f33f5777e 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2483,7 +2483,7 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 	 * will trigger a PMI after waking up from idle. Since counter values are _not_
 	 * saved/restored in idle path, can lead to below "Can't find PMC" message.
 	 */
-	if (unlikely(!found) && !arch_irq_disabled_regs(regs))
+	if (unlikely(!found) && !regs_irqs_disabled(regs))
 		printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
 
 	/*
-- 
2.49.0




* [RFC V1 2/6] powerpc: Prepare to build with generic entry/exit framework
  2025-04-28 15:22 [RFC V1 0/6] Generic Entry/Exit support for ppc64 Mukesh Kumar Chaurasiya
  2025-04-28 15:22 ` [RFC V1 1/6] powerpc: rename arch_irq_disabled_regs Mukesh Kumar Chaurasiya
@ 2025-04-28 15:22 ` Mukesh Kumar Chaurasiya
  2025-04-28 15:22 ` [RFC V1 3/6] powerpc: introduce arch_enter_from_user_mode Mukesh Kumar Chaurasiya
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Mukesh Kumar Chaurasiya @ 2025-04-28 15:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: maddy, mpe, npiggin, christophe.leroy, naveen, neeraj.upadhyay,
	vschneid, tglx, frederic, ankur.a.arora, sshegde, bigeasy, kees,
	oleg, peterz, tzimmermann, namcao, kan.liang, mcgrof, rppt,
	atrajeev, anjalik, coltonlewis, linuxppc-dev,
	Mukesh Kumar Chaurasiya

Building with the generic entry/exit framework on powerpc requires a
few preparatory steps. This patch takes care of that preparation; the
sketch below shows how the framework consumes the new thread_info
field.
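
For context, the generic entry code keys its syscall-entry work off the
new thread_info::syscall_work field, roughly as below (a simplified
sketch of the upstream generic entry code, not part of this patch):

  /* roughly what syscall_enter_from_user_mode() does with the new field */
  unsigned long work = READ_ONCE(current_thread_info()->syscall_work);

  if (work & SYSCALL_WORK_ENTER)  /* seccomp, ptrace, audit, tracepoints */
          syscall = syscall_trace_enter(regs, syscall, work);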

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
---
 arch/powerpc/include/asm/entry-common.h | 11 +++++++++++
 arch/powerpc/include/asm/stacktrace.h   |  8 ++++++++
 arch/powerpc/include/asm/syscall.h      |  5 +++++
 arch/powerpc/include/asm/thread_info.h  |  1 +
 4 files changed, 25 insertions(+)
 create mode 100644 arch/powerpc/include/asm/entry-common.h

diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
new file mode 100644
index 0000000000000..e9e4220a1d225
--- /dev/null
+++ b/arch/powerpc/include/asm/entry-common.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_PPC_ENTRY_COMMON_H
+#define _ASM_PPC_ENTRY_COMMON_H
+
+#ifdef CONFIG_GENERIC_ENTRY
+
+#include <asm/stacktrace.h>
+
+#endif /* CONFIG_GENERIC_ENTRY */
+#endif /* _ASM_PPC_ENTRY_COMMON_H */
diff --git a/arch/powerpc/include/asm/stacktrace.h b/arch/powerpc/include/asm/stacktrace.h
index 6149b53b3bc8e..3f0a242468813 100644
--- a/arch/powerpc/include/asm/stacktrace.h
+++ b/arch/powerpc/include/asm/stacktrace.h
@@ -8,6 +8,14 @@
 #ifndef _ASM_POWERPC_STACKTRACE_H
 #define _ASM_POWERPC_STACKTRACE_H
 
+#include <linux/sched.h>
+
 void show_user_instructions(struct pt_regs *regs);
 
+static inline bool on_thread_stack(void)
+{
+	return !(((unsigned long)(current->stack) ^ current_stack_pointer)
+			& ~(THREAD_SIZE -1));
+}
+
 #endif /* _ASM_POWERPC_STACKTRACE_H */
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 3dd36c5e334a9..0e94806c7bfe6 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -119,4 +119,9 @@ static inline int syscall_get_arch(struct task_struct *task)
 	else
 		return AUDIT_ARCH_PPC64;
 }
+
+static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
+{
+	return false;
+}
 #endif	/* _ASM_SYSCALL_H */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 2785c7462ebf7..d0e87c9bae0b0 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -54,6 +54,7 @@
 struct thread_info {
 	int		preempt_count;		/* 0 => preemptable,
 						   <0 => BUG */
+	unsigned long	syscall_work;		/* SYSCALL_WORK_ flags */
 #ifdef CONFIG_SMP
 	unsigned int	cpu;
 #endif
-- 
2.49.0




* [RFC V1 3/6] powerpc: introduce arch_enter_from_user_mode
  2025-04-28 15:22 [RFC V1 0/6] Generic Entry/Exit support for ppc64 Mukesh Kumar Chaurasiya
  2025-04-28 15:22 ` [RFC V1 1/6] powerpc: rename arch_irq_disabled_regs Mukesh Kumar Chaurasiya
  2025-04-28 15:22 ` [RFC V1 2/6] powerpc: Prepare to build with generic entry/exit framework Mukesh Kumar Chaurasiya
@ 2025-04-28 15:22 ` Mukesh Kumar Chaurasiya
  2025-04-28 15:22 ` [RFC V1 4/6] powerpc: Add flag in paca for register restore state Mukesh Kumar Chaurasiya
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Mukesh Kumar Chaurasiya @ 2025-04-28 15:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: maddy, mpe, npiggin, christophe.leroy, naveen, neeraj.upadhyay,
	vschneid, tglx, frederic, ankur.a.arora, sshegde, bigeasy, kees,
	oleg, peterz, tzimmermann, namcao, kan.liang, mcgrof, rppt,
	atrajeev, anjalik, coltonlewis, linuxppc-dev,
	Mukesh Kumar Chaurasiya

Introduce arch_enter_from_user_mode(), which the generic entry code
invokes on syscall entry (its call site is sketched below).
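
For reference, the generic code invokes this hook at the very start of
its user-mode entry path, roughly as below (simplified from the upstream
enter_from_user_mode() helper, not part of this patch):

  static __always_inline void enter_from_user_mode(struct pt_regs *regs)
  {
          arch_enter_from_user_mode(regs);        /* the hook added here */
          lockdep_hardirqs_off(CALLER_ADDR0);

          CT_WARN_ON(ct_state() != CT_STATE_USER);
          user_exit_irqoff();
          /* ... tracing and instrumentation ... */
  }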

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
---
 arch/powerpc/include/asm/entry-common.h | 96 +++++++++++++++++++++++++
 arch/powerpc/include/asm/interrupt.h    | 23 ++++++
 arch/powerpc/kernel/interrupt.c         | 22 ------
 3 files changed, 119 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
index e9e4220a1d225..804f6d019ec95 100644
--- a/arch/powerpc/include/asm/entry-common.h
+++ b/arch/powerpc/include/asm/entry-common.h
@@ -5,7 +5,103 @@
 
 #ifdef CONFIG_GENERIC_ENTRY
 
+#include <asm/cputime.h>
+#include <asm/interrupt.h>
 #include <asm/stacktrace.h>
+#include <asm/tm.h>
+
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
+{
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+
+	BUG_ON(regs_is_unrecoverable(regs));
+	BUG_ON(!user_mode(regs));
+	BUG_ON(regs_irqs_disabled(regs));
+
+#ifdef CONFIG_PPC_PKEY
+	if (mmu_has_feature(MMU_FTR_PKEY)) {
+		unsigned long amr, iamr;
+		bool flush_needed = false;
+		/*
+		 * When entering from userspace we mostly have the AMR/IAMR
+		 * different from kernel default values. Hence don't compare.
+		 */
+		amr = mfspr(SPRN_AMR);
+		iamr = mfspr(SPRN_IAMR);
+		regs->amr  = amr;
+		regs->iamr = iamr;
+		if (mmu_has_feature(MMU_FTR_KUAP)) {
+			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
+			flush_needed = true;
+		}
+		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
+			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
+			flush_needed = true;
+		}
+		if (flush_needed)
+			isync();
+	} else
+#endif
+		kuap_assert_locked();
+
+	booke_restore_dbcr0();
+
+	account_cpu_user_entry();
+
+	account_stolen_time();
+
+	/*
+	 * This is not required for the syscall exit path, but makes the
+	 * stack frame look nicer. If this was initialised in the first stack
+	 * frame, or if the unwinder was taught the first stack frame always
+	 * returns to user with IRQS_ENABLED, this store could be avoided!
+	 */
+	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
+
+	/*
+	 * If system call is called with TM active, set _TIF_RESTOREALL to
+	 * prevent RFSCV being used to return to userspace, because POWER9
+	 * TM implementation has problems with this instruction returning to
+	 * transactional state. Final register values are not relevant because
+	 * the transaction will be aborted upon return anyway. Or in the case
+	 * of unsupported_scv SIGILL fault, the return state does not much
+	 * matter because it's an edge case.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+			unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
+		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
+
+	/*
+	 * If the system call was made with a transaction active, doom it and
+	 * return without performing the system call. Unless it was an
+	 * unsupported scv vector, in which case it's treated like an illegal
+	 * instruction.
+	 */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
+	    !trap_is_unsupported_scv(regs)) {
+		/* Enable TM in the kernel, and disable EE (for scv) */
+		hard_irq_disable();
+		mtmsr(mfmsr() | MSR_TM);
+
+		/* tabort, this dooms the transaction, nothing else */
+		asm volatile(".long 0x7c00071d | ((%0) << 16)"
+				:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
+
+		/*
+		 * Userspace will never see the return value. Execution will
+		 * resume after the tbegin. of the aborted transaction with the
+		 * checkpointed register state. A context switch could occur
+		 * or signal delivered to the process before resuming the
+		 * doomed transaction context, but that should all be handled
+		 * as expected.
+		 */
+		return;
+	}
+#endif // CONFIG_PPC_TRANSACTIONAL_MEM
+}
+#define arch_enter_from_user_mode arch_enter_from_user_mode
 
 #endif /* CONFIG_GENERIC_ENTRY */
 #endif /* _ASM_PPC_ENTRY_COMMON_H */
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 56bc8113b8cde..6edf064a0fea2 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -138,6 +138,29 @@ static inline void nap_adjust_return(struct pt_regs *regs)
 #endif
 }
 
+static inline void booke_load_dbcr0(void)
+{
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+       unsigned long dbcr0 = current->thread.debug.dbcr0;
+
+       if (likely(!(dbcr0 & DBCR0_IDM)))
+               return;
+
+       /*
+        * Check to see if the dbcr0 register is set up to debug.
+        * Use the internal debug mode bit to do this.
+        */
+       mtmsr(mfmsr() & ~MSR_DE);
+       if (IS_ENABLED(CONFIG_PPC32)) {
+               isync();
+               global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
+       }
+       mtspr(SPRN_DBCR0, dbcr0);
+       mtspr(SPRN_DBSR, -1);
+#endif
+}
+
+
 static inline void booke_restore_dbcr0(void)
 {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index f656192f075fb..44afc65e0e0e0 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -74,28 +74,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
 	return true;
 }
 
-static notrace void booke_load_dbcr0(void)
-{
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-	unsigned long dbcr0 = current->thread.debug.dbcr0;
-
-	if (likely(!(dbcr0 & DBCR0_IDM)))
-		return;
-
-	/*
-	 * Check to see if the dbcr0 register is set up to debug.
-	 * Use the internal debug mode bit to do this.
-	 */
-	mtmsr(mfmsr() & ~MSR_DE);
-	if (IS_ENABLED(CONFIG_PPC32)) {
-		isync();
-		global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
-	}
-	mtspr(SPRN_DBCR0, dbcr0);
-	mtspr(SPRN_DBSR, -1);
-#endif
-}
-
 static notrace void check_return_regs_valid(struct pt_regs *regs)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
-- 
2.49.0




* [RFC V1 4/6] powerpc: Add flag in paca for register restore state
  2025-04-28 15:22 [RFC V1 0/6] Generic Entry/Exit support for ppc64 Mukesh Kumar Chaurasiya
                   ` (2 preceding siblings ...)
  2025-04-28 15:22 ` [RFC V1 3/6] powerpc: introduce arch_enter_from_user_mode Mukesh Kumar Chaurasiya
@ 2025-04-28 15:22 ` Mukesh Kumar Chaurasiya
  2025-04-28 15:22 ` [RFC V1 5/6] powerpc: Introduce syscall exit arch functions Mukesh Kumar Chaurasiya
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Mukesh Kumar Chaurasiya @ 2025-04-28 15:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: maddy, mpe, npiggin, christophe.leroy, naveen, neeraj.upadhyay,
	vschneid, tglx, frederic, ankur.a.arora, sshegde, bigeasy, kees,
	oleg, peterz, tzimmermann, namcao, kan.liang, mcgrof, rppt,
	atrajeev, anjalik, coltonlewis, linuxppc-dev,
	Mukesh Kumar Chaurasiya

When a signal is being processed we want to restore the GPRs. Introduce
a flag in the paca to record, per CPU, whether the registers need to be
restored on return to userspace; its intended use is sketched below.
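
For reference, the flag is set and consumed later in this series roughly
as follows (see patches 5 and 6):

  /* signal delivery: request a full GPR restore */
  local_paca->generic_fw_flags |= GFW_RESTORE_ALL;

  /* syscall exit: translate it into _TIF_RESTOREALL for the return path */
  if (unlikely(local_paca->generic_fw_flags & GFW_RESTORE_ALL)) {
          ret |= _TIF_RESTOREALL;
          local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
  }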

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
---
 arch/powerpc/include/asm/paca.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 1d58da9467396..215cafd64d8f5 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -174,6 +174,7 @@ struct paca_struct {
 	u8 irq_soft_mask;		/* mask for irq soft masking */
 	u8 irq_happened;		/* irq happened while soft-disabled */
 	u8 irq_work_pending;		/* IRQ_WORK interrupt while soft-disable */
+	u8 generic_fw_flags;		/* Flags for generic framework */
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	u8 pmcregs_in_use;		/* pseries puts this in lppaca */
 #endif
-- 
2.49.0




* [RFC V1 5/6] powerpc: Introduce syscall exit arch functions
  2025-04-28 15:22 [RFC V1 0/6] Generic Entry/Exit support for ppc64 Mukesh Kumar Chaurasiya
                   ` (3 preceding siblings ...)
  2025-04-28 15:22 ` [RFC V1 4/6] powerpc: Add flag in paca for register restore state Mukesh Kumar Chaurasiya
@ 2025-04-28 15:22 ` Mukesh Kumar Chaurasiya
  2025-04-28 15:22 ` [RFC V1 6/6] powerpc: Enable Generic Entry/Exit for syscalls Mukesh Kumar Chaurasiya
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Mukesh Kumar Chaurasiya @ 2025-04-28 15:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: maddy, mpe, npiggin, christophe.leroy, naveen, neeraj.upadhyay,
	vschneid, tglx, frederic, ankur.a.arora, sshegde, bigeasy, kees,
	oleg, peterz, tzimmermann, namcao, kan.liang, mcgrof, rppt,
	atrajeev, anjalik, coltonlewis, linuxppc-dev,
	Mukesh Kumar Chaurasiya

Introduce the following arch hooks for syscall exit (their generic call
sites are sketched below):
 - arch_exit_to_user_mode_prepare
 - arch_exit_to_user_mode
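
For reference, the generic exit path calls these hooks roughly as below
(a simplified sketch of the upstream generic entry code, not part of
this patch):

  /* runs with interrupts disabled, after the exit-to-user work loop */
  static void exit_to_user_mode_prepare(struct pt_regs *regs)
  {
          unsigned long ti_work = read_thread_flags();

          /* ... exit_to_user_mode_loop(): signals, resched, notify-resume ... */

          arch_exit_to_user_mode_prepare(regs, ti_work);
  }

  /* last step before returning to userspace */
  static __always_inline void exit_to_user_mode(void)
  {
          user_enter_irqoff();
          arch_exit_to_user_mode();
  }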

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
---
 arch/powerpc/include/asm/entry-common.h | 51 +++++++++++++++
 arch/powerpc/include/asm/interrupt.h    | 82 +++++++++++++++++++++++++
 arch/powerpc/kernel/interrupt.c         | 81 ------------------------
 arch/powerpc/kernel/signal.c            | 14 +++++
 4 files changed, 147 insertions(+), 81 deletions(-)

diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
index 804f6d019ec95..04db70afdd820 100644
--- a/arch/powerpc/include/asm/entry-common.h
+++ b/arch/powerpc/include/asm/entry-common.h
@@ -8,8 +8,14 @@
 #include <asm/cputime.h>
 #include <asm/interrupt.h>
 #include <asm/stacktrace.h>
+#include <asm/switch_to.h>
 #include <asm/tm.h>
 
+/*
+ * flags for paca->generic_fw_flags
+ */
+#define GFW_RESTORE_ALL 0x01
+
 static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
 {
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
@@ -101,7 +107,52 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
 	}
 #endif // CONFIG_PPC_TRANSACTIONAL_MEM
 }
+
 #define arch_enter_from_user_mode arch_enter_from_user_mode
 
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+		unsigned long ti_work)
+{
+	unsigned long mathflags;
+
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
+		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+		    unlikely((ti_work & _TIF_RESTORE_TM))) {
+			restore_tm_state(regs);
+		} else {
+			mathflags = MSR_FP;
+
+			if (cpu_has_feature(CPU_FTR_VSX))
+				mathflags |= MSR_VEC | MSR_VSX;
+			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
+				mathflags |= MSR_VEC;
+
+			/*
+			 * If userspace MSR has all available FP bits set,
+			 * then they are live and no need to restore. If not,
+			 * it means the regs were given up and restore_math
+			 * may decide to restore them (to avoid taking an FP
+			 * fault).
+			 */
+			if ((regs->msr & mathflags) != mathflags)
+				restore_math(regs);
+		}
+	}
+
+	check_return_regs_valid(regs);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	local_paca->tm_scratch = regs->msr;
+#endif
+}
+#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
+
+static __always_inline void arch_exit_to_user_mode(void)
+{
+	booke_load_dbcr0();
+
+	account_cpu_user_exit();
+}
+#define arch_exit_to_user_mode arch_exit_to_user_mode
+
 #endif /* CONFIG_GENERIC_ENTRY */
 #endif /* _ASM_PPC_ENTRY_COMMON_H */
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 6edf064a0fea2..c6ab286a723f2 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -68,6 +68,8 @@
 
 #include <linux/context_tracking.h>
 #include <linux/hardirq.h>
+#include <linux/sched/debug.h> /* for show_regs */
+
 #include <asm/cputime.h>
 #include <asm/firmware.h>
 #include <asm/ftrace.h>
@@ -173,6 +175,86 @@ static inline void booke_restore_dbcr0(void)
 #endif
 }
 
+static inline void check_return_regs_valid(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	unsigned long trap, srr0, srr1;
+	static bool warned;
+	u8 *validp;
+	char *h;
+
+	if (trap_is_scv(regs))
+		return;
+
+	trap = TRAP(regs);
+	// EE in HV mode sets HSRRs like 0xea0
+	if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
+		trap = 0xea0;
+
+	switch (trap) {
+	case 0x980:
+	case INTERRUPT_H_DATA_STORAGE:
+	case 0xe20:
+	case 0xe40:
+	case INTERRUPT_HMI:
+	case 0xe80:
+	case 0xea0:
+	case INTERRUPT_H_FAC_UNAVAIL:
+	case 0x1200:
+	case 0x1500:
+	case 0x1600:
+	case 0x1800:
+		validp = &local_paca->hsrr_valid;
+		if (!READ_ONCE(*validp))
+			return;
+
+		srr0 = mfspr(SPRN_HSRR0);
+		srr1 = mfspr(SPRN_HSRR1);
+		h = "H";
+
+		break;
+	default:
+		validp = &local_paca->srr_valid;
+		if (!READ_ONCE(*validp))
+			return;
+
+		srr0 = mfspr(SPRN_SRR0);
+		srr1 = mfspr(SPRN_SRR1);
+		h = "";
+		break;
+	}
+
+	if (srr0 == regs->nip && srr1 == regs->msr)
+		return;
+
+	/*
+	 * A NMI / soft-NMI interrupt may have come in after we found
+	 * srr_valid and before the SRRs are loaded. The interrupt then
+	 * comes in and clobbers SRRs and clears srr_valid. Then we load
+	 * the SRRs here and test them above and find they don't match.
+	 *
+	 * Test validity again after that, to catch such false positives.
+	 *
+	 * This test in general will have some window for false negatives
+	 * and may not catch and fix all such cases if an NMI comes in
+	 * later and clobbers SRRs without clearing srr_valid, but hopefully
+	 * such things will get caught most of the time, statistically
+	 * enough to be able to get a warning out.
+	 */
+	if (!READ_ONCE(*validp))
+		return;
+
+	if (!data_race(warned)) {
+		data_race(warned = true);
+		printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
+		printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
+		show_regs(regs);
+	}
+
+	WRITE_ONCE(*validp, 0); /* fixup */
+#endif
+}
+
 static inline void interrupt_enter_prepare(struct pt_regs *regs)
 {
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 44afc65e0e0e0..7f31f3fb9c1d8 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -4,7 +4,6 @@
 #include <linux/err.h>
 #include <linux/compat.h>
 #include <linux/rseq.h>
-#include <linux/sched/debug.h> /* for show_regs */
 
 #include <asm/kup.h>
 #include <asm/cputime.h>
@@ -74,86 +73,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
 	return true;
 }
 
-static notrace void check_return_regs_valid(struct pt_regs *regs)
-{
-#ifdef CONFIG_PPC_BOOK3S_64
-	unsigned long trap, srr0, srr1;
-	static bool warned;
-	u8 *validp;
-	char *h;
-
-	if (trap_is_scv(regs))
-		return;
-
-	trap = TRAP(regs);
-	// EE in HV mode sets HSRRs like 0xea0
-	if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
-		trap = 0xea0;
-
-	switch (trap) {
-	case 0x980:
-	case INTERRUPT_H_DATA_STORAGE:
-	case 0xe20:
-	case 0xe40:
-	case INTERRUPT_HMI:
-	case 0xe80:
-	case 0xea0:
-	case INTERRUPT_H_FAC_UNAVAIL:
-	case 0x1200:
-	case 0x1500:
-	case 0x1600:
-	case 0x1800:
-		validp = &local_paca->hsrr_valid;
-		if (!READ_ONCE(*validp))
-			return;
-
-		srr0 = mfspr(SPRN_HSRR0);
-		srr1 = mfspr(SPRN_HSRR1);
-		h = "H";
-
-		break;
-	default:
-		validp = &local_paca->srr_valid;
-		if (!READ_ONCE(*validp))
-			return;
-
-		srr0 = mfspr(SPRN_SRR0);
-		srr1 = mfspr(SPRN_SRR1);
-		h = "";
-		break;
-	}
-
-	if (srr0 == regs->nip && srr1 == regs->msr)
-		return;
-
-	/*
-	 * A NMI / soft-NMI interrupt may have come in after we found
-	 * srr_valid and before the SRRs are loaded. The interrupt then
-	 * comes in and clobbers SRRs and clears srr_valid. Then we load
-	 * the SRRs here and test them above and find they don't match.
-	 *
-	 * Test validity again after that, to catch such false positives.
-	 *
-	 * This test in general will have some window for false negatives
-	 * and may not catch and fix all such cases if an NMI comes in
-	 * later and clobbers SRRs without clearing srr_valid, but hopefully
-	 * such things will get caught most of the time, statistically
-	 * enough to be able to get a warning out.
-	 */
-	if (!READ_ONCE(*validp))
-		return;
-
-	if (!data_race(warned)) {
-		data_race(warned = true);
-		printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
-		printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
-		show_regs(regs);
-	}
-
-	WRITE_ONCE(*validp, 0); /* fixup */
-#endif
-}
-
 static notrace unsigned long
 interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
 {
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index aa17e62f37547..719930cf4ae1f 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -22,6 +22,11 @@
 
 #include "signal.h"
 
+/* This will be removed */
+#ifdef CONFIG_GENERIC_ENTRY
+#include <linux/entry-common.h>
+#endif /* CONFIG_GENERIC_ENTRY */
+
 #ifdef CONFIG_VSX
 unsigned long copy_fpr_to_user(void __user *to,
 			       struct task_struct *task)
@@ -368,3 +373,12 @@ void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
 		printk_ratelimited(regs->msr & MSR_64BIT ? fm64 : fm32, tsk->comm,
 				   task_pid_nr(tsk), where, ptr, regs->nip, regs->link);
 }
+
+#ifdef CONFIG_GENERIC_ENTRY
+void arch_do_signal_or_restart(struct pt_regs *regs)
+{
+	BUG_ON(regs != current->thread.regs);
+	local_paca->generic_fw_flags |= GFW_RESTORE_ALL;
+	do_signal(current);
+}
+#endif /* CONFIG_GENERIC_ENTRY */
-- 
2.49.0




* [RFC V1 6/6] powerpc: Enable Generic Entry/Exit for syscalls.
  2025-04-28 15:22 [RFC V1 0/6] Generic Entry/Exit support for ppc64 Mukesh Kumar Chaurasiya
                   ` (4 preceding siblings ...)
  2025-04-28 15:22 ` [RFC V1 5/6] powerpc: Introduce syscall exit arch functions Mukesh Kumar Chaurasiya
@ 2025-04-28 15:22 ` Mukesh Kumar Chaurasiya
  2025-04-29  6:11   ` Shrikanth Hegde
  2025-04-28 15:52 ` [RFC V1 0/6] Generic Entry/Exit support for ppc64 Mukesh Kumar Chaurasiya
  2025-05-05 17:08 ` Ankur Arora
  7 siblings, 1 reply; 12+ messages in thread
From: Mukesh Kumar Chaurasiya @ 2025-04-28 15:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: maddy, mpe, npiggin, christophe.leroy, naveen, neeraj.upadhyay,
	vschneid, tglx, frederic, ankur.a.arora, sshegde, bigeasy, kees,
	oleg, peterz, tzimmermann, namcao, kan.liang, mcgrof, rppt,
	atrajeev, anjalik, coltonlewis, linuxppc-dev,
	Mukesh Kumar Chaurasiya

Enable the syscall entry and exit path from generic framework.

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
---
 arch/powerpc/Kconfig                |   1 +
 arch/powerpc/kernel/interrupt.c     |  46 +++++++----
 arch/powerpc/kernel/ptrace/ptrace.c | 103 ------------------------
 arch/powerpc/kernel/signal.c        |   8 +-
 arch/powerpc/kernel/syscall.c       | 117 +---------------------------
 5 files changed, 38 insertions(+), 237 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6722625a406a0..45b70ccf7c89e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -202,6 +202,7 @@ config PPC
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_VULNERABILITIES	if PPC_BARRIER_NOSPEC
 	select GENERIC_EARLY_IOREMAP
+	select GENERIC_ENTRY
 	select GENERIC_GETTIMEOFDAY
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IOREMAP
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 7f31f3fb9c1d8..8731064631de0 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <linux/context_tracking.h>
+#include <linux/entry-common.h>
 #include <linux/err.h>
 #include <linux/compat.h>
 #include <linux/rseq.h>
@@ -163,15 +164,10 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 	unsigned long ret = 0;
 	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
 
-	CT_WARN_ON(ct_state() == CT_STATE_USER);
-
 	kuap_assert_locked();
 
 	regs->result = r3;
 
-	/* Check whether the syscall is issued inside a restartable sequence */
-	rseq_syscall(regs);
-
 	ti_flags = read_thread_flags();
 
 	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
@@ -192,13 +188,27 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 	}
 
 	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
-		do_syscall_trace_leave(regs);
 		ret |= _TIF_RESTOREALL;
 	}
 
-	local_irq_disable();
-	ret = interrupt_exit_user_prepare_main(ret, regs);
+again:
+	syscall_exit_to_user_mode(regs);
+
+	user_enter_irqoff();
+	if (!prep_irq_for_enabled_exit(true)) {
+		user_exit_irqoff();
+		local_irq_enable();
+		local_irq_disable();
+		goto again;
+	}
+
+	/* Restore user access locks last */
+	kuap_user_restore(regs);
 
+	if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) {
+		ret |= _TIF_RESTOREALL;
+		local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
+	}
 #ifdef CONFIG_PPC64
 	regs->exit_result = ret;
 #endif
@@ -209,6 +219,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 #ifdef CONFIG_PPC64
 notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
 {
+	unsigned long ret = 0;
 	/*
 	 * This is called when detecting a soft-pending interrupt as well as
 	 * an alternate-return interrupt. So we can't just have the alternate
@@ -222,14 +233,23 @@ notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *reg
 #ifdef CONFIG_PPC_BOOK3S_64
 	set_kuap(AMR_KUAP_BLOCKED);
 #endif
+again:
+	syscall_exit_to_user_mode(regs);
 
-	trace_hardirqs_off();
-	user_exit_irqoff();
-	account_cpu_user_entry();
+	user_enter_irqoff();
+	if (!prep_irq_for_enabled_exit(true)) {
+		user_exit_irqoff();
+		local_irq_enable();
+		local_irq_disable();
+		goto again;
+	}
 
-	BUG_ON(!user_mode(regs));
+	if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) {
+		ret = _TIF_RESTOREALL;
+		local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
+	}
 
-	regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
+	regs->exit_result |= ret;
 
 	return regs->exit_result;
 }
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index c6997df632873..2a2b0b94a3eaa 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -21,9 +21,6 @@
 #include <asm/switch_to.h>
 #include <asm/debug.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
 #include "ptrace-decl.h"
 
 /*
@@ -233,106 +230,6 @@ static int do_seccomp(struct pt_regs *regs)
 static inline int do_seccomp(struct pt_regs *regs) { return 0; }
 #endif /* CONFIG_SECCOMP */
 
-/**
- * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
- * @regs: the pt_regs of the task to trace (current)
- *
- * Performs various types of tracing on syscall entry. This includes seccomp,
- * ptrace, syscall tracepoints and audit.
- *
- * The pt_regs are potentially visible to userspace via ptrace, so their
- * contents is ABI.
- *
- * One or more of the tracers may modify the contents of pt_regs, in particular
- * to modify arguments or even the syscall number itself.
- *
- * It's also possible that a tracer can choose to reject the system call. In
- * that case this function will return an illegal syscall number, and will put
- * an appropriate return value in regs->r3.
- *
- * Return: the (possibly changed) syscall number.
- */
-long do_syscall_trace_enter(struct pt_regs *regs)
-{
-	u32 flags;
-
-	flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
-
-	if (flags) {
-		int rc = ptrace_report_syscall_entry(regs);
-
-		if (unlikely(flags & _TIF_SYSCALL_EMU)) {
-			/*
-			 * A nonzero return code from
-			 * ptrace_report_syscall_entry() tells us to prevent
-			 * the syscall execution, but we are not going to
-			 * execute it anyway.
-			 *
-			 * Returning -1 will skip the syscall execution. We want
-			 * to avoid clobbering any registers, so we don't goto
-			 * the skip label below.
-			 */
-			return -1;
-		}
-
-		if (rc) {
-			/*
-			 * The tracer decided to abort the syscall. Note that
-			 * the tracer may also just change regs->gpr[0] to an
-			 * invalid syscall number, that is handled below on the
-			 * exit path.
-			 */
-			goto skip;
-		}
-	}
-
-	/* Run seccomp after ptrace; allow it to set gpr[3]. */
-	if (do_seccomp(regs))
-		return -1;
-
-	/* Avoid trace and audit when syscall is invalid. */
-	if (regs->gpr[0] >= NR_syscalls)
-		goto skip;
-
-	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-		trace_sys_enter(regs, regs->gpr[0]);
-
-	if (!is_32bit_task())
-		audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
-				    regs->gpr[5], regs->gpr[6]);
-	else
-		audit_syscall_entry(regs->gpr[0],
-				    regs->gpr[3] & 0xffffffff,
-				    regs->gpr[4] & 0xffffffff,
-				    regs->gpr[5] & 0xffffffff,
-				    regs->gpr[6] & 0xffffffff);
-
-	/* Return the possibly modified but valid syscall number */
-	return regs->gpr[0];
-
-skip:
-	/*
-	 * If we are aborting explicitly, or if the syscall number is
-	 * now invalid, set the return value to -ENOSYS.
-	 */
-	regs->gpr[3] = -ENOSYS;
-	return -1;
-}
-
-void do_syscall_trace_leave(struct pt_regs *regs)
-{
-	int step;
-
-	audit_syscall_exit(regs);
-
-	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-		trace_sys_exit(regs, regs->result);
-
-	step = test_thread_flag(TIF_SINGLESTEP);
-	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
-		ptrace_report_syscall_exit(regs, step);
-}
-
 void __init pt_regs_check(void);
 
 /*
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 719930cf4ae1f..8e1a1b26b5eae 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -6,6 +6,7 @@
  *    Extracted from signal_32.c and signal_64.c
  */
 
+#include <linux/entry-common.h>
 #include <linux/resume_user_mode.h>
 #include <linux/signal.h>
 #include <linux/uprobes.h>
@@ -22,11 +23,6 @@
 
 #include "signal.h"
 
-/* This will be removed */
-#ifdef CONFIG_GENERIC_ENTRY
-#include <linux/entry-common.h>
-#endif /* CONFIG_GENERIC_ENTRY */
-
 #ifdef CONFIG_VSX
 unsigned long copy_fpr_to_user(void __user *to,
 			       struct task_struct *task)
@@ -374,11 +370,9 @@ void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
 				   task_pid_nr(tsk), where, ptr, regs->nip, regs->link);
 }
 
-#ifdef CONFIG_GENERIC_ENTRY
 void arch_do_signal_or_restart(struct pt_regs *regs)
 {
 	BUG_ON(regs != current->thread.regs);
 	local_paca->generic_fw_flags |= GFW_RESTORE_ALL;
 	do_signal(current);
 }
-#endif /* CONFIG_GENERIC_ENTRY */
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 9f03a6263fb41..66fd6ca4462b0 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -3,6 +3,7 @@
 #include <linux/compat.h>
 #include <linux/context_tracking.h>
 #include <linux/randomize_kstack.h>
+#include <linux/entry-common.h>
 
 #include <asm/interrupt.h>
 #include <asm/kup.h>
@@ -21,121 +22,9 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 	kuap_lock();
 
 	add_random_kstack_offset();
+	r0 = syscall_enter_from_user_mode(regs, r0);
 
-	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
-		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
-
-	trace_hardirqs_off(); /* finish reconciling */
-
-	CT_WARN_ON(ct_state() == CT_STATE_KERNEL);
-	user_exit_irqoff();
-
-	BUG_ON(regs_is_unrecoverable(regs));
-	BUG_ON(!user_mode(regs));
-	BUG_ON(regs_irqs_disabled(regs));
-
-#ifdef CONFIG_PPC_PKEY
-	if (mmu_has_feature(MMU_FTR_PKEY)) {
-		unsigned long amr, iamr;
-		bool flush_needed = false;
-		/*
-		 * When entering from userspace we mostly have the AMR/IAMR
-		 * different from kernel default values. Hence don't compare.
-		 */
-		amr = mfspr(SPRN_AMR);
-		iamr = mfspr(SPRN_IAMR);
-		regs->amr  = amr;
-		regs->iamr = iamr;
-		if (mmu_has_feature(MMU_FTR_KUAP)) {
-			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
-			flush_needed = true;
-		}
-		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
-			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
-			flush_needed = true;
-		}
-		if (flush_needed)
-			isync();
-	} else
-#endif
-		kuap_assert_locked();
-
-	booke_restore_dbcr0();
-
-	account_cpu_user_entry();
-
-	account_stolen_time();
-
-	/*
-	 * This is not required for the syscall exit path, but makes the
-	 * stack frame look nicer. If this was initialised in the first stack
-	 * frame, or if the unwinder was taught the first stack frame always
-	 * returns to user with IRQS_ENABLED, this store could be avoided!
-	 */
-	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
-
-	/*
-	 * If system call is called with TM active, set _TIF_RESTOREALL to
-	 * prevent RFSCV being used to return to userspace, because POWER9
-	 * TM implementation has problems with this instruction returning to
-	 * transactional state. Final register values are not relevant because
-	 * the transaction will be aborted upon return anyway. Or in the case
-	 * of unsupported_scv SIGILL fault, the return state does not much
-	 * matter because it's an edge case.
-	 */
-	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
-			unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
-		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
-
-	/*
-	 * If the system call was made with a transaction active, doom it and
-	 * return without performing the system call. Unless it was an
-	 * unsupported scv vector, in which case it's treated like an illegal
-	 * instruction.
-	 */
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
-	    !trap_is_unsupported_scv(regs)) {
-		/* Enable TM in the kernel, and disable EE (for scv) */
-		hard_irq_disable();
-		mtmsr(mfmsr() | MSR_TM);
-
-		/* tabort, this dooms the transaction, nothing else */
-		asm volatile(".long 0x7c00071d | ((%0) << 16)"
-				:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
-
-		/*
-		 * Userspace will never see the return value. Execution will
-		 * resume after the tbegin. of the aborted transaction with the
-		 * checkpointed register state. A context switch could occur
-		 * or signal delivered to the process before resuming the
-		 * doomed transaction context, but that should all be handled
-		 * as expected.
-		 */
-		return -ENOSYS;
-	}
-#endif // CONFIG_PPC_TRANSACTIONAL_MEM
-
-	local_irq_enable();
-
-	if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
-		if (unlikely(trap_is_unsupported_scv(regs))) {
-			/* Unsupported scv vector */
-			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-			return regs->gpr[3];
-		}
-		/*
-		 * We use the return value of do_syscall_trace_enter() as the
-		 * syscall number. If the syscall was rejected for any reason
-		 * do_syscall_trace_enter() returns an invalid syscall number
-		 * and the test against NR_syscalls will fail and the return
-		 * value to be used is in regs->gpr[3].
-		 */
-		r0 = do_syscall_trace_enter(regs);
-		if (unlikely(r0 >= NR_syscalls))
-			return regs->gpr[3];
-
-	} else if (unlikely(r0 >= NR_syscalls)) {
+	if (unlikely(r0 >= NR_syscalls)) {
 		if (unlikely(trap_is_unsupported_scv(regs))) {
 			/* Unsupported scv vector */
 			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-- 
2.49.0




* Re: [RFC V1 0/6] Generic Entry/Exit support for ppc64
  2025-04-28 15:22 [RFC V1 0/6] Generic Entry/Exit support for ppc64 Mukesh Kumar Chaurasiya
                   ` (5 preceding siblings ...)
  2025-04-28 15:22 ` [RFC V1 6/6] powerpc: Enable Generic Entry/Exit for syscalls Mukesh Kumar Chaurasiya
@ 2025-04-28 15:52 ` Mukesh Kumar Chaurasiya
  2025-05-05 17:08 ` Ankur Arora
  7 siblings, 0 replies; 12+ messages in thread
From: Mukesh Kumar Chaurasiya @ 2025-04-28 15:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: maddy, mpe, npiggin, christophe.leroy, naveen, neeraj.upadhyay,
	vschneid, tglx, frederic, ankur.a.arora, sshegde, bigeasy, kees,
	oleg, peterz, tzimmermann, namcao, kan.liang, mcgrof, rppt,
	atrajeev, anjalik, coltonlewis, linuxppc-dev

On Mon, Apr 28, 2025 at 08:52:20PM +0530, Mukesh Kumar Chaurasiya wrote:

A few corrections to the cover letter:
> This is a syscall only implementation of generic entry/exit framework
> for framework for ppc. IRQ handling is not done in this RFC. 
>
s/framework for framework/framework
> This will break the ppc32 build as of now which will be fixed along with
> IRQ handling.
> 
> Below are the performance benchmarks from perf bench basic syscall.
> This is for 1,00,00,000 getppid() calls
> 
> | Metric     | Without Generic Framework | With Generic Framework |
> | ---------- | ------------------------- | ---------------------- |
> | Total time | 0.904 [sec]               | 0.856 [sec]            |
> | usecs/op   | 0.090403                  | 0.085638               |
> | ops/sec    | 1,10,61,579               | 1,16,77,086            |
>
The columns are reversed here:

| Metric     | With Generic Framework    | Without Generic Framework |
| ---------- | ------------------------- | ------------------------- |
| Total time | 0.904 [sec]               | 0.856 [sec]               |
| usecs/op   | 0.090403                  | 0.085638                  |
| ops/sec    | 1,10,61,579               | 1,16,77,086               |

> That's ~5% degradation as of now.
> 
> Mukesh Kumar Chaurasiya (6):
>   powerpc: rename arch_irq_disabled_regs
>   powerpc: Prepare to build with generic entry/exit framework
>   powerpc: introduce arch_enter_from_user_mode
>   powerpc: Add flag in paca for register restore state
>   powerpc: Introduce syscall exit arch functions
>   powerpc: Enable Generic Entry/Exit for syscalls.
> 
>  arch/powerpc/Kconfig                    |   1 +
>  arch/powerpc/include/asm/entry-common.h | 158 ++++++++++++++++++++++++
>  arch/powerpc/include/asm/hw_irq.h       |   4 +-
>  arch/powerpc/include/asm/interrupt.h    | 117 +++++++++++++++++-
>  arch/powerpc/include/asm/paca.h         |   1 +
>  arch/powerpc/include/asm/stacktrace.h   |   8 ++
>  arch/powerpc/include/asm/syscall.h      |   5 +
>  arch/powerpc/include/asm/thread_info.h  |   1 +
>  arch/powerpc/kernel/interrupt.c         | 153 ++++++-----------------
>  arch/powerpc/kernel/ptrace/ptrace.c     | 103 ---------------
>  arch/powerpc/kernel/signal.c            |   8 ++
>  arch/powerpc/kernel/syscall.c           | 117 +-----------------
>  arch/powerpc/kernel/traps.c             |   2 +-
>  arch/powerpc/kernel/watchdog.c          |   2 +-
>  arch/powerpc/perf/core-book3s.c         |   2 +-
>  15 files changed, 336 insertions(+), 346 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/entry-common.h
> 
> -- 
> 2.49.0
> 



* Re: [RFC V1 6/6] powerpc: Enable Generic Entry/Exit for syscalls.
  2025-04-28 15:22 ` [RFC V1 6/6] powerpc: Enable Generic Entry/Exit for syscalls Mukesh Kumar Chaurasiya
@ 2025-04-29  6:11   ` Shrikanth Hegde
  2025-05-02  8:01     ` Mukesh Kumar Chaurasiya
  0 siblings, 1 reply; 12+ messages in thread
From: Shrikanth Hegde @ 2025-04-29  6:11 UTC (permalink / raw)
  To: Mukesh Kumar Chaurasiya
  Cc: maddy, mpe, npiggin, christophe.leroy, naveen, neeraj.upadhyay,
	vschneid, tglx, frederic, ankur.a.arora, bigeasy, kees, oleg,
	peterz, tzimmermann, namcao, kan.liang, mcgrof, rppt, atrajeev,
	anjalik, coltonlewis, linuxppc-dev, linux-kernel



On 4/28/25 20:52, Mukesh Kumar Chaurasiya wrote:
> Enable the syscall entry and exit path from generic framework.
> 
> Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>

Hi Mukesh. Thanks for working on this. Trying to go through it.

> ---
>   arch/powerpc/Kconfig                |   1 +
>   arch/powerpc/kernel/interrupt.c     |  46 +++++++----
>   arch/powerpc/kernel/ptrace/ptrace.c | 103 ------------------------
>   arch/powerpc/kernel/signal.c        |   8 +-
>   arch/powerpc/kernel/syscall.c       | 117 +---------------------------
>   5 files changed, 38 insertions(+), 237 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 6722625a406a0..45b70ccf7c89e 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -202,6 +202,7 @@ config PPC
>   	select GENERIC_CPU_AUTOPROBE
>   	select GENERIC_CPU_VULNERABILITIES	if PPC_BARRIER_NOSPEC
>   	select GENERIC_EARLY_IOREMAP
> +	select GENERIC_ENTRY

IMHO this should be enabled after irqentry_* is supported too.

>   	select GENERIC_GETTIMEOFDAY
>   	select GENERIC_IDLE_POLL_SETUP
>   	select GENERIC_IOREMAP
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 7f31f3fb9c1d8..8731064631de0 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -1,6 +1,7 @@
>   // SPDX-License-Identifier: GPL-2.0-or-later
>   
>   #include <linux/context_tracking.h>
> +#include <linux/entry-common.h>
>   #include <linux/err.h>
>   #include <linux/compat.h>
>   #include <linux/rseq.h>
> @@ -163,15 +164,10 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>   	unsigned long ret = 0;
>   	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
>   
> -	CT_WARN_ON(ct_state() == CT_STATE_USER);
> -
>   	kuap_assert_locked();
>   
>   	regs->result = r3;
>   
> -	/* Check whether the syscall is issued inside a restartable sequence */
> -	rseq_syscall(regs);
> -
>   	ti_flags = read_thread_flags();
>   
>   	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
> @@ -192,13 +188,27 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>   	}
>   
>   	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
> -		do_syscall_trace_leave(regs);
>   		ret |= _TIF_RESTOREALL;
>   	}
>   
> -	local_irq_disable();
> -	ret = interrupt_exit_user_prepare_main(ret, regs);

I don't understand why this loop is needed.

> +again:
> +	syscall_exit_to_user_mode(regs);

exit_to_user_mode() also calls user_enter_irqoff(), so the
user_enter_irqoff() below may be a no-op or wrong.

Also, syscall_exit_to_user_mode() disables local interrupts first.

Is local_irq_enable() fixing something here?

> +
> +	user_enter_irqoff();
> +	if (!prep_irq_for_enabled_exit(true)) {
> +		user_exit_irqoff();
> +		local_irq_enable();
> +		local_irq_disable();
> +		goto again;
> +	}
> +
> +	/* Restore user access locks last */
> +	kuap_user_restore(regs);
>   
> +	if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) {
> +		ret |= _TIF_RESTOREALL;
> +		local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
> +	}
>   #ifdef CONFIG_PPC64
>   	regs->exit_result = ret;
>   #endif
> @@ -209,6 +219,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>   #ifdef CONFIG_PPC64
>   notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
>   {
> +	unsigned long ret = 0;
>   	/*
>   	 * This is called when detecting a soft-pending interrupt as well as
>   	 * an alternate-return interrupt. So we can't just have the alternate
> @@ -222,14 +233,23 @@ notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *reg
>   #ifdef CONFIG_PPC_BOOK3S_64
>   	set_kuap(AMR_KUAP_BLOCKED);
>   #endif
> +again:
> +	syscall_exit_to_user_mode(regs);
>   
> -	trace_hardirqs_off();
> -	user_exit_irqoff();
> -	account_cpu_user_entry();
> +	user_enter_irqoff();
> +	if (!prep_irq_for_enabled_exit(true)) {
> +		user_exit_irqoff();
> +		local_irq_enable();
> +		local_irq_disable();
> +		goto again;
> +	}
>   
> -	BUG_ON(!user_mode(regs));
> +	if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) {
> +		ret = _TIF_RESTOREALL;
> +		local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
> +	}
>   
> -	regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
> +	regs->exit_result |= ret;
>   
>   	return regs->exit_result;
>   }
> diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
> index c6997df632873..2a2b0b94a3eaa 100644
> --- a/arch/powerpc/kernel/ptrace/ptrace.c
> +++ b/arch/powerpc/kernel/ptrace/ptrace.c
> @@ -21,9 +21,6 @@
>   #include <asm/switch_to.h>
>   #include <asm/debug.h>
>   
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/syscalls.h>
> -
>   #include "ptrace-decl.h"
>   
>   /*
> @@ -233,106 +230,6 @@ static int do_seccomp(struct pt_regs *regs)
>   static inline int do_seccomp(struct pt_regs *regs) { return 0; }
>   #endif /* CONFIG_SECCOMP */
>   
> -/**
> - * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
> - * @regs: the pt_regs of the task to trace (current)
> - *
> - * Performs various types of tracing on syscall entry. This includes seccomp,
> - * ptrace, syscall tracepoints and audit.
> - *
> - * The pt_regs are potentially visible to userspace via ptrace, so their
> - * contents is ABI.
> - *
> - * One or more of the tracers may modify the contents of pt_regs, in particular
> - * to modify arguments or even the syscall number itself.
> - *
> - * It's also possible that a tracer can choose to reject the system call. In
> - * that case this function will return an illegal syscall number, and will put
> - * an appropriate return value in regs->r3.
> - *
> - * Return: the (possibly changed) syscall number.
> - */
> -long do_syscall_trace_enter(struct pt_regs *regs)
> -{
> -	u32 flags;
> -
> -	flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
> -
> -	if (flags) {
> -		int rc = ptrace_report_syscall_entry(regs);
> -
> -		if (unlikely(flags & _TIF_SYSCALL_EMU)) {
> -			/*
> -			 * A nonzero return code from
> -			 * ptrace_report_syscall_entry() tells us to prevent
> -			 * the syscall execution, but we are not going to
> -			 * execute it anyway.
> -			 *
> -			 * Returning -1 will skip the syscall execution. We want
> -			 * to avoid clobbering any registers, so we don't goto
> -			 * the skip label below.
> -			 */
> -			return -1;
> -		}
> -
> -		if (rc) {
> -			/*
> -			 * The tracer decided to abort the syscall. Note that
> -			 * the tracer may also just change regs->gpr[0] to an
> -			 * invalid syscall number, that is handled below on the
> -			 * exit path.
> -			 */
> -			goto skip;
> -		}
> -	}
> -
> -	/* Run seccomp after ptrace; allow it to set gpr[3]. */
> -	if (do_seccomp(regs))
> -		return -1;
> -
> -	/* Avoid trace and audit when syscall is invalid. */
> -	if (regs->gpr[0] >= NR_syscalls)
> -		goto skip;
> -
> -	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
> -		trace_sys_enter(regs, regs->gpr[0]);
> -
> -	if (!is_32bit_task())
> -		audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
> -				    regs->gpr[5], regs->gpr[6]);
> -	else
> -		audit_syscall_entry(regs->gpr[0],
> -				    regs->gpr[3] & 0xffffffff,
> -				    regs->gpr[4] & 0xffffffff,
> -				    regs->gpr[5] & 0xffffffff,
> -				    regs->gpr[6] & 0xffffffff);
> -
> -	/* Return the possibly modified but valid syscall number */
> -	return regs->gpr[0];
> -
> -skip:
> -	/*
> -	 * If we are aborting explicitly, or if the syscall number is
> -	 * now invalid, set the return value to -ENOSYS.
> -	 */
> -	regs->gpr[3] = -ENOSYS;
> -	return -1;
> -}
> -
> -void do_syscall_trace_leave(struct pt_regs *regs)
> -{
> -	int step;
> -
> -	audit_syscall_exit(regs);
> -
> -	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
> -		trace_sys_exit(regs, regs->result);
> -
> -	step = test_thread_flag(TIF_SINGLESTEP);
> -	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
> -		ptrace_report_syscall_exit(regs, step);
> -}
> -
>   void __init pt_regs_check(void);
>   
>   /*
> diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
> index 719930cf4ae1f..8e1a1b26b5eae 100644
> --- a/arch/powerpc/kernel/signal.c
> +++ b/arch/powerpc/kernel/signal.c
> @@ -6,6 +6,7 @@
>    *    Extracted from signal_32.c and signal_64.c
>    */
>   
> +#include <linux/entry-common.h>
>   #include <linux/resume_user_mode.h>
>   #include <linux/signal.h>
>   #include <linux/uprobes.h>
> @@ -22,11 +23,6 @@
>   
>   #include "signal.h"
>   
> -/* This will be removed */
> -#ifdef CONFIG_GENERIC_ENTRY
> -#include <linux/entry-common.h>
> -#endif /* CONFIG_GENERIC_ENTRY */
> -
>   #ifdef CONFIG_VSX
>   unsigned long copy_fpr_to_user(void __user *to,
>   			       struct task_struct *task)
> @@ -374,11 +370,9 @@ void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
>   				   task_pid_nr(tsk), where, ptr, regs->nip, regs->link);
>   }
>   
> -#ifdef CONFIG_GENERIC_ENTRY
>   void arch_do_signal_or_restart(struct pt_regs *regs)
>   {
>   	BUG_ON(regs != current->thread.regs);
>   	local_paca->generic_fw_flags |= GFW_RESTORE_ALL;
>   	do_signal(current);
>   }
> -#endif /* CONFIG_GENERIC_ENTRY */
> diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
> index 9f03a6263fb41..66fd6ca4462b0 100644
> --- a/arch/powerpc/kernel/syscall.c
> +++ b/arch/powerpc/kernel/syscall.c
> @@ -3,6 +3,7 @@
>   #include <linux/compat.h>
>   #include <linux/context_tracking.h>
>   #include <linux/randomize_kstack.h>
> +#include <linux/entry-common.h>
>   
>   #include <asm/interrupt.h>
>   #include <asm/kup.h>
> @@ -21,121 +22,9 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
>   	kuap_lock();
>   
>   	add_random_kstack_offset();
> +	r0 = syscall_enter_from_user_mode(regs, r0);
>   
> -	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
> -		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
> -
> -	trace_hardirqs_off(); /* finish reconciling */
> -
> -	CT_WARN_ON(ct_state() == CT_STATE_KERNEL);
> -	user_exit_irqoff();
> -
> -	BUG_ON(regs_is_unrecoverable(regs));
> -	BUG_ON(!user_mode(regs));
> -	BUG_ON(regs_irqs_disabled(regs));
> -
> -#ifdef CONFIG_PPC_PKEY
> -	if (mmu_has_feature(MMU_FTR_PKEY)) {
> -		unsigned long amr, iamr;
> -		bool flush_needed = false;
> -		/*
> -		 * When entering from userspace we mostly have the AMR/IAMR
> -		 * different from kernel default values. Hence don't compare.
> -		 */
> -		amr = mfspr(SPRN_AMR);
> -		iamr = mfspr(SPRN_IAMR);
> -		regs->amr  = amr;
> -		regs->iamr = iamr;
> -		if (mmu_has_feature(MMU_FTR_KUAP)) {
> -			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
> -			flush_needed = true;
> -		}
> -		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
> -			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
> -			flush_needed = true;
> -		}
> -		if (flush_needed)
> -			isync();
> -	} else
> -#endif
> -		kuap_assert_locked();
> -
> -	booke_restore_dbcr0();
> -
> -	account_cpu_user_entry();
> -
> -	account_stolen_time();
> -
> -	/*
> -	 * This is not required for the syscall exit path, but makes the
> -	 * stack frame look nicer. If this was initialised in the first stack
> -	 * frame, or if the unwinder was taught the first stack frame always
> -	 * returns to user with IRQS_ENABLED, this store could be avoided!
> -	 */
> -	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
> -
> -	/*
> -	 * If system call is called with TM active, set _TIF_RESTOREALL to
> -	 * prevent RFSCV being used to return to userspace, because POWER9
> -	 * TM implementation has problems with this instruction returning to
> -	 * transactional state. Final register values are not relevant because
> -	 * the transaction will be aborted upon return anyway. Or in the case
> -	 * of unsupported_scv SIGILL fault, the return state does not much
> -	 * matter because it's an edge case.
> -	 */
> -	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
> -			unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
> -		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
> -
> -	/*
> -	 * If the system call was made with a transaction active, doom it and
> -	 * return without performing the system call. Unless it was an
> -	 * unsupported scv vector, in which case it's treated like an illegal
> -	 * instruction.
> -	 */
> -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> -	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
> -	    !trap_is_unsupported_scv(regs)) {
> -		/* Enable TM in the kernel, and disable EE (for scv) */
> -		hard_irq_disable();
> -		mtmsr(mfmsr() | MSR_TM);
> -
> -		/* tabort, this dooms the transaction, nothing else */
> -		asm volatile(".long 0x7c00071d | ((%0) << 16)"
> -				:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
> -
> -		/*
> -		 * Userspace will never see the return value. Execution will
> -		 * resume after the tbegin. of the aborted transaction with the
> -		 * checkpointed register state. A context switch could occur
> -		 * or signal delivered to the process before resuming the
> -		 * doomed transaction context, but that should all be handled
> -		 * as expected.
> -		 */
> -		return -ENOSYS;
> -	}
> -#endif // CONFIG_PPC_TRANSACTIONAL_MEM
> -
> -	local_irq_enable();
> -
> -	if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
> -		if (unlikely(trap_is_unsupported_scv(regs))) {
> -			/* Unsupported scv vector */
> -			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
> -			return regs->gpr[3];
> -		}
> -		/*
> -		 * We use the return value of do_syscall_trace_enter() as the
> -		 * syscall number. If the syscall was rejected for any reason
> -		 * do_syscall_trace_enter() returns an invalid syscall number
> -		 * and the test against NR_syscalls will fail and the return
> -		 * value to be used is in regs->gpr[3].
> -		 */
> -		r0 = do_syscall_trace_enter(regs);
> -		if (unlikely(r0 >= NR_syscalls))
> -			return regs->gpr[3];
> -
> -	} else if (unlikely(r0 >= NR_syscalls)) {
> +	if (unlikely(r0 >= NR_syscalls)) {
>   		if (unlikely(trap_is_unsupported_scv(regs))) {
>   			/* Unsupported scv vector */
>   			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC V1 6/6] powerpc: Enable Generic Entry/Exit for syscalls.
  2025-04-29  6:11   ` Shrikanth Hegde
@ 2025-05-02  8:01     ` Mukesh Kumar Chaurasiya
  0 siblings, 0 replies; 12+ messages in thread
From: Mukesh Kumar Chaurasiya @ 2025-05-02  8:01 UTC (permalink / raw)
  To: Shrikanth Hegde
  Cc: maddy, mpe, npiggin, christophe.leroy, naveen, neeraj.upadhyay,
	vschneid, tglx, frederic, ankur.a.arora, bigeasy, kees, oleg,
	peterz, tzimmermann, namcao, kan.liang, mcgrof, rppt, atrajeev,
	anjalik, coltonlewis, linuxppc-dev, linux-kernel

On Tue, Apr 29, 2025 at 11:41:09AM +0530, Shrikanth Hegde wrote:
> 
> 
> On 4/28/25 20:52, Mukesh Kumar Chaurasiya wrote:
> > Enable the syscall entry and exit path from generic framework.
> > 
> > Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
> 
> Hi Mukesh. Thanks for working on this. Trying to go through it.
> 
> > ---
> >   arch/powerpc/Kconfig                |   1 +
> >   arch/powerpc/kernel/interrupt.c     |  46 +++++++----
> >   arch/powerpc/kernel/ptrace/ptrace.c | 103 ------------------------
> >   arch/powerpc/kernel/signal.c        |   8 +-
> >   arch/powerpc/kernel/syscall.c       | 117 +---------------------------
> >   5 files changed, 38 insertions(+), 237 deletions(-)
> > 
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index 6722625a406a0..45b70ccf7c89e 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -202,6 +202,7 @@ config PPC
> >   	select GENERIC_CPU_AUTOPROBE
> >   	select GENERIC_CPU_VULNERABILITIES	if PPC_BARRIER_NOSPEC
> >   	select GENERIC_EARLY_IOREMAP
> > +	select GENERIC_ENTRY
> 
> IMHO this should be enabled after irqentry_* is supported too.
>
Yeah, it will be done once IRQ handling is supported. I enabled it here only for the syscall path.
> >   	select GENERIC_GETTIMEOFDAY
> >   	select GENERIC_IDLE_POLL_SETUP
> >   	select GENERIC_IOREMAP
> > diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> > index 7f31f3fb9c1d8..8731064631de0 100644
> > --- a/arch/powerpc/kernel/interrupt.c
> > +++ b/arch/powerpc/kernel/interrupt.c
> > @@ -1,6 +1,7 @@
> >   // SPDX-License-Identifier: GPL-2.0-or-later
> >   #include <linux/context_tracking.h>
> > +#include <linux/entry-common.h>
> >   #include <linux/err.h>
> >   #include <linux/compat.h>
> >   #include <linux/rseq.h>
> > @@ -163,15 +164,10 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
> >   	unsigned long ret = 0;
> >   	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
> > -	CT_WARN_ON(ct_state() == CT_STATE_USER);
> > -
> >   	kuap_assert_locked();
> >   	regs->result = r3;
> > -	/* Check whether the syscall is issued inside a restartable sequence */
> > -	rseq_syscall(regs);
> > -
> >   	ti_flags = read_thread_flags();
> >   	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
> > @@ -192,13 +188,27 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
> >   	}
> >   	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
> > -		do_syscall_trace_leave(regs);
> >   		ret |= _TIF_RESTOREALL;
> >   	}
> > -	local_irq_disable();
> > -	ret = interrupt_exit_user_prepare_main(ret, regs);
> 
> This loop i dont understand why.
>
This is for the case where interrupts became pending while we were preparing the return, so they get processed before going back to userspace.
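To spell out the intent (my reading of the hunk below, written as a rough
C sketch rather than the literal patch, reusing the existing
prep_irq_for_enabled_exit() helper):

	/* Redo the exit-to-user work whenever an interrupt became
	 * pending while we were preparing the return. */
	for (;;) {
		syscall_exit_to_user_mode(regs);	/* exit work, ends with irqs off */
		if (prep_irq_for_enabled_exit(true))
			break;			/* nothing pending, safe to return */
		local_irq_enable();		/* let the pending interrupt be taken */
		local_irq_disable();		/* irqs off again, then retry */
	}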
> > +again:
> > +	syscall_exit_to_user_mode(regs);
> 
> exit_to_user_mode also calls user_enter_irqoff.
> so below user_enter_irqoff may be a nop or wrong.
Oh yeah, I'll fix this in the next version.
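For reference, my understanding of the call chain in the generic entry code
(roughly paraphrased from kernel/entry/common.c, so treat it as a sketch,
not the exact source):

	syscall_exit_to_user_mode(regs)
	    __syscall_exit_to_user_mode_work(regs)	/* syscall exit work, irqs end up off */
	    exit_to_user_mode()
	        trace_hardirqs_on_prepare()
	        user_enter_irqoff()			/* context tracking: enter user */
	        arch_exit_to_user_mode()

which is why the second user_enter_irqoff() in the patch looks redundant.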
> 
> also, syscall_exit_to_user_mode disables local interrupts first.
> 
> is local_irq_enable fixing something here?
>
It just enables IRQs for a short window so that any pending interrupts can be taken before we retry the exit path.
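To make that explicit, here are the three lines in question with my
annotations (the soft-mask replay detail is my understanding of ppc64
behaviour, not something stated in the patch):

	local_irq_enable();	/* pending (soft-masked) interrupts are replayed here */
	local_irq_disable();	/* hard-disable again before retrying */
	goto again;		/* redo the exit work now that the irq has run */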
> > +
> > +	user_enter_irqoff();
> > +	if (!prep_irq_for_enabled_exit(true)) {
> > +		user_exit_irqoff();
> > +		local_irq_enable();
> > +		local_irq_disable();
> > +		goto again;
> > +	}
> > +
> > +	/* Restore user access locks last */
> > +	kuap_user_restore(regs);
> > +	if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) {
> > +		ret |= _TIF_RESTOREALL;
> > +		local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
> > +	}
> >   #ifdef CONFIG_PPC64
> >   	regs->exit_result = ret;
> >   #endif
> > @@ -209,6 +219,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
> >   #ifdef CONFIG_PPC64
> >   notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
> >   {
> > +	unsigned long ret = 0;
> >   	/*
> >   	 * This is called when detecting a soft-pending interrupt as well as
> >   	 * an alternate-return interrupt. So we can't just have the alternate
> > @@ -222,14 +233,23 @@ notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *reg
> >   #ifdef CONFIG_PPC_BOOK3S_64
> >   	set_kuap(AMR_KUAP_BLOCKED);
> >   #endif
> > +again:
> > +	syscall_exit_to_user_mode(regs);
> > -	trace_hardirqs_off();
> > -	user_exit_irqoff();
> > -	account_cpu_user_entry();
> > +	user_enter_irqoff();
> > +	if (!prep_irq_for_enabled_exit(true)) {
> > +		user_exit_irqoff();
> > +		local_irq_enable();
> > +		local_irq_disable();
> > +		goto again;
> > +	}
> > -	BUG_ON(!user_mode(regs));
> > +	if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) {
> > +		ret = _TIF_RESTOREALL;
> > +		local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
> > +	}
> > -	regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
> > +	regs->exit_result |= ret;
> >   	return regs->exit_result;
> >   }
> > diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
> > index c6997df632873..2a2b0b94a3eaa 100644
> > --- a/arch/powerpc/kernel/ptrace/ptrace.c
> > +++ b/arch/powerpc/kernel/ptrace/ptrace.c
> > @@ -21,9 +21,6 @@
> >   #include <asm/switch_to.h>
> >   #include <asm/debug.h>
> > -#define CREATE_TRACE_POINTS
> > -#include <trace/events/syscalls.h>
> > -
> >   #include "ptrace-decl.h"
> >   /*
> > @@ -233,106 +230,6 @@ static int do_seccomp(struct pt_regs *regs)
> >   static inline int do_seccomp(struct pt_regs *regs) { return 0; }
> >   #endif /* CONFIG_SECCOMP */
> > -/**
> > - * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
> > - * @regs: the pt_regs of the task to trace (current)
> > - *
> > - * Performs various types of tracing on syscall entry. This includes seccomp,
> > - * ptrace, syscall tracepoints and audit.
> > - *
> > - * The pt_regs are potentially visible to userspace via ptrace, so their
> > - * contents is ABI.
> > - *
> > - * One or more of the tracers may modify the contents of pt_regs, in particular
> > - * to modify arguments or even the syscall number itself.
> > - *
> > - * It's also possible that a tracer can choose to reject the system call. In
> > - * that case this function will return an illegal syscall number, and will put
> > - * an appropriate return value in regs->r3.
> > - *
> > - * Return: the (possibly changed) syscall number.
> > - */
> > -long do_syscall_trace_enter(struct pt_regs *regs)
> > -{
> > -	u32 flags;
> > -
> > -	flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
> > -
> > -	if (flags) {
> > -		int rc = ptrace_report_syscall_entry(regs);
> > -
> > -		if (unlikely(flags & _TIF_SYSCALL_EMU)) {
> > -			/*
> > -			 * A nonzero return code from
> > -			 * ptrace_report_syscall_entry() tells us to prevent
> > -			 * the syscall execution, but we are not going to
> > -			 * execute it anyway.
> > -			 *
> > -			 * Returning -1 will skip the syscall execution. We want
> > -			 * to avoid clobbering any registers, so we don't goto
> > -			 * the skip label below.
> > -			 */
> > -			return -1;
> > -		}
> > -
> > -		if (rc) {
> > -			/*
> > -			 * The tracer decided to abort the syscall. Note that
> > -			 * the tracer may also just change regs->gpr[0] to an
> > -			 * invalid syscall number, that is handled below on the
> > -			 * exit path.
> > -			 */
> > -			goto skip;
> > -		}
> > -	}
> > -
> > -	/* Run seccomp after ptrace; allow it to set gpr[3]. */
> > -	if (do_seccomp(regs))
> > -		return -1;
> > -
> > -	/* Avoid trace and audit when syscall is invalid. */
> > -	if (regs->gpr[0] >= NR_syscalls)
> > -		goto skip;
> > -
> > -	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
> > -		trace_sys_enter(regs, regs->gpr[0]);
> > -
> > -	if (!is_32bit_task())
> > -		audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
> > -				    regs->gpr[5], regs->gpr[6]);
> > -	else
> > -		audit_syscall_entry(regs->gpr[0],
> > -				    regs->gpr[3] & 0xffffffff,
> > -				    regs->gpr[4] & 0xffffffff,
> > -				    regs->gpr[5] & 0xffffffff,
> > -				    regs->gpr[6] & 0xffffffff);
> > -
> > -	/* Return the possibly modified but valid syscall number */
> > -	return regs->gpr[0];
> > -
> > -skip:
> > -	/*
> > -	 * If we are aborting explicitly, or if the syscall number is
> > -	 * now invalid, set the return value to -ENOSYS.
> > -	 */
> > -	regs->gpr[3] = -ENOSYS;
> > -	return -1;
> > -}
> > -
> > -void do_syscall_trace_leave(struct pt_regs *regs)
> > -{
> > -	int step;
> > -
> > -	audit_syscall_exit(regs);
> > -
> > -	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
> > -		trace_sys_exit(regs, regs->result);
> > -
> > -	step = test_thread_flag(TIF_SINGLESTEP);
> > -	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
> > -		ptrace_report_syscall_exit(regs, step);
> > -}
> > -
> >   void __init pt_regs_check(void);
> >   /*
> > diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
> > index 719930cf4ae1f..8e1a1b26b5eae 100644
> > --- a/arch/powerpc/kernel/signal.c
> > +++ b/arch/powerpc/kernel/signal.c
> > @@ -6,6 +6,7 @@
> >    *    Extracted from signal_32.c and signal_64.c
> >    */
> > +#include <linux/entry-common.h>
> >   #include <linux/resume_user_mode.h>
> >   #include <linux/signal.h>
> >   #include <linux/uprobes.h>
> > @@ -22,11 +23,6 @@
> >   #include "signal.h"
> > -/* This will be removed */
> > -#ifdef CONFIG_GENERIC_ENTRY
> > -#include <linux/entry-common.h>
> > -#endif /* CONFIG_GENERIC_ENTRY */
> > -
> >   #ifdef CONFIG_VSX
> >   unsigned long copy_fpr_to_user(void __user *to,
> >   			       struct task_struct *task)
> > @@ -374,11 +370,9 @@ void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
> >   				   task_pid_nr(tsk), where, ptr, regs->nip, regs->link);
> >   }
> > -#ifdef CONFIG_GENERIC_ENTRY
> >   void arch_do_signal_or_restart(struct pt_regs *regs)
> >   {
> >   	BUG_ON(regs != current->thread.regs);
> >   	local_paca->generic_fw_flags |= GFW_RESTORE_ALL;
> >   	do_signal(current);
> >   }
> > -#endif /* CONFIG_GENERIC_ENTRY */
> > diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
> > index 9f03a6263fb41..66fd6ca4462b0 100644
> > --- a/arch/powerpc/kernel/syscall.c
> > +++ b/arch/powerpc/kernel/syscall.c
> > @@ -3,6 +3,7 @@
> >   #include <linux/compat.h>
> >   #include <linux/context_tracking.h>
> >   #include <linux/randomize_kstack.h>
> > +#include <linux/entry-common.h>
> >   #include <asm/interrupt.h>
> >   #include <asm/kup.h>
> > @@ -21,121 +22,9 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
> >   	kuap_lock();
> >   	add_random_kstack_offset();
> > +	r0 = syscall_enter_from_user_mode(regs, r0);
> > -	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
> > -		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
> > -
> > -	trace_hardirqs_off(); /* finish reconciling */
> > -
> > -	CT_WARN_ON(ct_state() == CT_STATE_KERNEL);
> > -	user_exit_irqoff();
> > -
> > -	BUG_ON(regs_is_unrecoverable(regs));
> > -	BUG_ON(!user_mode(regs));
> > -	BUG_ON(regs_irqs_disabled(regs));
> > -
> > -#ifdef CONFIG_PPC_PKEY
> > -	if (mmu_has_feature(MMU_FTR_PKEY)) {
> > -		unsigned long amr, iamr;
> > -		bool flush_needed = false;
> > -		/*
> > -		 * When entering from userspace we mostly have the AMR/IAMR
> > -		 * different from kernel default values. Hence don't compare.
> > -		 */
> > -		amr = mfspr(SPRN_AMR);
> > -		iamr = mfspr(SPRN_IAMR);
> > -		regs->amr  = amr;
> > -		regs->iamr = iamr;
> > -		if (mmu_has_feature(MMU_FTR_KUAP)) {
> > -			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
> > -			flush_needed = true;
> > -		}
> > -		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
> > -			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
> > -			flush_needed = true;
> > -		}
> > -		if (flush_needed)
> > -			isync();
> > -	} else
> > -#endif
> > -		kuap_assert_locked();
> > -
> > -	booke_restore_dbcr0();
> > -
> > -	account_cpu_user_entry();
> > -
> > -	account_stolen_time();
> > -
> > -	/*
> > -	 * This is not required for the syscall exit path, but makes the
> > -	 * stack frame look nicer. If this was initialised in the first stack
> > -	 * frame, or if the unwinder was taught the first stack frame always
> > -	 * returns to user with IRQS_ENABLED, this store could be avoided!
> > -	 */
> > -	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
> > -
> > -	/*
> > -	 * If system call is called with TM active, set _TIF_RESTOREALL to
> > -	 * prevent RFSCV being used to return to userspace, because POWER9
> > -	 * TM implementation has problems with this instruction returning to
> > -	 * transactional state. Final register values are not relevant because
> > -	 * the transaction will be aborted upon return anyway. Or in the case
> > -	 * of unsupported_scv SIGILL fault, the return state does not much
> > -	 * matter because it's an edge case.
> > -	 */
> > -	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
> > -			unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
> > -		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
> > -
> > -	/*
> > -	 * If the system call was made with a transaction active, doom it and
> > -	 * return without performing the system call. Unless it was an
> > -	 * unsupported scv vector, in which case it's treated like an illegal
> > -	 * instruction.
> > -	 */
> > -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> > -	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
> > -	    !trap_is_unsupported_scv(regs)) {
> > -		/* Enable TM in the kernel, and disable EE (for scv) */
> > -		hard_irq_disable();
> > -		mtmsr(mfmsr() | MSR_TM);
> > -
> > -		/* tabort, this dooms the transaction, nothing else */
> > -		asm volatile(".long 0x7c00071d | ((%0) << 16)"
> > -				:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
> > -
> > -		/*
> > -		 * Userspace will never see the return value. Execution will
> > -		 * resume after the tbegin. of the aborted transaction with the
> > -		 * checkpointed register state. A context switch could occur
> > -		 * or signal delivered to the process before resuming the
> > -		 * doomed transaction context, but that should all be handled
> > -		 * as expected.
> > -		 */
> > -		return -ENOSYS;
> > -	}
> > -#endif // CONFIG_PPC_TRANSACTIONAL_MEM
> > -
> > -	local_irq_enable();
> > -
> > -	if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
> > -		if (unlikely(trap_is_unsupported_scv(regs))) {
> > -			/* Unsupported scv vector */
> > -			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
> > -			return regs->gpr[3];
> > -		}
> > -		/*
> > -		 * We use the return value of do_syscall_trace_enter() as the
> > -		 * syscall number. If the syscall was rejected for any reason
> > -		 * do_syscall_trace_enter() returns an invalid syscall number
> > -		 * and the test against NR_syscalls will fail and the return
> > -		 * value to be used is in regs->gpr[3].
> > -		 */
> > -		r0 = do_syscall_trace_enter(regs);
> > -		if (unlikely(r0 >= NR_syscalls))
> > -			return regs->gpr[3];
> > -
> > -	} else if (unlikely(r0 >= NR_syscalls)) {
> > +	if (unlikely(r0 >= NR_syscalls)) {
> >   		if (unlikely(trap_is_unsupported_scv(regs))) {
> >   			/* Unsupported scv vector */
> >   			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC V1 0/6] Generic Entry/Exit support for ppc64
  2025-04-28 15:22 [RFC V1 0/6] Generic Entry/Exit support for ppc64 Mukesh Kumar Chaurasiya
                   ` (6 preceding siblings ...)
  2025-04-28 15:52 ` [RFC V1 0/6] Generic Entry/Exit support for ppc64 Mukesh Kumar Chaurasiya
@ 2025-05-05 17:08 ` Ankur Arora
  2025-05-05 17:18   ` Ankur Arora
  7 siblings, 1 reply; 12+ messages in thread
From: Ankur Arora @ 2025-05-05 17:08 UTC (permalink / raw)
  To: Mukesh Kumar Chaurasiya
  Cc: linux-kernel, maddy, mpe, npiggin, christophe.leroy, naveen,
	neeraj.upadhyay, vschneid, tglx, frederic, ankur.a.arora, sshegde,
	bigeasy, kees, oleg, peterz, tzimmermann, namcao, kan.liang,
	mcgrof, rppt, atrajeev, anjalik, coltonlewis, linuxppc-dev


Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com> writes:

> This is a syscall only implementation of generic entry/exit framework
> for framework for ppc. IRQ handling is not done in this RFC.
>
> This will break the ppc32 build as of now which will be fixed along with
> IRQ handling.
>
> Below are the performance benchmarks from perf bench basic syscall.
> This is for 1,00,00,000 getppid() calls
>
> | Metric     | Without Generic Framework | With Generic Framework |
> | ---------- | ------------------------- | ---------------------- |
> | Total time | 0.904 [sec]               | 0.856 [sec]            |
> | usecs/op   | 0.090403                  | 0.085638               |
> | ops/sec    | 1,10,61,579               | 1,16,77,086            |
>
> That's ~5% degradation as of now.

Is the table header inverted? That reads like a ~5% improvement with the
generic version.

Ankur

> Mukesh Kumar Chaurasiya (6):
>   powerpc: rename arch_irq_disabled_regs
>   powerpc: Prepare to build with genreic entry/exit framework
>   powerpc: introduce arch_enter_from_user_mode
>   powerpc: Add flag in paca for register restore state
>   powerpc: Introduce syscall exit arch functions
>   powerpc: Enable Generic Entry/Exit for syscalls.
>
>  arch/powerpc/Kconfig                    |   1 +
>  arch/powerpc/include/asm/entry-common.h | 158 ++++++++++++++++++++++++
>  arch/powerpc/include/asm/hw_irq.h       |   4 +-
>  arch/powerpc/include/asm/interrupt.h    | 117 +++++++++++++++++-
>  arch/powerpc/include/asm/paca.h         |   1 +
>  arch/powerpc/include/asm/stacktrace.h   |   8 ++
>  arch/powerpc/include/asm/syscall.h      |   5 +
>  arch/powerpc/include/asm/thread_info.h  |   1 +
>  arch/powerpc/kernel/interrupt.c         | 153 ++++++-----------------
>  arch/powerpc/kernel/ptrace/ptrace.c     | 103 ---------------
>  arch/powerpc/kernel/signal.c            |   8 ++
>  arch/powerpc/kernel/syscall.c           | 117 +-----------------
>  arch/powerpc/kernel/traps.c             |   2 +-
>  arch/powerpc/kernel/watchdog.c          |   2 +-
>  arch/powerpc/perf/core-book3s.c         |   2 +-
>  15 files changed, 336 insertions(+), 346 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/entry-common.h


--
ankur


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC V1 0/6] Generic Entry/Exit support for ppc64
  2025-05-05 17:08 ` Ankur Arora
@ 2025-05-05 17:18   ` Ankur Arora
  0 siblings, 0 replies; 12+ messages in thread
From: Ankur Arora @ 2025-05-05 17:18 UTC (permalink / raw)
  To: Ankur Arora
  Cc: Mukesh Kumar Chaurasiya, linux-kernel, maddy, mpe, npiggin,
	christophe.leroy, naveen, neeraj.upadhyay, vschneid, tglx,
	frederic, sshegde, bigeasy, kees, oleg, peterz, tzimmermann,
	namcao, kan.liang, mcgrof, rppt, atrajeev, anjalik, coltonlewis,
	linuxppc-dev


Ankur Arora <ankur.a.arora@oracle.com> writes:

> Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com> writes:
>
>> This is a syscall only implementation of generic entry/exit framework
>> for framework for ppc. IRQ handling is not done in this RFC.
>>
>> This will break the ppc32 build as of now which will be fixed along with
>> IRQ handling.
>>
>> Below are the performance benchmarks from perf bench basic syscall.
>> This is for 1,00,00,000 getppid() calls
>>
>> | Metric     | Without Generic Framework | With Generic Framework |
>> | ---------- | ------------------------- | ---------------------- |
>> | Total time | 0.904 [sec]               | 0.856 [sec]            |
>> | usecs/op   | 0.090403                  | 0.085638               |
>> | ops/sec    | 1,10,61,579               | 1,16,77,086            |
>>
>> That's ~5% degradation as of now.
>
> Is the table header inverted? That reads like a ~5% improvement with the
> generic version.

Please ignore. Just noticed your update down thread.

>> Mukesh Kumar Chaurasiya (6):
>>   powerpc: rename arch_irq_disabled_regs
>>   powerpc: Prepare to build with genreic entry/exit framework
>>   powerpc: introduce arch_enter_from_user_mode
>>   powerpc: Add flag in paca for register restore state
>>   powerpc: Introduce syscall exit arch functions
>>   powerpc: Enable Generic Entry/Exit for syscalls.
>>
>>  arch/powerpc/Kconfig                    |   1 +
>>  arch/powerpc/include/asm/entry-common.h | 158 ++++++++++++++++++++++++
>>  arch/powerpc/include/asm/hw_irq.h       |   4 +-
>>  arch/powerpc/include/asm/interrupt.h    | 117 +++++++++++++++++-
>>  arch/powerpc/include/asm/paca.h         |   1 +
>>  arch/powerpc/include/asm/stacktrace.h   |   8 ++
>>  arch/powerpc/include/asm/syscall.h      |   5 +
>>  arch/powerpc/include/asm/thread_info.h  |   1 +
>>  arch/powerpc/kernel/interrupt.c         | 153 ++++++-----------------
>>  arch/powerpc/kernel/ptrace/ptrace.c     | 103 ---------------
>>  arch/powerpc/kernel/signal.c            |   8 ++
>>  arch/powerpc/kernel/syscall.c           | 117 +-----------------
>>  arch/powerpc/kernel/traps.c             |   2 +-
>>  arch/powerpc/kernel/watchdog.c          |   2 +-
>>  arch/powerpc/perf/core-book3s.c         |   2 +-
>>  15 files changed, 336 insertions(+), 346 deletions(-)
>>  create mode 100644 arch/powerpc/include/asm/entry-common.h


--
ankur


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2025-05-05 17:56 UTC | newest]

Thread overview: 12+ messages
2025-04-28 15:22 [RFC V1 0/6] Generic Entry/Exit support for ppc64 Mukesh Kumar Chaurasiya
2025-04-28 15:22 ` [RFC V1 1/6] powerpc: rename arch_irq_disabled_regs Mukesh Kumar Chaurasiya
2025-04-28 15:22 ` [RFC V1 2/6] powerpc: Prepare to build with genreic entry/exit framework Mukesh Kumar Chaurasiya
2025-04-28 15:22 ` [RFC V1 3/6] powerpc: introduce arch_enter_from_user_mode Mukesh Kumar Chaurasiya
2025-04-28 15:22 ` [RFC V1 4/6] powerpc: Add flag in paca for register restore state Mukesh Kumar Chaurasiya
2025-04-28 15:22 ` [RFC V1 5/6] powerpc: Introduce syscall exit arch functions Mukesh Kumar Chaurasiya
2025-04-28 15:22 ` [RFC V1 6/6] powerpc: Enable Generic Entry/Exit for syscalls Mukesh Kumar Chaurasiya
2025-04-29  6:11   ` Shrikanth Hegde
2025-05-02  8:01     ` Mukesh Kumar Chaurasiya
2025-04-28 15:52 ` [RFC V1 0/6] Generic Entry/Exit support for ppc64 Mukesh Kumar Chaurasiya
2025-05-05 17:08 ` Ankur Arora
2025-05-05 17:18   ` Ankur Arora
