From: Sven Schnelle <svens@linux.ibm.com>
To: Thomas Gleixner <tglx@linutronix.de>,
Peter Zijlstra <peterz@infradead.org>,
Frederic Weisbecker <frederic@kernel.org>,
"Eric W . Biederman" <ebiederm@xmission.com>,
Mark Rutland <mark.rutland@arm.com>,
Andy Lutomirski <luto@kernel.org>
Cc: linux-s390@vger.kernel.org, hca@linux.ibm.com,
linux-kernel@vger.kernel.org
Subject: [PATCH 1/2] entry: move the exit path to header files
Date: Tue, 16 May 2023 15:38:09 +0200 [thread overview]
Message-ID: <20230516133810.171487-2-svens@linux.ibm.com> (raw)
In-Reply-To: <20230516133810.171487-1-svens@linux.ibm.com>
In order to allow inlining of the generic entry exit-to-user-mode C
functions, move them from kernel/entry/common.c to include/linux/entry-common.h.
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
---
include/linux/entry-common.h | 182 ++++++++++++++++++++++++++++++++++-
kernel/entry/common.c | 168 --------------------------------
2 files changed, 179 insertions(+), 171 deletions(-)
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index d95ab85f96ba..b409fbcbd3ac 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -7,8 +7,13 @@
#include <linux/syscalls.h>
#include <linux/seccomp.h>
#include <linux/sched.h>
-
+#include <linux/livepatch.h>
#include <asm/entry-common.h>
+#include <linux/context_tracking.h>
+#include <linux/resume_user_mode.h>
+#include <linux/tick.h>
+
+#include <trace/events/syscalls.h>
/*
* Define dummy _TIF work flags if not defined by the architecture or for
@@ -291,7 +296,7 @@ void exit_to_user_mode(void);
* make the final state transitions. Interrupts must stay disabled between
* return from this function and the invocation of exit_to_user_mode().
*/
-void syscall_exit_to_user_mode_work(struct pt_regs *regs);
+static void syscall_exit_to_user_mode_work(struct pt_regs *regs);
/**
* syscall_exit_to_user_mode - Handle work before returning to user mode
@@ -350,7 +355,7 @@ void irqentry_enter_from_user_mode(struct pt_regs *regs);
* Interrupt exit is not invoking #1 which is the syscall specific one time
* work.
*/
-void irqentry_exit_to_user_mode(struct pt_regs *regs);
+static void irqentry_exit_to_user_mode(struct pt_regs *regs);
#ifndef irqentry_state
/**
@@ -465,4 +470,175 @@ irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);
*/
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);
+static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+ /*
+ * Before returning to user space ensure that all pending work
+ * items have been completed.
+ */
+ while (ti_work & EXIT_TO_USER_MODE_WORK) {
+
+ local_irq_enable_exit_to_user(ti_work);
+
+ if (ti_work & _TIF_NEED_RESCHED)
+ schedule();
+
+ if (ti_work & _TIF_UPROBE)
+ uprobe_notify_resume(regs);
+
+ if (ti_work & _TIF_PATCH_PENDING)
+ klp_update_patch_state(current);
+
+ if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
+ arch_do_signal_or_restart(regs);
+
+ if (ti_work & _TIF_NOTIFY_RESUME)
+ resume_user_mode_work(regs);
+
+ /* Architecture specific TIF work */
+ arch_exit_to_user_mode_work(regs, ti_work);
+
+ /*
+ * Disable interrupts and reevaluate the work flags as they
+ * might have changed while interrupts and preemption were
+ * enabled above.
+ */
+ local_irq_disable_exit_to_user();
+
+ /* Check if any of the above work has queued a deferred wakeup */
+ tick_nohz_user_enter_prepare();
+
+ ti_work = read_thread_flags();
+ }
+
+ /* Return the latest work state for arch_exit_to_user_mode_prepare() */
+ return ti_work;
+}
+
+
+static void exit_to_user_mode_prepare(struct pt_regs *regs)
+{
+ unsigned long ti_work;
+
+ lockdep_assert_irqs_disabled();
+
+ /* Flush pending rcuog wakeup before the last need_resched() check */
+ tick_nohz_user_enter_prepare();
+
+ ti_work = read_thread_flags();
+ if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
+ ti_work = exit_to_user_mode_loop(regs, ti_work);
+
+ arch_exit_to_user_mode_prepare(regs, ti_work);
+
+ /* Ensure that the address limit is intact and no locks are held */
+ addr_limit_user_check();
+ kmap_assert_nomap();
+ lockdep_assert_irqs_disabled();
+ lockdep_sys_exit();
+}
+
+/*
+ * If SYSCALL_EMU is set, then the only reason to report is when
+ * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
+ * instruction has already been reported in syscall_enter_from_user_mode().
+ */
+static inline bool report_single_step(unsigned long work)
+{
+ if (work & SYSCALL_WORK_SYSCALL_EMU)
+ return false;
+
+ return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
+}
+
+static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
+{
+ bool step;
+
+ /*
+ * If the syscall was rolled back due to syscall user dispatching,
+ * then the tracers below are not invoked for the same reason as
+ * the entry side was not invoked in syscall_trace_enter(): The ABI
+ * of these syscalls is unknown.
+ */
+ if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
+ if (unlikely(current->syscall_dispatch.on_dispatch)) {
+ current->syscall_dispatch.on_dispatch = false;
+ return;
+ }
+ }
+
+ audit_syscall_exit(regs);
+
+ if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
+ trace_sys_exit(regs, syscall_get_return_value(current, regs));
+
+ step = report_single_step(work);
+ if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
+ ptrace_report_syscall_exit(regs, step);
+}
+
+/*
+ * Syscall specific exit to user mode preparation. Runs with interrupts
+ * enabled.
+ */
+static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
+{
+ unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
+ unsigned long nr = syscall_get_nr(current, regs);
+
+ CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
+
+ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+ if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
+ local_irq_enable();
+ }
+
+ rseq_syscall(regs);
+
+ /*
+ * Do one-time syscall specific work. If these work items are
+ * enabled, we want to run them exactly once per syscall exit with
+ * interrupts enabled.
+ */
+ if (unlikely(work & SYSCALL_WORK_EXIT))
+ syscall_exit_work(regs, work);
+}
+
+static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
+{
+ syscall_exit_to_user_mode_prepare(regs);
+ local_irq_disable_exit_to_user();
+ exit_to_user_mode_prepare(regs);
+}
+
+static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
+{
+ __syscall_exit_to_user_mode_work(regs);
+}
+
+/* See the comment for exit_to_user_mode() earlier in this header */
+static __always_inline void __exit_to_user_mode(void)
+{
+ instrumentation_begin();
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare();
+ instrumentation_end();
+
+ user_enter_irqoff();
+ arch_exit_to_user_mode();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
+
+static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs)
+{
+ instrumentation_begin();
+ exit_to_user_mode_prepare(regs);
+ instrumentation_end();
+ __exit_to_user_mode();
+}
+
+
#endif
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index be61332c66b5..66af971c3fe4 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -123,19 +123,6 @@ noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
instrumentation_end();
}
-/* See comment for exit_to_user_mode() in entry-common.h */
-static __always_inline void __exit_to_user_mode(void)
-{
- instrumentation_begin();
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- instrumentation_end();
-
- user_enter_irqoff();
- arch_exit_to_user_mode();
- lockdep_hardirqs_on(CALLER_ADDR0);
-}
-
void noinstr exit_to_user_mode(void)
{
__exit_to_user_mode();
@@ -144,153 +131,6 @@ void noinstr exit_to_user_mode(void)
/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
-static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
- unsigned long ti_work)
-{
- /*
- * Before returning to user space ensure that all pending work
- * items have been completed.
- */
- while (ti_work & EXIT_TO_USER_MODE_WORK) {
-
- local_irq_enable_exit_to_user(ti_work);
-
- if (ti_work & _TIF_NEED_RESCHED)
- schedule();
-
- if (ti_work & _TIF_UPROBE)
- uprobe_notify_resume(regs);
-
- if (ti_work & _TIF_PATCH_PENDING)
- klp_update_patch_state(current);
-
- if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
- arch_do_signal_or_restart(regs);
-
- if (ti_work & _TIF_NOTIFY_RESUME)
- resume_user_mode_work(regs);
-
- /* Architecture specific TIF work */
- arch_exit_to_user_mode_work(regs, ti_work);
-
- /*
- * Disable interrupts and reevaluate the work flags as they
- * might have changed while interrupts and preemption was
- * enabled above.
- */
- local_irq_disable_exit_to_user();
-
- /* Check if any of the above work has queued a deferred wakeup */
- tick_nohz_user_enter_prepare();
-
- ti_work = read_thread_flags();
- }
-
- /* Return the latest work state for arch_exit_to_user_mode() */
- return ti_work;
-}
-
-static void exit_to_user_mode_prepare(struct pt_regs *regs)
-{
- unsigned long ti_work;
-
- lockdep_assert_irqs_disabled();
-
- /* Flush pending rcuog wakeup before the last need_resched() check */
- tick_nohz_user_enter_prepare();
-
- ti_work = read_thread_flags();
- if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
- ti_work = exit_to_user_mode_loop(regs, ti_work);
-
- arch_exit_to_user_mode_prepare(regs, ti_work);
-
- /* Ensure that the address limit is intact and no locks are held */
- addr_limit_user_check();
- kmap_assert_nomap();
- lockdep_assert_irqs_disabled();
- lockdep_sys_exit();
-}
-
-/*
- * If SYSCALL_EMU is set, then the only reason to report is when
- * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
- * instruction has been already reported in syscall_enter_from_user_mode().
- */
-static inline bool report_single_step(unsigned long work)
-{
- if (work & SYSCALL_WORK_SYSCALL_EMU)
- return false;
-
- return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
-}
-
-static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
-{
- bool step;
-
- /*
- * If the syscall was rolled back due to syscall user dispatching,
- * then the tracers below are not invoked for the same reason as
- * the entry side was not invoked in syscall_trace_enter(): The ABI
- * of these syscalls is unknown.
- */
- if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
- if (unlikely(current->syscall_dispatch.on_dispatch)) {
- current->syscall_dispatch.on_dispatch = false;
- return;
- }
- }
-
- audit_syscall_exit(regs);
-
- if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
- trace_sys_exit(regs, syscall_get_return_value(current, regs));
-
- step = report_single_step(work);
- if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
- ptrace_report_syscall_exit(regs, step);
-}
-
-/*
- * Syscall specific exit to user mode preparation. Runs with interrupts
- * enabled.
- */
-static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
-{
- unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
- unsigned long nr = syscall_get_nr(current, regs);
-
- CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
-
- if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
- if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
- local_irq_enable();
- }
-
- rseq_syscall(regs);
-
- /*
- * Do one-time syscall specific work. If these work items are
- * enabled, we want to run them exactly once per syscall exit with
- * interrupts enabled.
- */
- if (unlikely(work & SYSCALL_WORK_EXIT))
- syscall_exit_work(regs, work);
-}
-
-static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
-{
- syscall_exit_to_user_mode_prepare(regs);
- local_irq_disable_exit_to_user();
- exit_to_user_mode_prepare(regs);
-}
-
-void syscall_exit_to_user_mode_work(struct pt_regs *regs)
-{
- __syscall_exit_to_user_mode_work(regs);
-}
-
__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
{
instrumentation_begin();
@@ -304,14 +144,6 @@ noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
__enter_from_user_mode(regs);
}
-noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
-{
- instrumentation_begin();
- exit_to_user_mode_prepare(regs);
- instrumentation_end();
- __exit_to_user_mode();
-}
-
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
irqentry_state_t ret = {
--
2.39.2
next prev parent reply other threads:[~2023-05-16 13:40 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-16 13:38 [RFC 0/2] allow to inline generic entry Sven Schnelle
2023-05-16 13:38 ` Sven Schnelle [this message]
2023-05-16 16:42 ` [PATCH 1/2] entry: move the exit path to header files Peter Zijlstra
2023-05-16 20:20 ` Eric W. Biederman
2023-05-17 5:45 ` Sven Schnelle
2023-05-16 16:52 ` kernel test robot
2023-05-16 18:33 ` kernel test robot
2023-05-16 19:01 ` kernel test robot
2023-05-16 13:38 ` [PATCH 2/2] entry: move the enter " Sven Schnelle
2023-05-16 19:55 ` kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230516133810.171487-2-svens@linux.ibm.com \
--to=svens@linux.ibm.com \
--cc=ebiederm@xmission.com \
--cc=frederic@kernel.org \
--cc=hca@linux.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mark.rutland@arm.com \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.