From: tip-bot for Peter Zijlstra <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: torvalds@linux-foundation.org, peterz@infradead.org,
linux-kernel@vger.kernel.org, cs.os.kernel@gmail.com,
hpa@zytor.com, tglx@linutronix.de, oleg@redhat.com,
mingo@kernel.org
Subject: [tip:sched/core] sched/core: Optimize __schedule()
Date: Thu, 22 Sep 2016 06:59:51 -0700 [thread overview]
Message-ID: <tip-9af6528ee9b682df7f29dbee86fbba0b67eab944@git.kernel.org> (raw)
In-Reply-To: <20160913163729.GB5012@twins.programming.kicks-ass.net>
Commit-ID: 9af6528ee9b682df7f29dbee86fbba0b67eab944
Gitweb: http://git.kernel.org/tip/9af6528ee9b682df7f29dbee86fbba0b67eab944
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Tue, 13 Sep 2016 18:37:29 +0200
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 22 Sep 2016 14:53:45 +0200
sched/core: Optimize __schedule()
Oleg noted that by making do_exit() use __schedule() for the TASK_DEAD
context switch, we can avoid the TASK_DEAD special case currently in
__schedule() because that avoids the extra preempt_disable() from
schedule().
In order to facilitate this, create a do_task_dead() helper which we
place in the scheduler code, such that it can access __schedule().
Also add some __noreturn annotations to the functions, there's no
coming back from do_exit().
Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Cheng Chao <cs.os.kernel@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: chris@chris-wilson.co.uk
Cc: tj@kernel.org
Link: http://lkml.kernel.org/r/20160913163729.GB5012@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
include/linux/kernel.h | 9 +++------
include/linux/sched.h | 2 ++
kernel/exit.c | 26 ++------------------------
kernel/sched/core.c | 38 +++++++++++++++++++++++++++-----------
4 files changed, 34 insertions(+), 41 deletions(-)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d96a611..74fd6f0 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -259,17 +259,14 @@ static inline void might_fault(void) { }
extern struct atomic_notifier_head panic_notifier_list;
extern long (*panic_blink)(int state);
__printf(1, 2)
-void panic(const char *fmt, ...)
- __noreturn __cold;
+void panic(const char *fmt, ...) __noreturn __cold;
void nmi_panic(struct pt_regs *regs, const char *msg);
extern void oops_enter(void);
extern void oops_exit(void);
void print_oops_end_marker(void);
extern int oops_may_print(void);
-void do_exit(long error_code)
- __noreturn;
-void complete_and_exit(struct completion *, long)
- __noreturn;
+void do_exit(long error_code) __noreturn;
+void complete_and_exit(struct completion *, long) __noreturn;
/* Internal, do not use. */
int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d750240..f00ee8e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -448,6 +448,8 @@ static inline void io_schedule(void)
io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
}
+void __noreturn do_task_dead(void);
+
struct nsproxy;
struct user_namespace;
diff --git a/kernel/exit.c b/kernel/exit.c
index 091a78b..1e1d913 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -725,7 +725,7 @@ static void check_stack_usage(void)
static inline void check_stack_usage(void) {}
#endif
-void do_exit(long code)
+void __noreturn do_exit(long code)
{
struct task_struct *tsk = current;
int group_dead;
@@ -882,29 +882,7 @@ void do_exit(long code)
exit_rcu();
TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
- /*
- * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
- * when the following two conditions become true.
- * - There is race condition of mmap_sem (It is acquired by
- * exit_mm()), and
- * - SMI occurs before setting TASK_RUNINNG.
- * (or hypervisor of virtual machine switches to other guest)
- * As a result, we may become TASK_RUNNING after becoming TASK_DEAD
- *
- * To avoid it, we have to wait for releasing tsk->pi_lock which
- * is held by try_to_wake_up()
- */
- smp_mb();
- raw_spin_unlock_wait(&tsk->pi_lock);
-
- /* causes final put_task_struct in finish_task_switch(). */
- tsk->state = TASK_DEAD;
- tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
- schedule();
- BUG();
- /* Avoid "noreturn function does return". */
- for (;;)
- cpu_relax(); /* For when BUG is null */
+ do_task_dead();
}
EXPORT_SYMBOL_GPL(do_exit);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ff4e3c0..b2ec53c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3331,17 +3331,6 @@ static void __sched notrace __schedule(bool preempt)
rq = cpu_rq(cpu);
prev = rq->curr;
- /*
- * do_exit() calls schedule() with preemption disabled as an exception;
- * however we must fix that up, otherwise the next task will see an
- * inconsistent (higher) preempt count.
- *
- * It also avoids the below schedule_debug() test from complaining
- * about this.
- */
- if (unlikely(prev->state == TASK_DEAD))
- preempt_enable_no_resched_notrace();
-
schedule_debug(prev);
if (sched_feat(HRTICK))
@@ -3409,6 +3398,33 @@ static void __sched notrace __schedule(bool preempt)
}
STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
+void __noreturn do_task_dead(void)
+{
+ /*
+ * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
+ * when the following two conditions become true.
+ * - There is race condition of mmap_sem (It is acquired by
+ * exit_mm()), and
+ * - SMI occurs before setting TASK_RUNINNG.
+ * (or hypervisor of virtual machine switches to other guest)
+ * As a result, we may become TASK_RUNNING after becoming TASK_DEAD
+ *
+ * To avoid it, we have to wait for releasing tsk->pi_lock which
+ * is held by try_to_wake_up()
+ */
+ smp_mb();
+ raw_spin_unlock_wait(¤t->pi_lock);
+
+ /* causes final put_task_struct in finish_task_switch(). */
+ __set_current_state(TASK_DEAD);
+ current->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
+ __schedule(false);
+ BUG();
+ /* Avoid "noreturn function does return". */
+ for (;;)
+ cpu_relax(); /* For when BUG is null */
+}
+
static inline void sched_submit_work(struct task_struct *tsk)
{
if (!tsk->state || tsk_is_pi_blocked(tsk))
next prev parent reply other threads:[~2016-09-22 14:00 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <OF73BCE1E4.D2224FEC-ON48258025.0022CFA7-48258025.0022E3DC@kedacom.com>
2016-09-09 8:13 ` [PATCH v2] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE Cheng Chao
2016-09-09 8:19 ` chengchao
2016-09-09 13:14 ` Oleg Nesterov
2016-09-09 16:24 ` Peter Zijlstra
2016-09-10 8:52 ` [PATCH v3] " Cheng Chao
2016-09-10 9:51 ` Cheng Chao
2016-09-10 16:33 ` Peter Zijlstra
2016-09-12 11:03 ` Oleg Nesterov
2016-09-13 2:45 ` Cheng Chao
2016-09-12 11:37 ` Peter Zijlstra
2016-09-12 11:41 ` Peter Zijlstra
2016-09-12 13:05 ` Oleg Nesterov
2016-09-12 15:01 ` Peter Zijlstra
2016-09-13 16:14 ` Oleg Nesterov
2016-09-13 16:37 ` Peter Zijlstra
2016-09-14 2:07 ` Cheng Chao
2016-09-14 7:50 ` Peter Zijlstra
2016-09-14 15:45 ` Oleg Nesterov
2016-09-22 13:59 ` tip-bot for Peter Zijlstra [this message]
2016-09-13 4:03 ` Cheng Chao
2016-09-13 8:45 ` Peter Zijlstra
2016-09-14 2:01 ` [PATCH v4] stop_machine: Avoid a sleep and wakeup in the stop_one_cpu() Cheng Chao
2016-09-14 15:53 ` Oleg Nesterov
2016-09-18 2:07 ` Cheng Chao
2016-09-22 13:59 ` [tip:sched/core] stop_machine: Avoid a sleep and wakeup in stop_one_cpu() tip-bot for Cheng Chao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=tip-9af6528ee9b682df7f29dbee86fbba0b67eab944@git.kernel.org \
--to=tipbot@zytor.com \
--cc=cs.os.kernel@gmail.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-tip-commits@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=oleg@redhat.com \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).