From: Peter Zijlstra <peterz@infradead.org>
To: mingo@kernel.org, tglx@linutronix.de, pjt@google.com,
tim.c.chen@linux.intel.com, torvalds@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, subhra.mazumdar@oracle.com,
fweisbec@gmail.com, keescook@chromium.org, kerrnel@google.com,
"Peter Zijlstra (Intel)" <peterz@infradead.org>
Subject: [RFC][PATCH 11/16] sched: Basic tracking of matching tasks
Date: Mon, 18 Feb 2019 17:56:31 +0100 [thread overview]
Message-ID: <20190218173514.549503978@infradead.org> (raw)
In-Reply-To: 20190218165620.383905466@infradead.org
Introduce task_struct::core_cookie as an opaque identifier for core
scheduling. When enabled, core scheduling will only allow matching
tasks to be on the core, where idle matches everything.
When task_struct::core_cookie is set (and core scheduling is enabled)
these tasks are indexed in a second RB-tree, first on cookie value
then on scheduling function, such that matching task selection always
finds the most eligible match.
NOTE: *shudder* at the overhead...
NOTE: *sigh*, a 3rd copy of the scheduling function; the alternative
is per class tracking of cookies and that just duplicates a lot of
stuff for no raisin (the 2nd copy lives in the rt-mutex PI code).
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/linux/sched.h | 8 ++
kernel/sched/core.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/sched.h | 4 +
3 files changed, 156 insertions(+), 1 deletion(-)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -635,10 +635,16 @@ struct task_struct {
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
+ struct sched_dl_entity dl;
+
+#ifdef CONFIG_SCHED_CORE
+ struct rb_node core_node;
+ unsigned long core_cookie;
+#endif
+
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
#endif
- struct sched_dl_entity dl;
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* List of struct preempt_notifier: */
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -64,6 +64,140 @@ int sysctl_sched_rt_runtime = 950000;
DEFINE_STATIC_KEY_FALSE(__sched_core_enabled);
+/*
+ * Collapse a task onto one "kernel priority" number, where a numerically
+ * smaller value means a more important task:
+ *
+ *   stop class        -> -2 (ranks ahead of deadline)
+ *   rt_prio(p->prio)  -> p->prio itself (this range also covers deadline)
+ *   idle class        -> MAX_RT_PRIO + NICE_WIDTH
+ *   everything else   -> MAX_RT_PRIO + MAX_NICE (all fair tasks squashed
+ *                        onto one value)
+ *
+ * NOTE(review): with the usual MAX_RT_PRIO == 100 and MAX_NICE == 19 the
+ * fair value is 119 -- confirm against prio.h.
+ */
+static inline int __task_prio(struct task_struct *p)
+{
+	const struct sched_class *class = p->sched_class;
+	int prio;
+
+	/* The order of the checks matters: stop first, idle only after rt/dl. */
+	if (class == &stop_sched_class)
+		prio = -2;
+	else if (rt_prio(p->prio))
+		prio = p->prio;
+	else if (class == &idle_sched_class)
+		prio = MAX_RT_PRIO + NICE_WIDTH;
+	else
+		prio = MAX_RT_PRIO + MAX_NICE;
+
+	return prio;
+}
+
+/*
+ * l(a,b)
+ * le(a,b) := !l(b,a)
+ * g(a,b) := l(b,a)
+ * ge(a,b) := !l(a,b)
+ */
+
+/*
+ * real prio, less is less
+ *
+ * Return true when @a has lower real priority than @b.
+ *
+ * @a, @b:   tasks to compare.
+ * @runtime: when true, two fair tasks are additionally ordered by
+ *           se.vruntime; when false two fair tasks never compare as less
+ *           (the function returns false for them).
+ *
+ * Tasks are first compared on their __task_prio() value. Only when those
+ * are equal do the deadline / vruntime tie-breakers below apply.
+ *
+ * NOTE(review): for two fair tasks with identical vruntime (and @runtime
+ * set) the expression below yields true in both argument orders, so this
+ * is not a strict ordering for such pairs -- confirm callers tolerate that.
+ */
+static inline bool __prio_less(struct task_struct *a, struct task_struct *b, bool runtime)
+{
+ int pa = __task_prio(a), pb = __task_prio(b);
+
+ /* __task_prio() is "smaller is more important"; negate to get real prio */
+ if (-pa < -pb)
+ return true;
+
+ if (-pb < -pa)
+ return false;
+
+ /*
+  * Same class from here on. For deadline tasks, @a is less unless
+  * dl_time_before() holds for (a, b) -- presumably "a's deadline is
+  * earlier than b's"; verify against its definition.
+  */
+ if (pa == -1) /* dl_prio() doesn't work because of stop_class above */
+ return !dl_time_before(a->dl.deadline, b->dl.deadline);
+
+ /* signed difference: @a is less when its vruntime is not behind @b's */
+ if (pa == MAX_RT_PRIO + MAX_NICE && runtime) /* fair */
+ return !((s64)(a->se.vruntime - b->se.vruntime) < 0);
+
+ return false;
+}
+
+/*
+ * __prio_less() with the vruntime tie-break enabled for fair tasks.
+ * Presumably meant for comparing tasks that sit on the same CPU -- confirm
+ * against callers.
+ */
+static inline bool cpu_prio_less(struct task_struct *a, struct task_struct *b)
+{
+ return __prio_less(a, b, true);
+}
+
+/*
+ * __prio_less() with the vruntime tie-break disabled: two fair tasks are
+ * never reported as less than one another.
+ */
+static inline bool core_prio_less(struct task_struct *a, struct task_struct *b)
+{
+ /* cannot compare vruntime across CPUs */
+ return __prio_less(a, b, false);
+}
+
+/*
+ * Ordering used for the per-rq core tree: primary key is the cookie value
+ * (ascending), secondary key is priority with the comparison flipped so
+ * that, among equal cookies, the higher priority task sorts first
+ * (leftmost).
+ */
+static inline bool __sched_core_less(struct task_struct *a, struct task_struct *b)
+{
+	if (a->core_cookie != b->core_cookie)
+		return a->core_cookie < b->core_cookie;
+
+	/* Equal cookies: @a goes first when @b has the lower priority. */
+	return cpu_prio_less(b, a);
+}
+
+/*
+ * Account an enqueue on @rq for core scheduling.
+ *
+ * The core-wide core_task_seq counter is bumped for every task. Only tasks
+ * with a non-zero core_cookie are then linked into rq->core_tree, at the
+ * position dictated by __sched_core_less(). A task that does not compare
+ * as less than an existing node is placed to that node's right.
+ */
+void sched_core_enqueue(struct rq *rq, struct task_struct *p)
+{
+	struct rb_node **link = &rq->core_tree.rb_node;
+	struct rb_node *parent = NULL;
+
+	rq->core->core_task_seq++;
+
+	if (!p->core_cookie)
+		return;
+
+	/* Walk down to the empty slot where @p belongs. */
+	while (*link) {
+		struct task_struct *cur;
+
+		parent = *link;
+		cur = container_of(parent, struct task_struct, core_node);
+
+		link = __sched_core_less(p, cur) ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&p->core_node, parent, link);
+	rb_insert_color(&p->core_node, &rq->core_tree);
+}
+
+/*
+ * Account a dequeue on @rq for core scheduling.
+ *
+ * Bumps the core-wide core_task_seq counter for every task and unlinks @p
+ * from rq->core_tree when it carries a non-zero core_cookie (tasks without
+ * a cookie are never in the tree).
+ */
+void sched_core_dequeue(struct rq *rq, struct task_struct *p)
+{
+	rq->core->core_task_seq++;
+
+	if (p->core_cookie)
+		rb_erase(&p->core_node, &rq->core_tree);
+}
+
+/*
+ * Find left-most (aka, highest priority) task matching @cookie.
+ *
+ * rq->core_tree is ordered by __sched_core_less(): smaller cookies sit to
+ * the left and, within one cookie value, higher priority sits to the left.
+ * The search therefore descends left when @cookie is smaller than the
+ * node's cookie and right when it is larger. On a match the node is
+ * remembered and the walk continues left, in case a higher priority task
+ * with the same cookie exists further down.
+ *
+ * Returns the task picked by idle_sched_class.pick_task() when no task in
+ * the tree carries @cookie.
+ */
+struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
+{
+	struct rb_node *node = rq->core_tree.rb_node;
+	struct task_struct *node_task, *match;
+
+	/*
+	 * The idle task always matches any cookie!
+	 */
+	match = idle_sched_class.pick_task(rq);
+
+	while (node) {
+		node_task = container_of(node, struct task_struct, core_node);
+
+		/* Direction must mirror the insertion order in the tree. */
+		if (cookie < node_task->core_cookie) {
+			node = node->rb_left;
+		} else if (cookie > node_task->core_cookie) {
+			node = node->rb_right;
+		} else {
+			match = node_task;
+			node = node->rb_left;
+		}
+	}
+
+	return match;
+}
+
/*
* The static-key + stop-machine variable are needed such that:
*
@@ -122,6 +256,11 @@ void sched_core_put(void)
mutex_unlock(&sched_core_mutex);
}
+#else /* !CONFIG_SCHED_CORE */
+
+/* Empty stand-ins for the !CONFIG_SCHED_CORE build: both hooks do nothing. */
+static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
+static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
+
#endif /* CONFIG_SCHED_CORE */
/*
@@ -826,6 +965,9 @@ static void set_load_weight(struct task_
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
+ if (sched_core_enabled(rq))
+ sched_core_enqueue(rq, p);
+
if (!(flags & ENQUEUE_NOCLOCK))
update_rq_clock(rq);
@@ -839,6 +981,9 @@ static inline void enqueue_task(struct r
static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
{
+ if (sched_core_enabled(rq))
+ sched_core_dequeue(rq, p);
+
if (!(flags & DEQUEUE_NOCLOCK))
update_rq_clock(rq);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -957,6 +957,10 @@ struct rq {
/* per rq */
struct rq *core;
unsigned int core_enabled;
+ struct rb_root core_tree;
+
+ /* shared state */
+ unsigned int core_task_seq;
#endif
};
next prev parent reply other threads:[~2019-02-18 17:41 UTC|newest]
Thread overview: 99+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-18 16:56 [RFC][PATCH 00/16] sched: Core scheduling Peter Zijlstra
2019-02-18 16:56 ` [RFC][PATCH 01/16] stop_machine: Fix stop_cpus_in_progress ordering Peter Zijlstra
2019-02-18 16:56 ` [RFC][PATCH 02/16] sched: Fix kerneldoc comment for ia64_set_curr_task Peter Zijlstra
2019-02-18 16:56 ` [RFC][PATCH 03/16] sched: Wrap rq::lock access Peter Zijlstra
2019-02-19 16:13 ` Phil Auld
2019-02-19 16:22 ` Peter Zijlstra
2019-02-19 16:37 ` Phil Auld
2019-03-18 15:41 ` Julien Desfossez
2019-03-20 2:29 ` Subhra Mazumdar
2019-03-21 21:20 ` Julien Desfossez
2019-03-22 13:34 ` Peter Zijlstra
2019-03-22 20:59 ` Julien Desfossez
2019-03-23 0:06 ` Subhra Mazumdar
2019-03-27 1:02 ` Subhra Mazumdar
2019-03-29 13:35 ` Julien Desfossez
2019-03-29 22:23 ` Subhra Mazumdar
2019-04-01 21:35 ` Subhra Mazumdar
2019-04-03 20:16 ` Julien Desfossez
2019-04-05 1:30 ` Subhra Mazumdar
2019-04-02 7:42 ` Peter Zijlstra
2019-03-22 23:28 ` Tim Chen
2019-03-22 23:44 ` Tim Chen
2019-02-18 16:56 ` [RFC][PATCH 04/16] sched/{rt,deadline}: Fix set_next_task vs pick_next_task Peter Zijlstra
2019-02-18 16:56 ` [RFC][PATCH 05/16] sched: Add task_struct pointer to sched_class::set_curr_task Peter Zijlstra
2019-02-18 16:56 ` [RFC][PATCH 06/16] sched/fair: Export newidle_balance() Peter Zijlstra
2019-02-18 16:56 ` [RFC][PATCH 07/16] sched: Allow put_prev_task() to drop rq->lock Peter Zijlstra
2019-02-18 16:56 ` [RFC][PATCH 08/16] sched: Rework pick_next_task() slow-path Peter Zijlstra
2019-02-18 16:56 ` [RFC][PATCH 09/16] sched: Introduce sched_class::pick_task() Peter Zijlstra
2019-02-18 16:56 ` [RFC][PATCH 10/16] sched: Core-wide rq->lock Peter Zijlstra
2019-02-18 16:56 ` Peter Zijlstra [this message]
2019-02-18 16:56 ` [RFC][PATCH 12/16] sched: A quick and dirty cgroup tagging interface Peter Zijlstra
2019-02-18 16:56 ` [RFC][PATCH 13/16] sched: Add core wide task selection and scheduling Peter Zijlstra
[not found] ` <20190402064612.GA46500@aaronlu>
2019-04-02 8:28 ` Peter Zijlstra
2019-04-02 13:20 ` Aaron Lu
2019-04-05 14:55 ` Aaron Lu
2019-04-09 18:09 ` Tim Chen
2019-04-10 4:36 ` Aaron Lu
2019-04-10 14:18 ` Aubrey Li
2019-04-11 2:11 ` Aaron Lu
2019-04-10 14:44 ` Peter Zijlstra
2019-04-11 3:05 ` Aaron Lu
2019-04-11 9:19 ` Peter Zijlstra
2019-04-10 8:06 ` Peter Zijlstra
2019-04-10 19:58 ` Vineeth Remanan Pillai
2019-04-15 16:59 ` Julien Desfossez
2019-04-16 13:43 ` Aaron Lu
2019-04-09 18:38 ` Julien Desfossez
2019-04-10 15:01 ` Peter Zijlstra
2019-04-11 0:11 ` Subhra Mazumdar
2019-04-19 8:40 ` Ingo Molnar
2019-04-19 23:16 ` Subhra Mazumdar
2019-02-18 16:56 ` [RFC][PATCH 14/16] sched/fair: Add a few assertions Peter Zijlstra
2019-02-18 16:56 ` [RFC][PATCH 15/16] sched: Trivial forced-newidle balancer Peter Zijlstra
2019-02-21 16:19 ` Valentin Schneider
2019-02-21 16:41 ` Peter Zijlstra
2019-02-21 16:47 ` Peter Zijlstra
2019-02-21 18:28 ` Valentin Schneider
2019-04-04 8:31 ` Aubrey Li
2019-04-06 1:36 ` Aubrey Li
2019-02-18 16:56 ` [RFC][PATCH 16/16] sched: Debug bits Peter Zijlstra
2019-02-18 17:49 ` [RFC][PATCH 00/16] sched: Core scheduling Linus Torvalds
2019-02-18 20:40 ` Peter Zijlstra
2019-02-19 0:29 ` Linus Torvalds
2019-02-19 15:15 ` Ingo Molnar
2019-02-22 12:17 ` Paolo Bonzini
2019-02-22 14:20 ` Peter Zijlstra
2019-02-22 19:26 ` Tim Chen
2019-02-26 8:26 ` Aubrey Li
2019-02-27 7:54 ` Aubrey Li
2019-02-21 2:53 ` Subhra Mazumdar
2019-02-21 14:03 ` Peter Zijlstra
2019-02-21 18:44 ` Subhra Mazumdar
2019-02-22 0:34 ` Subhra Mazumdar
2019-02-22 12:45 ` Mel Gorman
2019-02-22 16:10 ` Mel Gorman
2019-03-08 19:44 ` Subhra Mazumdar
2019-03-11 4:23 ` Aubrey Li
2019-03-11 18:34 ` Subhra Mazumdar
2019-03-11 23:33 ` Subhra Mazumdar
2019-03-12 0:20 ` Greg Kerr
2019-03-12 0:47 ` Subhra Mazumdar
2019-03-12 7:33 ` Aaron Lu
2019-03-12 7:45 ` Aubrey Li
2019-03-13 5:55 ` Aubrey Li
2019-03-14 0:35 ` Tim Chen
2019-03-14 5:30 ` Aubrey Li
2019-03-14 6:07 ` Li, Aubrey
2019-03-18 6:56 ` Aubrey Li
2019-03-12 19:07 ` Pawan Gupta
2019-03-26 7:32 ` Aaron Lu
2019-03-26 7:56 ` Aaron Lu
2019-02-19 22:07 ` Greg Kerr
2019-02-20 9:42 ` Peter Zijlstra
2019-02-20 18:33 ` Greg Kerr
2019-02-22 14:10 ` Peter Zijlstra
2019-03-07 22:06 ` Paolo Bonzini
2019-02-20 18:43 ` Subhra Mazumdar
2019-03-01 2:54 ` Subhra Mazumdar
2019-03-14 15:28 ` Julien Desfossez
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190218173514.549503978@infradead.org \
--to=peterz@infradead.org \
--cc=fweisbec@gmail.com \
--cc=keescook@chromium.org \
--cc=kerrnel@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=pjt@google.com \
--cc=subhra.mazumdar@oracle.com \
--cc=tglx@linutronix.de \
--cc=tim.c.chen@linux.intel.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.