[RFC v4 2/6] sched/deadline: improve the tracking of active utilization

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Luca Abeni <luca.abeni@unitn.it>
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>, Juri Lelli <juri.lelli@arm.com>,
	Claudio Scordino <claudio@evidence.eu.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Tommaso Cucinotta <tommaso.cucinotta@sssup.it>,
	Daniel Bristot de Oliveira <bristot@redhat.com>,
	Luca Abeni <luca.abeni@unitn.it>
Subject: [RFC v4 2/6] sched/deadline: improve the tracking of active utilization
Date: Fri, 30 Dec 2016 12:33:07 +0100	[thread overview]
Message-ID: <1483097591-3871-3-git-send-email-lucabe72@gmail.com> (raw)
In-Reply-To: <1483097591-3871-1-git-send-email-lucabe72@gmail.com>

From: Luca Abeni <luca.abeni@unitn.it>

This patch implements a more theoretically sound algorithm for
tracking active utilization: instead of decreasing it when a
task blocks, use a timer (the "inactive timer", named after the
"Inactive" task state of the GRUB algorithm) to decrease the
active utilization at the so called "0-lag time".

Signed-off-by: Luca Abeni <luca.abeni@unitn.it>
---
 include/linux/sched.h   |  18 +++++-
 kernel/sched/core.c     |   2 +
 kernel/sched/deadline.c | 150 ++++++++++++++++++++++++++++++++++++++++++++----
 kernel/sched/sched.h    |   1 +
 4 files changed, 158 insertions(+), 13 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4d19052..f34633c2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1451,14 +1451,30 @@ struct sched_dl_entity {
 	 *
 	 * @dl_yielded tells if task gave up the cpu before consuming
 	 * all its available runtime during the last job.
+	 *
+	 * @dl_non_contending tells if task is inactive while still
+	 * contributing to the active utilization. In other words, it
+	 * indicates if the inactive timer has been armed and its handler
+	 * has not been executed yet. This flag is useful to avoid race
+	 * conditions between the inactive timer handler and the wakeup
+	 * code.
 	 */
-	int dl_throttled, dl_boosted, dl_yielded;
+	int dl_throttled, dl_boosted, dl_yielded, dl_non_contending;
 
 	/*
 	 * Bandwidth enforcement timer. Each -deadline task has its
 	 * own bandwidth to be enforced, thus we need one timer per task.
 	 */
 	struct hrtimer dl_timer;
+
+	/*
+	 * Inactive timer, responsible for decreasing the active utilization
+	 * at the "0-lag time". When a -deadline task blocks, it contributes
+	 * to GRUB's active utilization until the "0-lag time", hence a
+	 * timer is needed to decrease the active utilization at the correct
+	 * time.
+	 */
+	struct hrtimer inactive_timer;
 };
 
 union rcu_special {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c56fb57..98f9944 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2187,6 +2187,7 @@ void __dl_clear_params(struct task_struct *p)
 
 	dl_se->dl_throttled = 0;
 	dl_se->dl_yielded = 0;
+	dl_se->dl_non_contending = 0;
 }
 
 /*
@@ -2218,6 +2219,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	RB_CLEAR_NODE(&p->dl.rb_node);
 	init_dl_task_timer(&p->dl);
+	init_inactive_task_timer(&p->dl);
 	__dl_clear_params(p);
 
 	INIT_LIST_HEAD(&p->rt.run_list);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 23c840e..cdb7274 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -65,6 +65,46 @@ void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 		dl_rq->running_bw = 0;
 }
 
+static void task_go_inactive(struct task_struct *p)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+	struct hrtimer *timer = &dl_se->inactive_timer;
+	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+	struct rq *rq = rq_of_dl_rq(dl_rq);
+	s64 zerolag_time;
+
+	WARN_ON(dl_se->dl_runtime == 0);
+
+	WARN_ON(hrtimer_active(&dl_se->inactive_timer));
+	WARN_ON(dl_se->dl_non_contending);
+
+	zerolag_time = dl_se->deadline -
+		 div64_long((dl_se->runtime * dl_se->dl_period),
+			dl_se->dl_runtime);
+
+	/*
+	 * Using relative times instead of the absolute "0-lag time"
+	 * allows to simplify the code
+	 */
+	zerolag_time -= rq_clock(rq);
+
+	/*
+	 * If the "0-lag time" already passed, decrease the active
+	 * utilization now, instead of starting a timer
+	 */
+	if (zerolag_time < 0) {
+		sub_running_bw(dl_se, dl_rq);
+		if (!dl_task(p))
+			__dl_clear_params(p);
+
+		return;
+	}
+
+	dl_se->dl_non_contending = 1;
+	get_task_struct(p);
+	hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL);
+}
+
 static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
 {
 	struct sched_dl_entity *dl_se = &p->dl;
@@ -610,10 +650,8 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	 * The task might have changed its scheduling policy to something
 	 * different than SCHED_DEADLINE (through switched_from_dl()).
 	 */
-	if (!dl_task(p)) {
-		__dl_clear_params(p);
+	if (!dl_task(p))
 		goto unlock;
-	}
 
 	/*
 	 * The task might have been boosted by someone else and might be in the
@@ -800,6 +838,48 @@ static void update_curr_dl(struct rq *rq)
 	}
 }
 
+static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
+{
+	struct sched_dl_entity *dl_se = container_of(timer,
+						     struct sched_dl_entity,
+						     inactive_timer);
+	struct task_struct *p = dl_task_of(dl_se);
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(p, &rf);
+
+	if (!dl_task(p) || p->state == TASK_DEAD) {
+		if (p->state == TASK_DEAD && dl_se->dl_non_contending)
+			sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
+
+		__dl_clear_params(p);
+
+		goto unlock;
+	}
+	if (dl_se->dl_non_contending == 0)
+		goto unlock;
+
+	sched_clock_tick();
+	update_rq_clock(rq);
+
+	sub_running_bw(dl_se, &rq->dl);
+	dl_se->dl_non_contending = 0;
+unlock:
+	task_rq_unlock(rq, p, &rf);
+	put_task_struct(p);
+
+	return HRTIMER_NORESTART;
+}
+
+void init_inactive_task_timer(struct sched_dl_entity *dl_se)
+{
+	struct hrtimer *timer = &dl_se->inactive_timer;
+
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer->function = inactive_task_timer;
+}
+
 #ifdef CONFIG_SMP
 
 static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
@@ -934,7 +1014,28 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
 	if (flags & ENQUEUE_WAKEUP) {
 		struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 
-		add_running_bw(dl_se, dl_rq);
+		if (dl_se->dl_non_contending) {
+			/*
+			 * If the timer handler is currently running and the
+			 * timer cannot be cancelled, inactive_task_timer()
+			 * will see that dl_not_contending is not set, and
+			 * will do nothing, so we are still safe.
+			 */
+			if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
+				put_task_struct(dl_task_of(dl_se));
+			WARN_ON(dl_task_of(dl_se)->nr_cpus_allowed > 1);
+			dl_se->dl_non_contending = 0;
+		} else {
+			/*
+			 * Since "dl_non_contending" is not set, the
+			 * task's utilization has already been removed from
+			 * active utilization (either when the task blocked,
+			 * when the "inactive timer" fired, or when it has
+			 * been cancelled in select_task_rq_dl()).
+			 * So, add it back.
+			 */
+			add_running_bw(dl_se, dl_rq);
+		}
 		update_dl_entity(dl_se, pi_se);
 	}
 	else if (flags & ENQUEUE_REPLENISH)
@@ -1023,7 +1124,7 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	 * or "inactive")
 	 */
 	if (flags & DEQUEUE_SLEEP)
-		sub_running_bw(&p->dl, &rq->dl);
+		task_go_inactive(p);
 }
 
 /*
@@ -1097,6 +1198,22 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 	}
 	rcu_read_unlock();
 
+	rq = task_rq(p);
+	raw_spin_lock(&rq->lock);
+	if (p->dl.dl_non_contending) {
+		sub_running_bw(&p->dl, &rq->dl);
+		p->dl.dl_non_contending = 0;
+		/*
+		 * If the timer handler is currently running and the
+		 * timer cannot be cancelled, inactive_task_timer()
+		 * will see that dl_not_contending is not set, and
+		 * will do nothing, so we are still safe.
+		 */
+		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+			put_task_struct(p);
+	}
+	raw_spin_unlock(&rq->lock);
+
 out:
 	return cpu;
 }
@@ -1743,16 +1860,25 @@ void __init init_sched_dl_class(void)
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
 	/*
-	 * Start the deadline timer; if we switch back to dl before this we'll
-	 * continue consuming our current CBS slice. If we stay outside of
-	 * SCHED_DEADLINE until the deadline passes, the timer will reset the
-	 * task.
+	 * task_go_inactive() can start the "inactive timer" (if the 0-lag
+	 * time is in the future). If the task switches back to dl before
+	 * the "inactive timer" fires, it can continue to consume its current
+	 * runtime using its current deadline. If it stays outside of
+	 * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
+	 * will reset the task parameters.
 	 */
-	if (!start_dl_timer(p))
-		__dl_clear_params(p);
+	if (task_on_rq_queued(p) && p->dl.dl_runtime)
+		task_go_inactive(p);
 
-	if (task_on_rq_queued(p))
+	/*
+	 * We cannot use inactive_task_timer() to invoke sub_running_bw()
+	 * at the 0-lag time, because the task could have been migrated
+	 * while SCHED_OTHER in the meanwhile.
+	 */
+	if (p->dl.dl_non_contending) {
 		sub_running_bw(&p->dl, &rq->dl);
+		p->dl.dl_non_contending = 0;
+	}
 
 	/*
 	 * Since this might be the only -deadline task on the rq,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0659772..e422803 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1367,6 +1367,7 @@ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime
 extern struct dl_bandwidth def_dl_bandwidth;
 extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
 extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
+extern void init_inactive_task_timer(struct sched_dl_entity *dl_se);
 
 unsigned long to_ratio(u64 period, u64 runtime);
 
-- 
2.7.4

next prev parent reply	other threads:[~2016-12-30 11:34 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-30 11:33 [RFC v4 0/6] CPU reclaiming for SCHED_DEADLINE Luca Abeni
2016-12-30 11:33 ` [RFC v4 1/6] sched/deadline: track the active utilization Luca Abeni
2016-12-30 11:33 ` Luca Abeni [this message]
2017-01-11 17:05   ` [RFC v4 2/6] sched/deadline: improve the tracking of " Juri Lelli
2017-01-11 21:22     ` luca abeni
2016-12-30 11:33 ` [RFC v4 3/6] sched/deadline: fix the update of the total -deadline utilization Luca Abeni
2016-12-30 11:33 ` [RFC v4 4/6] sched/deadline: implement GRUB accounting Luca Abeni
2016-12-30 11:33 ` [RFC v4 5/6] sched/deadline: do not reclaim the whole CPU bandwidth Luca Abeni
2016-12-30 11:33 ` [RFC v4 6/6] sched/deadline: make GRUB a task's flag Luca Abeni
2017-01-03 18:58 ` [RFC v4 0/6] CPU reclaiming for SCHED_DEADLINE Daniel Bristot de Oliveira
2017-01-03 21:33   ` luca abeni
2017-01-04 12:17   ` luca abeni
2017-01-04 15:14     ` Daniel Bristot de Oliveira
2017-01-04 16:42       ` Luca Abeni
2017-01-04 18:00         ` Daniel Bristot de Oliveira
2017-01-04 18:30           ` Luca Abeni
2017-01-11 12:19             ` Juri Lelli
2017-01-11 12:39               ` Luca Abeni
2017-01-11 15:06                 ` Juri Lelli
2017-01-11 21:16                   ` luca abeni

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:4d19052 dfblob:f34633c dfblob:c56fb57 dfblob:98f9944
dfblob:23c840e dfblob:cdb7274 dfblob:0659772 dfblob:e422803 )
 OR (
bs:"[RFC v4 2/6] sched/deadline: improve the tracking of active utilization" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1483097591-3871-3-git-send-email-lucabe72@gmail.com \
    --to=luca.abeni@unitn.it \
    --cc=bristot@redhat.com \
    --cc=claudio@evidence.eu.com \
    --cc=juri.lelli@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tommaso.cucinotta@sssup.it \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.