Re: [PATCH v3 0/5] sync a se with its cfs_rq when att(det)aching it

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Peter Zijlstra <peterz@infradead.org>
To: byungchul.park@lge.com
Cc: mingo@kernel.org, linux-kernel@vger.kernel.org, yuyang.du@intel.com
Subject: Re: [PATCH v3 0/5] sync a se with its cfs_rq when att(det)aching it
Date: Thu, 20 Aug 2015 03:17:21 +0200	[thread overview]
Message-ID: <20150820011721.GA3138@worktop.event.rightround.com> (raw)
In-Reply-To: <1439966836-11266-1-git-send-email-byungchul.park@lge.com>


I did something like this on top.. please have a look at the XXX and
integrate.

---

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2664,8 +2664,8 @@ static inline u64 cfs_rq_clock_task(stru
 /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
 static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 {
-	int decayed;
 	struct sched_avg *sa = &cfs_rq->avg;
+	int decayed;
 
 	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
 		long r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
@@ -2695,15 +2695,16 @@ static inline int update_cfs_rq_load_avg
 static inline void update_load_avg(struct sched_entity *se, int update_tg)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	int cpu = cpu_of(rq_of(cfs_rq));
 	u64 now = cfs_rq_clock_task(cfs_rq);
+	int cpu = cpu_of(rq_of(cfs_rq));
 
 	/*
 	 * Track task load average for carrying it to new CPU after migrated, and
 	 * track group sched_entity load average for task_h_load calc in migration
 	 */
 	__update_load_avg(now, cpu, &se->avg,
-		se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL);
+			  se->on_rq * scale_load_down(se->load.weight),
+			  cfs_rq->curr == se, NULL);
 
 	if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
 		update_tg_load_avg(cfs_rq, 0);
@@ -2718,9 +2719,10 @@ static void attach_entity_load_avg(struc
 	 * update here. we have to update it with prev cfs_rq just before changing
 	 * se's cfs_rq, and get here soon.
 	 */
-	if (se->avg.last_update_time)
+	if (se->avg.last_update_time) {
 		__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
-				&se->avg, 0, 0, NULL);
+				  &se->avg, 0, 0, NULL);
+	}
 
 	se->avg.last_update_time = cfs_rq->avg.last_update_time;
 	cfs_rq->avg.load_avg += se->avg.load_avg;
@@ -2732,17 +2734,13 @@ static void attach_entity_load_avg(struc
 static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
-			&se->avg, se->on_rq * scale_load_down(se->load.weight),
-			cfs_rq->curr == se, NULL);
+			  &se->avg, se->on_rq * scale_load_down(se->load.weight),
+			  cfs_rq->curr == se, NULL);
 
-	cfs_rq->avg.load_avg =
-		max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
-	cfs_rq->avg.load_sum =
-		max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
-	cfs_rq->avg.util_avg =
-		max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
-	cfs_rq->avg.util_sum =
-		max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+	cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
+	cfs_rq->avg.load_sum = max_t(s64,  cfs_rq->avg.load_sum - se->avg.load_sum, 0);
+	cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
+	cfs_rq->avg.util_sum = max_t(s32,  cfs_rq->avg.util_sum - se->avg.util_sum, 0);
 }
 
 /* Add the load generated by se into cfs_rq's load average */
@@ -2751,14 +2749,14 @@ enqueue_entity_load_avg(struct cfs_rq *c
 {
 	struct sched_avg *sa = &se->avg;
 	u64 now = cfs_rq_clock_task(cfs_rq);
-	int migrated = 0, decayed;
+	int migrated, decayed;
 
-	if (sa->last_update_time == 0)
-		migrated = 1;
-	else
+	migrated = !sa->last_update_time;
+	if (!migrated) {
 		__update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
 			se->on_rq * scale_load_down(se->load.weight),
 			cfs_rq->curr == se, NULL);
+	}
 
 	decayed = update_cfs_rq_load_avg(now, cfs_rq);
 
@@ -2781,7 +2779,7 @@ dequeue_entity_load_avg(struct cfs_rq *c
 	cfs_rq->runnable_load_avg =
 		max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0);
 	cfs_rq->runnable_load_sum =
-		max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
+		max_t(s64,  cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
 }
 
 /*
@@ -2849,6 +2847,11 @@ static inline void
 dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 static inline void remove_entity_load_avg(struct sched_entity *se) {}
 
+static inline void
+attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
+static inline void
+detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
+
 static inline int idle_balance(struct rq *rq)
 {
 	return 0;
@@ -7915,29 +7918,50 @@ prio_changed_fair(struct rq *rq, struct
 		check_preempt_curr(rq, p, 0);
 }
 
-static void detach_task_cfs_rq(struct task_struct *p, int queued)
+static inline bool vruntime_normalized(struct task_struct *p)
 {
+	int queued = task_on_rq_queued(p);
 	struct sched_entity *se = &p->se;
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
 	/*
-	 * Ensure the task's vruntime is normalized, so that after attaching
-	 * back it to a cfs_rq the enqueue_entity() will do the right thing.
-	 *
 	 * If it's queued, then the dequeue_entity(.flags=0) will already
-	 * have normalized the vruntime, if it's !queued, then only when
-	 * the task is sleeping it still has a non-normalized vruntime.
-	 *
+	 * have normalized the vruntime.
+	 */
+	if (queued)
+		return true;
+
+	/*
 	 * When !queued, vruntime of the task has usually NOT been normalized.
 	 * But there are some cases where it has already been normalized:
-	 *
-	 * - Moving a forked child which is waiting for being woken up by
+	 */
+
+	/*
+	 * - a forked child which is waiting for being woken up by
 	 *   wake_up_new_task().
-	 * - Moving a task which has been woken up by try_to_wake_up() and
+	 */
+	if (!se->sum_exec_runtime || p->state == TASK_WAKING)
+		return true;
+
+	/*
+	 * - a task which has been woken up by try_to_wake_up() and
 	 *   waiting for actually being woken up by sched_ttwu_pending().
+	 *
+	 * XXX this appears wrong!! check history, 
+	 * we appear to always set queued and RUNNING under the same lock instance
+	 * might be from before TASK_WAKING ?
 	 */
-	if (!queued && p->state != TASK_RUNNING &&
-	    se->sum_exec_runtime && p->state != TASK_WAKING) {
+	if (p->state == TASK_RUNNING)
+		return true;
+
+	return false;
+}
+
+static void detach_task_cfs_rq(struct task_struct *p)
+{
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	if (!vruntime_normalized(p)) {
 		/*
 		 * Fix up our vruntime so that the current sleep doesn't
 		 * cause 'unlimited' sleep bonus.
@@ -7946,10 +7970,8 @@ static void detach_task_cfs_rq(struct ta
 		se->vruntime -= cfs_rq->min_vruntime;
 	}
 
-#ifdef CONFIG_SMP
 	/* Catch up with the cfs_rq and remove our load when we leave */
 	detach_entity_load_avg(cfs_rq, se);
-#endif
 }
 
 static void attach_task_cfs_rq(struct task_struct *p, int queued)
@@ -7965,49 +7987,23 @@ static void attach_task_cfs_rq(struct ta
 	se->depth = se->parent ? se->parent->depth + 1 : 0;
 #endif
 
-#ifdef CONFIG_SMP
 	/* synchronize task with its cfs_rq */
 	attach_entity_load_avg(cfs_rq, se);
-#endif
 
-	/*
-	 * Ensure the task has a non-normalized vruntime when attaching it
-	 * to a cfs_rq, so that enqueue_entity() at wake-up time will do
-	 * the right thing.
-	 *
-	 * If it's queued, then the enqueue_entity(.flags=0) makes the task
-	 * has non-normalized vruntime, if it's !queued, then it still has
-	 * a normalized vruntime.
-	 *
-	 * When !queued, a task usually should have a non-normalized vruntime
-	 * after being attached to a cfs_rq. But there are some cases where
-	 * we should keep it normalized:
-	 *
-	 * - Moving a forked child which is waiting for being woken up by
-	 *   wake_up_new_task().
-	 * - Moving a task which has been woken up by try_to_wake_up() and
-	 *   waiting for actually being woken up by sched_ttwu_pending().
-	 */
-	if (!queued && p->state != TASK_RUNNING &&
-	    se->sum_exec_runtime && p->state != TASK_WAKING)
+	if (!vruntime_normalized(p))
 		se->vruntime += cfs_rq->min_vruntime;
 }
 
 static void switched_from_fair(struct rq *rq, struct task_struct *p)
 {
-	detach_task_cfs_rq(p, task_on_rq_queued(p));
+	detach_task_cfs_rq(p);
 }
 
-/*
- * We switched to the sched_fair class.
- */
 static void switched_to_fair(struct rq *rq, struct task_struct *p)
 {
-	int queued = task_on_rq_queued(p);
-
-	attach_task_cfs_rq(p, queued);
+	attach_task_cfs_rq(p);
 
-	if (queued) {
+	if (task_on_rq_queued(p)) {
 		/*
 		 * We were most likely switched from sched_rt, so
 		 * kick off the schedule if running, otherwise just see
@@ -8056,8 +8052,7 @@ static void task_move_group_fair(struct
 {
 	detach_task_cfs_rq(p, queued);
 	set_task_rq(p, task_cpu(p));
-
-	/* tell se's cfs_rq has been changed */
+	/* tell se's cfs_rq has been changed -- migrated */
 	p->se.avg.last_update_time = 0;
 	attach_task_cfs_rq(p, queued);
 }

next prev parent reply	other threads:[~2015-08-20  1:17 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-19  6:47 [PATCH v3 0/5] sync a se with its cfs_rq when att(det)aching it byungchul.park
2015-08-19  6:47 ` [PATCH v3 1/5] sched: add two functions adjusting cfs_rq's load when att(det)aching a se byungchul.park
2015-08-19  6:47 ` [PATCH v3 2/5] sched: make task_move_group_fair adjust cfs_rq's load in case of queued byungchul.park
2015-08-19  6:47 ` [PATCH v3 3/5] sched: sync a se with prev cfs_rq when changing cgroup byungchul.park
2015-08-19  6:47 ` [PATCH v3 4/5] sched: sync a se with its cfs_rq when switching sched class to fair class byungchul.park
2015-08-19 17:12   ` Peter Zijlstra
2015-08-19 20:11     ` Yuyang Du
2015-08-20  6:12       ` Byungchul Park
2015-08-20 21:13       ` Peter Zijlstra
2015-08-20 23:56         ` Byungchul Park
2015-08-20  5:23     ` Byungchul Park
2015-08-19  6:47 ` [PATCH v3 5/5] sched: add two functions for att(det)aching a task to(from) a cfs_rq byungchul.park
2015-08-19  6:59 ` [PATCH v3 0/5] sync a se with its cfs_rq when att(det)aching it Byungchul Park
2015-08-20  1:17 ` Peter Zijlstra [this message]
2015-08-20  5:26   ` Byungchul Park
2015-08-20  8:38   ` Byungchul Park
2015-08-20 10:46     ` Byungchul Park
2015-08-20 21:11       ` Peter Zijlstra
2015-08-21  0:01         ` Byungchul Park

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150820011721.GA3138@worktop.event.rightround.com \
    --to=peterz@infradead.org \
    --cc=byungchul.park@lge.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=yuyang.du@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.