public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@elte.hu>
To: Andrew Morton <akpm@osdl.org>
Cc: Erich Focht <efocht@hpce.nec.com>,
	nickpiggin@yahoo.com.au, mbligh@aracnet.com, ak@suse.de,
	jun.nakajima@intel.com, ricklind@us.ibm.com,
	linux-kernel@vger.kernel.org, kernel@kolivas.org,
	rusty@rustcorp.com.au, anton@samba.org,
	lse-tech@lists.sourceforge.net
Subject: [patch] new-context balancing, 2.6.5-rc3-mm1
Date: Tue, 30 Mar 2004 23:03:12 +0200	[thread overview]
Message-ID: <20040330210312.GA6706@elte.hu> (raw)
In-Reply-To: <20040330162514.GA2943@elte.hu>

[-- Attachment #1: Type: text/plain, Size: 384 bytes --]


i've attached sched-balance-context.patch, which is the current version
of fork()/clone() balancing, against 2.6.5-rc3-mm1.

Changes:

 - only balance CLONE_VM threads

 - take ->cpus_allowed into account when balancing.

i've checked kernel recompiles and while they didnt hurt from fork()
balancing on an 8-way SMP box, i implemented the thread-only balancing
nevertheless.

	Ingo

[-- Attachment #2: sched-balance-context.patch --]
[-- Type: text/plain, Size: 4796 bytes --]

--- linux/include/linux/sched.h.orig	
+++ linux/include/linux/sched.h	
@@ -715,12 +715,17 @@ extern void do_timer(struct pt_regs *);
 
 extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
+extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
 #ifdef CONFIG_SMP
  extern void kick_process(struct task_struct *tsk);
+ extern void FASTCALL(wake_up_forked_thread(struct task_struct * tsk));
 #else
  static inline void kick_process(struct task_struct *tsk) { }
+ static inline void wake_up_forked_thread(struct task_struct * tsk)
+ {
+	return wake_up_forked_process(tsk);
+ }
 #endif
-extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
 extern void FASTCALL(sched_fork(task_t * p));
 extern void FASTCALL(sched_exit(task_t * p));
 
--- linux/kernel/sched.c.orig	
+++ linux/kernel/sched.c	
@@ -1139,6 +1137,119 @@ enum idle_type
 };
 
 #ifdef CONFIG_SMP
+
+/*
+ * find_idlest_cpu - find the least busy runqueue.
+ */
+static int find_idlest_cpu(int this_cpu, runqueue_t *this_rq, cpumask_t mask)
+{
+	unsigned long load, min_load, this_load;
+	int i, min_cpu;
+	cpumask_t tmp;
+
+	min_cpu = UINT_MAX;
+	min_load = ULONG_MAX;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	for_each_cpu_mask(i, tmp) {
+		load = cpu_load(i);
+
+		if (load < min_load) {
+			min_cpu = i;
+			min_load = load;
+
+			/* break out early on an idle CPU: */
+			if (!min_load)
+				break;
+		}
+	}
+
+	/* add +1 to account for the new task */
+	this_load = cpu_load(this_cpu) + SCHED_LOAD_SCALE;
+
+	/*
+	 * Would with the addition of the new task to the
+	 * current CPU there be an imbalance between this
+	 * CPU and the idlest CPU?
+	 */
+	if (min_load*this_rq->sd->imbalance_pct < 100*this_load)
+		return min_cpu;
+
+	return this_cpu;
+}
+
+/*
+ * wake_up_forked_thread - wake up a freshly forked thread.
+ *
+ * This function will do some initial scheduler statistics housekeeping
+ * that must be done for every newly created context, and it also does
+ * runqueue balancing.
+ */
+void fastcall wake_up_forked_thread(task_t * p)
+{
+	unsigned long flags;
+	int this_cpu = get_cpu(), cpu;
+	runqueue_t *this_rq = cpu_rq(this_cpu), *rq;
+
+	/*
+	 * Migrate the new context to the least busy CPU,
+	 * if that CPU is out of balance.
+	 */
+	cpu = find_idlest_cpu(this_cpu, this_rq, p->cpus_allowed);
+
+	local_irq_save(flags);
+lock_again:
+	rq = cpu_rq(cpu);
+	double_rq_lock(this_rq, rq);
+
+	BUG_ON(p->state != TASK_RUNNING);
+
+	/*
+	 * We did find_idlest_cpu() unlocked, so in theory
+	 * the mask could have changed:
+	 */
+	if (!cpu_isset(cpu, p->cpus_allowed)) {
+		cpu = any_online_cpu(p->cpus_allowed);
+		double_rq_unlock(this_rq, rq);
+		goto lock_again;
+	}
+	/*
+	 * We decrease the sleep average of forking parents
+	 * and children as well, to keep max-interactive tasks
+	 * from forking tasks that are max-interactive.
+	 */
+	current->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(current) *
+		PARENT_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
+
+	p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) *
+		CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
+
+	p->interactive_credit = 0;
+
+	p->prio = effective_prio(p);
+	set_task_cpu(p, cpu);
+
+	if (cpu == this_cpu) {
+		if (unlikely(!current->array))
+			__activate_task(p, rq);
+		else {
+			p->prio = current->prio;
+			list_add_tail(&p->run_list, &current->run_list);
+			p->array = current->array;
+			p->array->nr_active++;
+			rq->nr_running++;
+		}
+	} else {
+		__activate_task(p, rq);
+		if (TASK_PREEMPTS_CURR(p, rq))
+			resched_task(rq->curr);
+	}
+
+	double_rq_unlock(this_rq, rq);
+	local_irq_restore(flags);
+	put_cpu();
+}
+
 /*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
--- linux/kernel/fork.c.orig	
+++ linux/kernel/fork.c	
@@ -1179,9 +1179,23 @@ long do_fork(unsigned long clone_flags,
 			set_tsk_thread_flag(p, TIF_SIGPENDING);
 		}
 
-		if (!(clone_flags & CLONE_STOPPED))
-			wake_up_forked_process(p);	/* do this last */
-		else
+		if (!(clone_flags & CLONE_STOPPED)) {
+			/*
+			 * Do the wakeup last. On SMP we treat fork() and
+			 * CLONE_VM separately, because fork() has already
+			 * created cache footprint on this CPU (due to
+			 * copying the pagetables), hence migration would
+			 * probably be costy. Threads on the other hand
+			 * have less traction to the current CPU, and if
+			 * there's an imbalance then the scheduler can
+			 * migrate this fresh thread now, before it
+			 * accumulates a larger cache footprint:
+			 */
+			if (clone_flags & CLONE_VM)
+				wake_up_forked_thread(p);
+			else
+				wake_up_forked_process(p);
+		} else
 			p->state = TASK_STOPPED;
 		++total_forks;
 

  parent reply	other threads:[~2004-03-30 21:02 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-03-25 15:31 [Lse-tech] [patch] sched-domain cleanups, sched-2.6.5-rc2-mm2-A3 Nakajima, Jun
2004-03-25 15:40 ` Andi Kleen
2004-03-25 19:09   ` Ingo Molnar
2004-03-25 15:21     ` Andi Kleen
2004-03-25 19:39       ` Ingo Molnar
2004-03-25 20:30         ` Ingo Molnar
2004-03-29  8:45           ` Andi Kleen
2004-03-29 10:20             ` Rick Lindsley
2004-03-29  5:07               ` Andi Kleen
2004-03-29 11:28               ` Nick Piggin
2004-03-29 17:30                 ` Rick Lindsley
2004-03-30  0:01                   ` Nick Piggin
2004-03-30  1:26                     ` Rick Lindsley
2004-03-29 11:20             ` Nick Piggin
2004-03-29  6:01               ` Andi Kleen
2004-03-29 11:46                 ` Ingo Molnar
2004-03-29  7:03                   ` Andi Kleen
2004-03-29  7:10                     ` Andi Kleen
2004-03-29 20:14                   ` Andi Kleen
2004-03-29 23:51                     ` Nick Piggin
2004-03-30  6:34                       ` Andi Kleen
2004-03-30  6:40                         ` Ingo Molnar
2004-03-30  7:07                           ` Andi Kleen
2004-03-30  7:14                             ` Nick Piggin
2004-03-30  7:45                               ` Ingo Molnar
2004-03-30  7:58                                 ` Nick Piggin
2004-03-30  7:15                             ` Ingo Molnar
2004-03-30  7:18                               ` Nick Piggin
2004-03-30  7:48                               ` Andi Kleen
2004-03-30  8:18                                 ` Ingo Molnar
2004-03-30  9:36                                   ` Andi Kleen
2004-03-30  7:42                             ` Ingo Molnar
2004-03-30  7:03                         ` Nick Piggin
2004-03-30  7:13                           ` Andi Kleen
2004-03-30  7:24                             ` Nick Piggin
2004-03-30  7:38                             ` Arjan van de Ven
2004-03-30  7:13                           ` Martin J. Bligh
2004-03-30  7:31                             ` Nick Piggin
2004-03-30  7:38                               ` Martin J. Bligh
2004-03-30  8:05                               ` Ingo Molnar
2004-03-30  8:19                                 ` Nick Piggin
2004-03-30  8:45                                   ` Ingo Molnar
2004-03-30  8:53                                     ` Nick Piggin
2004-03-30 15:27                                       ` Martin J. Bligh
2004-03-25 19:24     ` Martin J. Bligh
2004-03-25 21:48       ` Ingo Molnar
2004-03-25 22:28         ` Martin J. Bligh
2004-03-29 22:30           ` Erich Focht
2004-03-30  9:05             ` Nick Piggin
2004-03-30 10:04               ` Erich Focht
2004-03-30 10:58                 ` Andi Kleen
2004-03-30 16:03                   ` [patch] sched-2.6.5-rc3-mm1-A0 Ingo Molnar
2004-03-31  2:30                     ` Nick Piggin
2004-03-30 11:02                 ` [Lse-tech] [patch] sched-domain cleanups, sched-2.6.5-rc2-mm2-A3 Andrew Morton
     [not found]                   ` <20040330161438.GA2257@elte.hu>
     [not found]                     ` <20040330161910.GA2860@elte.hu>
     [not found]                       ` <20040330162514.GA2943@elte.hu>
2004-03-30 21:03                         ` Ingo Molnar [this message]
2004-03-31  2:30                           ` [patch] new-context balancing, 2.6.5-rc3-mm1 Nick Piggin
2004-03-31 18:59                   ` [Lse-tech] [patch] sched-domain cleanups, sched-2.6.5-rc2-mm2-A3 Erich Focht
2004-03-31  2:08                 ` Nick Piggin
2004-03-31 22:23                   ` Erich Focht
2004-03-30 15:01             ` Martin J. Bligh
2004-03-31 21:23               ` Erich Focht
2004-03-31 21:33                 ` Martin J. Bligh
2004-03-25 21:59   ` Ingo Molnar
2004-03-25 22:26     ` Rick Lindsley
2004-03-25 22:30     ` Andrew Theurer
2004-03-25 22:38       ` Martin J. Bligh
2004-03-26  1:29       ` Andi Kleen
2004-03-26  3:23   ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040330210312.GA6706@elte.hu \
    --to=mingo@elte.hu \
    --cc=ak@suse.de \
    --cc=akpm@osdl.org \
    --cc=anton@samba.org \
    --cc=efocht@hpce.nec.com \
    --cc=jun.nakajima@intel.com \
    --cc=kernel@kolivas.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lse-tech@lists.sourceforge.net \
    --cc=mbligh@aracnet.com \
    --cc=nickpiggin@yahoo.com.au \
    --cc=ricklind@us.ibm.com \
    --cc=rusty@rustcorp.com.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox