All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@elte.hu>
To: Andrew Morton <akpm@osdl.org>
Cc: Erich Focht <efocht@hpce.nec.com>,
	nickpiggin@yahoo.com.au, mbligh@aracnet.com, ak@suse.de,
	jun.nakajima@intel.com, ricklind@us.ibm.com,
	linux-kernel@vger.kernel.org, kernel@kolivas.org,
	rusty@rustcorp.com.au, anton@samba.org,
	lse-tech@lists.sourceforge.net
Subject: [patch] new-context balancing, 2.6.5-rc3-mm1
Date: Tue, 30 Mar 2004 23:03:12 +0200	[thread overview]
Message-ID: <20040330210312.GA6706@elte.hu> (raw)
In-Reply-To: <20040330162514.GA2943@elte.hu>

[-- Attachment #1: Type: text/plain, Size: 384 bytes --]


i've attached sched-balance-context.patch, which is the current version
of fork()/clone() balancing, against 2.6.5-rc3-mm1.

Changes:

 - only balance CLONE_VM threads

 - take ->cpus_allowed into account when balancing.

i've checked kernel recompiles and while they didn't hurt from fork()
balancing on an 8-way SMP box, i implemented the thread-only balancing
nevertheless.

	Ingo

[-- Attachment #2: sched-balance-context.patch --]
[-- Type: text/plain, Size: 4796 bytes --]

--- linux/include/linux/sched.h.orig	
+++ linux/include/linux/sched.h	
@@ -715,12 +715,17 @@ extern void do_timer(struct pt_regs *);
 
 extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
+extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
 #ifdef CONFIG_SMP
  extern void kick_process(struct task_struct *tsk);
+ extern void FASTCALL(wake_up_forked_thread(struct task_struct * tsk));
 #else
  static inline void kick_process(struct task_struct *tsk) { }
+ static inline void wake_up_forked_thread(struct task_struct * tsk)
+ {
+	wake_up_forked_process(tsk);	/* !CONFIG_SMP: plain process wakeup */
+ }
 #endif
-extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
 extern void FASTCALL(sched_fork(task_t * p));
 extern void FASTCALL(sched_exit(task_t * p));
 
--- linux/kernel/sched.c.orig	
+++ linux/kernel/sched.c	
@@ -1139,6 +1137,119 @@ enum idle_type
 };
 
 #ifdef CONFIG_SMP
+
+/*
+ * find_idlest_cpu - pick the least loaded online CPU out of @mask.
+ *
+ * Returns that CPU if min_load*imbalance_pct < 100*(this CPU's load plus
+ * the new task), i.e. keeping the task here would be an imbalance; else
+ * returns @this_cpu. @this_cpu is also returned if no CPU in @mask is online.
+ */
+static int find_idlest_cpu(int this_cpu, runqueue_t *this_rq, cpumask_t mask)
+{
+	unsigned long load, min_load, this_load;
+	int i, min_cpu;
+	cpumask_t tmp;
+
+	/* fall back to this_cpu: UINT_MAX is not a valid int CPU number */
+	min_cpu = this_cpu;
+	min_load = ULONG_MAX;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	for_each_cpu_mask(i, tmp) {
+		load = cpu_load(i);
+		if (load < min_load) {
+			min_cpu = i;
+			min_load = load;
+			/* break out early on an idle CPU: */
+			if (!min_load)
+				break;
+		}
+	}
+
+	/* add one task's worth of load to account for the new task */
+	this_load = cpu_load(this_cpu) + SCHED_LOAD_SCALE;
+	/*
+	 * Would adding the new task to the current CPU create an
+	 * imbalance between this CPU and the idlest CPU?
+	 */
+	if (min_load*this_rq->sd->imbalance_pct < 100*this_load)
+		return min_cpu;
+	return this_cpu;
+}
+
+/*
+ * wake_up_forked_thread - wake up a freshly forked thread.
+ *
+ * Picks a CPU via find_idlest_cpu() (restricted to p->cpus_allowed),
+ * applies the fork-time sleep_avg penalties to parent and child, and
+ * queues @p on the chosen runqueue with both runqueues locked.
+ */
+void fastcall wake_up_forked_thread(task_t * p)
+{
+	unsigned long flags;
+	int this_cpu = get_cpu(), cpu;
+	runqueue_t *this_rq = cpu_rq(this_cpu), *rq;
+
+	/*
+	 * Migrate the new context to the least busy CPU,
+	 * if that CPU is out of balance.
+	 */
+	cpu = find_idlest_cpu(this_cpu, this_rq, p->cpus_allowed);
+
+	local_irq_save(flags);
+lock_again:
+	rq = cpu_rq(cpu);
+	double_rq_lock(this_rq, rq);
+
+	BUG_ON(p->state != TASK_RUNNING);
+
+	/*
+	 * find_idlest_cpu() ran unlocked, so the mask may have changed;
+	 * if so, pick any allowed online CPU and retake the locks:
+	 */
+	if (!cpu_isset(cpu, p->cpus_allowed)) {
+		cpu = any_online_cpu(p->cpus_allowed);
+		double_rq_unlock(this_rq, rq);
+		goto lock_again;
+	}
+	/*
+	 * We decrease the sleep average of forking parents
+	 * and children as well, to keep max-interactive tasks
+	 * from forking tasks that are max-interactive.
+	 */
+	current->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(current) *
+		PARENT_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
+
+	p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) *
+		CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
+
+	p->interactive_credit = 0;
+
+	p->prio = effective_prio(p);
+	set_task_cpu(p, cpu);
+
+	if (cpu == this_cpu) {
+		if (unlikely(!current->array))
+			__activate_task(p, rq);
+		else {
+			p->prio = current->prio;
+			list_add_tail(&p->run_list, &current->run_list);
+			p->array = current->array;
+			p->array->nr_active++;
+			rq->nr_running++;
+		}
+	} else {
+		__activate_task(p, rq);
+		if (TASK_PREEMPTS_CURR(p, rq))
+			resched_task(rq->curr);
+	}
+
+	double_rq_unlock(this_rq, rq);
+	local_irq_restore(flags);
+	put_cpu();
+}
+
 /*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
--- linux/kernel/fork.c.orig	
+++ linux/kernel/fork.c	
@@ -1179,9 +1179,23 @@ long do_fork(unsigned long clone_flags,
 			set_tsk_thread_flag(p, TIF_SIGPENDING);
 		}
 
-		if (!(clone_flags & CLONE_STOPPED))
-			wake_up_forked_process(p);	/* do this last */
-		else
+		if (!(clone_flags & CLONE_STOPPED)) {
+			/*
+			 * Do the wakeup last. On SMP we treat fork() and
+			 * CLONE_VM separately, because fork() has already
+			 * created cache footprint on this CPU (due to
+			 * copying the pagetables), hence migration would
+			 * probably be costly. Threads on the other hand
+			 * have less traction to the current CPU, and if
+			 * there's an imbalance then the scheduler can
+			 * migrate this fresh thread now, before it
+			 * accumulates a larger cache footprint:
+			 */
+			if (clone_flags & CLONE_VM)
+				wake_up_forked_thread(p);
+			else
+				wake_up_forked_process(p);
+		} else
 			p->state = TASK_STOPPED;
 		++total_forks;
 

  parent reply	other threads:[~2004-03-30 21:02 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-03-25 15:31 [Lse-tech] [patch] sched-domain cleanups, sched-2.6.5-rc2-mm2-A3 Nakajima, Jun
2004-03-25 15:40 ` Andi Kleen
2004-03-25 19:09   ` Ingo Molnar
2004-03-25 15:21     ` Andi Kleen
2004-03-25 19:39       ` Ingo Molnar
2004-03-25 20:30         ` Ingo Molnar
2004-03-29  8:45           ` Andi Kleen
2004-03-29 10:20             ` Rick Lindsley
2004-03-29  5:07               ` Andi Kleen
2004-03-29 11:28               ` Nick Piggin
2004-03-29 17:30                 ` Rick Lindsley
2004-03-30  0:01                   ` Nick Piggin
2004-03-30  1:26                     ` Rick Lindsley
2004-03-29 11:20             ` Nick Piggin
2004-03-29  6:01               ` Andi Kleen
2004-03-29 11:46                 ` Ingo Molnar
2004-03-29  7:03                   ` Andi Kleen
2004-03-29  7:10                     ` Andi Kleen
2004-03-29 20:14                   ` Andi Kleen
2004-03-29 23:51                     ` Nick Piggin
2004-03-30  6:34                       ` Andi Kleen
2004-03-30  6:40                         ` Ingo Molnar
2004-03-30  7:07                           ` Andi Kleen
2004-03-30  7:14                             ` Nick Piggin
2004-03-30  7:45                               ` Ingo Molnar
2004-03-30  7:58                                 ` Nick Piggin
2004-03-30  7:15                             ` Ingo Molnar
2004-03-30  7:18                               ` Nick Piggin
2004-03-30  7:48                               ` Andi Kleen
2004-03-30  8:18                                 ` Ingo Molnar
2004-03-30  9:36                                   ` Andi Kleen
2004-03-30  7:42                             ` Ingo Molnar
2004-03-30  7:03                         ` Nick Piggin
2004-03-30  7:13                           ` Andi Kleen
2004-03-30  7:24                             ` Nick Piggin
2004-03-30  7:38                             ` Arjan van de Ven
2004-03-30  7:13                           ` Martin J. Bligh
2004-03-30  7:31                             ` Nick Piggin
2004-03-30  7:38                               ` Martin J. Bligh
2004-03-30  8:05                               ` Ingo Molnar
2004-03-30  8:19                                 ` Nick Piggin
2004-03-30  8:45                                   ` Ingo Molnar
2004-03-30  8:53                                     ` Nick Piggin
2004-03-30 15:27                                       ` Martin J. Bligh
2004-03-25 19:24     ` Martin J. Bligh
2004-03-25 21:48       ` Ingo Molnar
2004-03-25 22:28         ` Martin J. Bligh
2004-03-29 22:30           ` Erich Focht
2004-03-30  9:05             ` Nick Piggin
2004-03-30 10:04               ` Erich Focht
2004-03-30 10:58                 ` Andi Kleen
2004-03-30 16:03                   ` [patch] sched-2.6.5-rc3-mm1-A0 Ingo Molnar
2004-03-31  2:30                     ` Nick Piggin
2004-03-30 11:02                 ` [Lse-tech] [patch] sched-domain cleanups, sched-2.6.5-rc2-mm2-A3 Andrew Morton
     [not found]                   ` <20040330161438.GA2257@elte.hu>
     [not found]                     ` <20040330161910.GA2860@elte.hu>
     [not found]                       ` <20040330162514.GA2943@elte.hu>
2004-03-30 21:03                         ` Ingo Molnar [this message]
2004-03-31  2:30                           ` [patch] new-context balancing, 2.6.5-rc3-mm1 Nick Piggin
2004-03-31 18:59                   ` [Lse-tech] [patch] sched-domain cleanups, sched-2.6.5-rc2-mm2-A3 Erich Focht
2004-03-31  2:08                 ` Nick Piggin
2004-03-31 22:23                   ` Erich Focht
2004-03-30 15:01             ` Martin J. Bligh
2004-03-31 21:23               ` Erich Focht
2004-03-31 21:33                 ` Martin J. Bligh
2004-03-25 21:59   ` Ingo Molnar
2004-03-25 22:26     ` Rick Lindsley
2004-03-25 22:30     ` Andrew Theurer
2004-03-25 22:38       ` Martin J. Bligh
2004-03-26  1:29       ` Andi Kleen
2004-03-26  3:23   ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040330210312.GA6706@elte.hu \
    --to=mingo@elte.hu \
    --cc=ak@suse.de \
    --cc=akpm@osdl.org \
    --cc=anton@samba.org \
    --cc=efocht@hpce.nec.com \
    --cc=jun.nakajima@intel.com \
    --cc=kernel@kolivas.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lse-tech@lists.sourceforge.net \
    --cc=mbligh@aracnet.com \
    --cc=nickpiggin@yahoo.com.au \
    --cc=ricklind@us.ibm.com \
    --cc=rusty@rustcorp.com.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.