From: Erich Focht <efocht@hpce.nec.com>
To: "Martin J. Bligh" <mbligh@aracnet.com>,
linux-kernel <linux-kernel@vger.kernel.org>,
LSE <lse-tech@lists.sourceforge.net>
Cc: Andi Kleen <ak@muc.de>
Subject: Re: [patch] scheduler fix for 1cpu/node case
Date: Thu, 31 Jul 2003 23:45:36 +0200 [thread overview]
Message-ID: <200307312345.36368.efocht@hpce.nec.com> (raw)
In-Reply-To: <59140000.1059663916@[10.10.2.4]>
[-- Attachment #1: Type: text/plain, Size: 248 bytes --]
On Thursday 31 July 2003 17:05, Martin J. Bligh wrote:
> you're using node_to_cpu_mask for ia64 ... others were using
> node_to_cpumask (1 less "_"), so this doesn't build ...
Ooops, you're right, of course. Sorry about this mistake :-(
Erich
[-- Attachment #2: 1cpufix-lb2-2.6.0t1.patch --]
[-- Type: text/x-diff, Size: 4253 bytes --]
diff -urNp 2.6.0t1/kernel/sched.c 2.6.0t1-1cpufix/kernel/sched.c
--- 2.6.0t1/kernel/sched.c 2003-07-14 05:37:14.000000000 +0200
+++ 2.6.0t1-1cpufix/kernel/sched.c 2003-07-31 20:46:30.000000000 +0200
@@ -164,6 +164,7 @@ struct runqueue {
prio_array_t *active, *expired, arrays[2];
int prev_cpu_load[NR_CPUS];
#ifdef CONFIG_NUMA
+ unsigned int nr_lb_failed;
atomic_t *node_nr_running;
int prev_node_load[MAX_NUMNODES];
#endif
@@ -856,6 +857,35 @@ static int find_busiest_node(int this_no
return node;
}
+/*
+ * Decide whether the scheduler should balance locally (inside the same node)
+ * or globally depending on the number of failed local balance attempts.
+ * The number of failed local attempts tolerated before going global is the
+ * number of cpus in the current node minus one. With just one cpu, go for
+ * global balancing immediately. On a busy cpu the retry count is halved.
+ */
+static inline unsigned long cpus_to_balance(int this_cpu, runqueue_t *this_rq)
+{
+ int node, retries, this_node = cpu_to_node(this_cpu);
+
+ retries = nr_cpus_node(this_node) - 1;
+ if (this_rq->curr != this_rq->idle)
+ retries >>= 1;
+ if (this_rq->nr_lb_failed >= retries) {
+ node = find_busiest_node(this_node);
+ this_rq->nr_lb_failed = 0;
+ if (node >= 0)
+ return (node_to_cpumask(node) | (1UL << this_cpu));
+ }
+ return node_to_cpumask(this_node);
+}
+
+#else /* !CONFIG_NUMA */
+
+static inline unsigned long cpus_to_balance(int this_cpu, runqueue_t *this_rq)
+{
+ return cpu_online_map;
+}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_SMP
@@ -960,6 +990,12 @@ static inline runqueue_t *find_busiest_q
busiest = NULL;
}
out:
+#ifdef CONFIG_NUMA
+ if (!busiest)
+ this_rq->nr_lb_failed++;
+ else
+ this_rq->nr_lb_failed = 0;
+#endif
return busiest;
}
@@ -995,7 +1031,7 @@ static inline void pull_task(runqueue_t
* We call this with the current runqueue locked,
* irqs disabled.
*/
-static void load_balance(runqueue_t *this_rq, int idle, unsigned long cpumask)
+static void load_balance(runqueue_t *this_rq, int idle)
{
int imbalance, idx, this_cpu = smp_processor_id();
runqueue_t *busiest;
@@ -1003,7 +1039,8 @@ static void load_balance(runqueue_t *thi
struct list_head *head, *curr;
task_t *tmp;
- busiest = find_busiest_queue(this_rq, this_cpu, idle, &imbalance, cpumask);
+ busiest = find_busiest_queue(this_rq, this_cpu, idle, &imbalance,
+ cpus_to_balance(this_cpu, this_rq));
if (!busiest)
goto out;
@@ -1085,29 +1122,9 @@ out:
*/
#define IDLE_REBALANCE_TICK (HZ/1000 ?: 1)
#define BUSY_REBALANCE_TICK (HZ/5 ?: 1)
-#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * 5)
-#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 2)
-
-#ifdef CONFIG_NUMA
-static void balance_node(runqueue_t *this_rq, int idle, int this_cpu)
-{
- int node = find_busiest_node(cpu_to_node(this_cpu));
- unsigned long cpumask, this_cpumask = 1UL << this_cpu;
-
- if (node >= 0) {
- cpumask = node_to_cpumask(node) | this_cpumask;
- spin_lock(&this_rq->lock);
- load_balance(this_rq, idle, cpumask);
- spin_unlock(&this_rq->lock);
- }
-}
-#endif
static void rebalance_tick(runqueue_t *this_rq, int idle)
{
-#ifdef CONFIG_NUMA
- int this_cpu = smp_processor_id();
-#endif
unsigned long j = jiffies;
/*
@@ -1119,24 +1136,16 @@ static void rebalance_tick(runqueue_t *t
* are not balanced.)
*/
if (idle) {
-#ifdef CONFIG_NUMA
- if (!(j % IDLE_NODE_REBALANCE_TICK))
- balance_node(this_rq, idle, this_cpu);
-#endif
if (!(j % IDLE_REBALANCE_TICK)) {
spin_lock(&this_rq->lock);
- load_balance(this_rq, idle, cpu_to_node_mask(this_cpu));
+ load_balance(this_rq, idle);
spin_unlock(&this_rq->lock);
}
return;
}
-#ifdef CONFIG_NUMA
- if (!(j % BUSY_NODE_REBALANCE_TICK))
- balance_node(this_rq, idle, this_cpu);
-#endif
if (!(j % BUSY_REBALANCE_TICK)) {
spin_lock(&this_rq->lock);
- load_balance(this_rq, idle, cpu_to_node_mask(this_cpu));
+ load_balance(this_rq, idle);
spin_unlock(&this_rq->lock);
}
}
@@ -1306,7 +1315,7 @@ need_resched:
pick_next_task:
if (unlikely(!rq->nr_running)) {
#ifdef CONFIG_SMP
- load_balance(rq, 1, cpu_to_node_mask(smp_processor_id()));
+ load_balance(rq, 1);
if (rq->nr_running)
goto pick_next_task;
#endif
next prev parent reply other threads:[~2003-07-31 21:45 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-07-28 19:16 [patch] scheduler fix for 1cpu/node case Erich Focht
2003-07-28 19:55 ` Martin J. Bligh
2003-07-28 20:18 ` Erich Focht
2003-07-28 20:37 ` Martin J. Bligh
2003-07-29 2:24 ` Andrew Theurer
2003-07-29 10:08 ` Erich Focht
2003-07-29 13:33 ` [Lse-tech] " Andrew Theurer
2003-07-30 15:23 ` Erich Focht
2003-07-30 15:44 ` Andrew Theurer
2003-07-29 14:27 ` Martin J. Bligh
2003-08-13 20:49 ` Bill Davidsen
2003-08-22 15:46 ` [Lse-tech] " Andrew Theurer
2003-08-22 22:56 ` Nick Piggin
2003-08-23 0:12 ` Andrew Theurer
2003-08-23 0:29 ` Nick Piggin
2003-08-23 0:47 ` William Lee Irwin III
2003-08-23 8:48 ` Nick Piggin
2003-08-23 14:32 ` Andrew Theurer
2003-08-23 1:31 ` Martin J. Bligh
2003-07-29 10:08 ` Erich Focht
2003-07-29 14:41 ` Andi Kleen
2003-07-31 15:05 ` Martin J. Bligh
2003-07-31 21:45 ` Erich Focht [this message]
2003-08-01 0:26 ` Martin J. Bligh
2003-08-01 16:30 ` [Lse-tech] " Erich Focht
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200307312345.36368.efocht@hpce.nec.com \
--to=efocht@hpce.nec.com \
--cc=ak@muc.de \
--cc=linux-kernel@vger.kernel.org \
--cc=lse-tech@lists.sourceforge.net \
--cc=mbligh@aracnet.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.