All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nick Piggin <nickpiggin@yahoo.com.au>
To: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Cc: Con Kolivas <kernel@kolivas.org>,
	linux-kernel@vger.kernel.org, "'Chris Mason'" <mason@suse.com>,
	Ingo Molnar <mingo@elte.hu>
Subject: Re: [PATCH RFC] smt nice introduces significant lock contention
Date: Fri, 02 Jun 2006 18:56:21 +1000	[thread overview]
Message-ID: <447FFD35.9020909@yahoo.com.au> (raw)
In-Reply-To: <000201c6861f$6a2d4e20$0b4ce984@amr.corp.intel.com>

[-- Attachment #1: Type: text/plain, Size: 477 bytes --]

Chen, Kenneth W wrote:

> Ha, you beat me by one minute. It did cross my mind to use try lock there as
> well, take a look at my version, I think I have a better inner loop.

Actually you *have* to use trylocks I think, because the current runqueue
is already locked.

And why do we lock all siblings in the other case, for that matter? (not
that it makes much difference except on niagara today).

Rolled up patch with everyone's changes attached.

-- 
SUSE Labs, Novell Inc.

[-- Attachment #2: sched-smt.patch --]
[-- Type: text/plain, Size: 6272 bytes --]

Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c	2006-06-02 18:51:25.000000000 +1000
+++ linux-2.6/kernel/sched.c	2006-06-02 18:54:38.000000000 +1000
@@ -239,7 +239,6 @@ struct runqueue {
 
 	task_t *migration_thread;
 	struct list_head migration_queue;
-	int cpu;
 #endif
 
 #ifdef CONFIG_SCHEDSTATS
@@ -1687,7 +1686,7 @@ unsigned long nr_active(void)
  * double_rq_lock - safely lock two runqueues
  *
  * We must take them in cpu order to match code in
- * dependent_sleeper and wake_dependent_sleeper.
+ * wake_dependent_sleeper.
  *
  * Note this does not disable interrupts like task_rq_lock,
  * you need to do so manually before calling.
@@ -1700,7 +1699,7 @@ static void double_rq_lock(runqueue_t *r
 		spin_lock(&rq1->lock);
 		__acquire(rq2->lock);	/* Fake it out ;) */
 	} else {
-		if (rq1->cpu < rq2->cpu) {
+		if (rq1 < rq2) {
 			spin_lock(&rq1->lock);
 			spin_lock(&rq2->lock);
 		} else {
@@ -1736,7 +1735,7 @@ static void double_lock_balance(runqueue
 	__acquires(this_rq->lock)
 {
 	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest->cpu < this_rq->cpu) {
+		if (busiest < this_rq) {
 			spin_unlock(&this_rq->lock);
 			spin_lock(&busiest->lock);
 			spin_lock(&this_rq->lock);
@@ -2686,6 +2685,9 @@ static inline void wakeup_busy_runqueue(
 		resched_task(rq->idle);
 }
 
+/*
+ * Called with interrupts disabled and this_rq's runqueue locked.
+ */
 static void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
 {
 	struct sched_domain *tmp, *sd = NULL;
@@ -2693,41 +2695,24 @@ static void wake_sleeping_dependent(int 
 	int i;
 
 	for_each_domain(this_cpu, tmp)
-		if (tmp->flags & SD_SHARE_CPUPOWER)
+		if (tmp->flags & SD_SHARE_CPUPOWER) {
 			sd = tmp;
-
+			break;
+		}
 	if (!sd)
 		return;
 
-	/*
-	 * Unlock the current runqueue because we have to lock in
-	 * CPU order to avoid deadlocks. Caller knows that we might
-	 * unlock. We keep IRQs disabled.
-	 */
-	spin_unlock(&this_rq->lock);
-
 	sibling_map = sd->span;
-
-	for_each_cpu_mask(i, sibling_map)
-		spin_lock(&cpu_rq(i)->lock);
-	/*
-	 * We clear this CPU from the mask. This both simplifies the
-	 * inner loop and keps this_rq locked when we exit:
-	 */
 	cpu_clear(this_cpu, sibling_map);
-
 	for_each_cpu_mask(i, sibling_map) {
 		runqueue_t *smt_rq = cpu_rq(i);
+		if (unlikely(!spin_trylock(&smt_rq->lock)))
+			continue;
 
 		wakeup_busy_runqueue(smt_rq);
-	}
 
-	for_each_cpu_mask(i, sibling_map)
-		spin_unlock(&cpu_rq(i)->lock);
-	/*
-	 * We exit with this_cpu's rq still held and IRQs
-	 * still disabled:
-	 */
+		spin_unlock(&smt_rq->lock);
+	}
 }
 
 /*
@@ -2740,48 +2725,38 @@ static inline unsigned long smt_slice(ta
 	return p->time_slice * (100 - sd->per_cpu_gain) / 100;
 }
 
-static int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+/*
+ * To minimise lock contention and not have to drop this_rq's runlock we only
+ * trylock the sibling runqueues and bypass those runqueues if we fail to
+ * acquire their lock. As we only trylock the normal locking order does not
+ * need to be obeyed.
+ */
+static int dependent_sleeper(int this_cpu, struct runqueue *this_rq,
+	struct task_struct *p)
 {
 	struct sched_domain *tmp, *sd = NULL;
 	cpumask_t sibling_map;
-	prio_array_t *array;
 	int ret = 0, i;
-	task_t *p;
 
 	for_each_domain(this_cpu, tmp)
-		if (tmp->flags & SD_SHARE_CPUPOWER)
+		if (tmp->flags & SD_SHARE_CPUPOWER) {
 			sd = tmp;
-
+			break;
+		}
 	if (!sd)
 		return 0;
 
-	/*
-	 * The same locking rules and details apply as for
-	 * wake_sleeping_dependent():
-	 */
-	spin_unlock(&this_rq->lock);
 	sibling_map = sd->span;
-	for_each_cpu_mask(i, sibling_map)
-		spin_lock(&cpu_rq(i)->lock);
 	cpu_clear(this_cpu, sibling_map);
+	for_each_cpu_mask(i, sibling_map) {
+		runqueue_t *smt_rq;
+		task_t *smt_curr;
 
-	/*
-	 * Establish next task to be run - it might have gone away because
-	 * we released the runqueue lock above:
-	 */
-	if (!this_rq->nr_running)
-		goto out_unlock;
-	array = this_rq->active;
-	if (!array->nr_active)
-		array = this_rq->expired;
-	BUG_ON(!array->nr_active);
-
-	p = list_entry(array->queue[sched_find_first_bit(array->bitmap)].next,
-		task_t, run_list);
+		smt_rq = cpu_rq(i);
+		if (unlikely(!spin_trylock(&smt_rq->lock)))
+			continue;
 
-	for_each_cpu_mask(i, sibling_map) {
-		runqueue_t *smt_rq = cpu_rq(i);
-		task_t *smt_curr = smt_rq->curr;
+		smt_curr = smt_rq->curr;
 
 		/* Kernel threads do not participate in dependent sleeping */
 		if (!p->mm || !smt_curr->mm || rt_task(p))
@@ -2834,10 +2809,10 @@ check_smt_task:
 			else
 				wakeup_busy_runqueue(smt_rq);
 		}
+
+		spin_unlock(&smt_rq->lock);
 	}
-out_unlock:
-	for_each_cpu_mask(i, sibling_map)
-		spin_unlock(&cpu_rq(i)->lock);
+
 	return ret;
 }
 #else
@@ -2845,7 +2820,8 @@ static inline void wake_sleeping_depende
 {
 }
 
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+static inline int dependent_sleeper(int this_cpu, struct runqueue *this_rq,
+	struct task_struct *p)
 {
 	return 0;
 }
@@ -2967,32 +2943,13 @@ need_resched_nonpreemptible:
 
 	cpu = smp_processor_id();
 	if (unlikely(!rq->nr_running)) {
-go_idle:
 		idle_balance(cpu, rq);
 		if (!rq->nr_running) {
 			next = rq->idle;
 			rq->expired_timestamp = 0;
 			wake_sleeping_dependent(cpu, rq);
-			/*
-			 * wake_sleeping_dependent() might have released
-			 * the runqueue, so break out if we got new
-			 * tasks meanwhile:
-			 */
-			if (!rq->nr_running)
-				goto switch_tasks;
-		}
-	} else {
-		if (dependent_sleeper(cpu, rq)) {
-			next = rq->idle;
 			goto switch_tasks;
 		}
-		/*
-		 * dependent_sleeper() releases and reacquires the runqueue
-		 * lock, hence go into the idle loop if the rq went
-		 * empty meanwhile:
-		 */
-		if (unlikely(!rq->nr_running))
-			goto go_idle;
 	}
 
 	array = rq->active;
@@ -3030,6 +2987,8 @@ go_idle:
 		}
 	}
 	next->sleep_type = SLEEP_NORMAL;
+	if (dependent_sleeper(cpu, rq, next))
+		next = rq->idle;
 switch_tasks:
 	if (next == rq->idle)
 		schedstat_inc(rq, sched_goidle);
@@ -6126,7 +6085,6 @@ void __init sched_init(void)
 		rq->push_cpu = 0;
 		rq->migration_thread = NULL;
 		INIT_LIST_HEAD(&rq->migration_queue);
-		rq->cpu = i;
 #endif
 		atomic_set(&rq->nr_iowait, 0);
 

  reply	other threads:[~2006-06-02  8:56 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-06-01 22:55 [PATCH RFC] smt nice introduces significant lock contention Chris Mason
2006-06-01 23:57 ` Chen, Kenneth W
2006-06-02  1:59   ` Con Kolivas
2006-06-02  2:28     ` Con Kolivas
2006-06-02  3:55       ` Con Kolivas
2006-06-02  4:18         ` Nick Piggin
2006-06-02  6:08           ` Con Kolivas
2006-06-02  7:53             ` Nick Piggin
2006-06-02  8:17               ` Con Kolivas
2006-06-02  8:28                 ` Nick Piggin
2006-06-02  8:34                   ` Chen, Kenneth W
2006-06-02  8:56                     ` Nick Piggin [this message]
2006-06-02  9:17                       ` Chen, Kenneth W
2006-06-02  9:25                         ` Con Kolivas
2006-06-02  9:31                           ` Chen, Kenneth W
2006-06-02  9:34                             ` Con Kolivas
2006-06-02  9:53                               ` Chen, Kenneth W
2006-06-02 10:12                                 ` Con Kolivas
2006-06-02 20:53                                   ` Chen, Kenneth W
2006-06-02 22:15                                     ` Con Kolivas
2006-06-02 22:19                                       ` Chen, Kenneth W
2006-06-02 22:31                                         ` Con Kolivas
2006-06-02 22:58                                           ` Chen, Kenneth W
2006-06-03  0:02                                             ` Con Kolivas
2006-06-03  0:08                                               ` Chen, Kenneth W
2006-06-03  0:27                                                 ` Con Kolivas
2006-06-02  9:36                       ` Chen, Kenneth W
2006-06-02 10:30                       ` Con Kolivas
2006-06-02 13:16                         ` Con Kolivas
2006-06-02 21:54                           ` Chen, Kenneth W
2006-06-02 22:04                             ` Con Kolivas
2006-06-02 22:14                       ` Chen, Kenneth W
2006-06-02 10:19                   ` Con Kolivas
2006-06-02 20:59                     ` Chen, Kenneth W
2006-06-02  8:38               ` Chen, Kenneth W
2006-06-02  8:24           ` Chen, Kenneth W
2006-06-02  8:31         ` Chen, Kenneth W
2006-06-02  8:50         ` Chen, Kenneth W
2006-06-02  2:35     ` Chen, Kenneth W
2006-06-02  3:04       ` Con Kolivas
2006-06-02  3:23         ` Con Kolivas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=447FFD35.9020909@yahoo.com.au \
    --to=nickpiggin@yahoo.com.au \
    --cc=kenneth.w.chen@intel.com \
    --cc=kernel@kolivas.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mason@suse.com \
    --cc=mingo@elte.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.