All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nick Piggin <nickpiggin@yahoo.com.au>
To: Rick Lindsley <ricklind@us.ibm.com>,
	linux-kernel <linux-kernel@vger.kernel.org>
Subject: [PATCH] schedstats additions
Date: Sat, 04 Sep 2004 15:07:05 +1000	[thread overview]
Message-ID: <41394D79.40205@yahoo.com.au> (raw)

[-- Attachment #1: Type: text/plain, Size: 443 bytes --]

Hi,
I have a patch here that provides statistics I find more useful. Basically,
it moves a lot more of the balancing information into the domains instead
of the runqueue, where it is nearly useless on multi-domain setups (e.g.
SMT+SMP, SMP+NUMA).

It requires a version number bump, but that isn't much of an issue because
I think we're about the only two using it at the moment, although your tools
will need a little bit of work.

What do you think?


[-- Attachment #2: sched-stat.patch --]
[-- Type: text/x-patch, Size: 10525 bytes --]




---

 linux-2.6-npiggin/kernel/sched.c |  155 ++++++++++++++++++++-------------------
 1 files changed, 82 insertions(+), 73 deletions(-)

diff -puN kernel/sched.c~sched-stat kernel/sched.c
--- linux-2.6/kernel/sched.c~sched-stat	2004-09-04 13:08:54.000000000 +1000
+++ linux-2.6-npiggin/kernel/sched.c	2004-09-04 15:02:07.000000000 +1000
@@ -194,7 +194,6 @@ struct runqueue {
 	unsigned long yld_cnt;
 
 	/* schedule() stats */
-	unsigned long sched_noswitch;
 	unsigned long sched_switch;
 	unsigned long sched_cnt;
 	unsigned long sched_goidle;
@@ -203,26 +202,9 @@ struct runqueue {
 	unsigned long pt_gained[MAX_IDLE_TYPES];
 	unsigned long pt_lost[MAX_IDLE_TYPES];
 
-	/* active_load_balance() stats */
-	unsigned long alb_cnt;
-	unsigned long alb_lost;
-	unsigned long alb_gained;
-	unsigned long alb_failed;
-
 	/* try_to_wake_up() stats */
 	unsigned long ttwu_cnt;
-	unsigned long ttwu_attempts;
-	unsigned long ttwu_moved;
-
-	/* wake_up_new_task() stats */
-	unsigned long wunt_cnt;
-	unsigned long wunt_moved;
-
-	/* sched_migrate_task() stats */
-	unsigned long smt_cnt;
-
-	/* sched_balance_exec() stats */
-	unsigned long sbe_cnt;
+	unsigned long ttwu_remote;
 #endif
 };
 
@@ -277,15 +259,24 @@ struct sched_domain {
 	/* load_balance() stats */
 	unsigned long lb_cnt[MAX_IDLE_TYPES];
 	unsigned long lb_failed[MAX_IDLE_TYPES];
+	unsigned long lb_balanced[MAX_IDLE_TYPES];
 	unsigned long lb_imbalance[MAX_IDLE_TYPES];
+	unsigned long lb_pulled[MAX_IDLE_TYPES];
+	unsigned long lb_hot_pulled[MAX_IDLE_TYPES];
 	unsigned long lb_nobusyg[MAX_IDLE_TYPES];
 	unsigned long lb_nobusyq[MAX_IDLE_TYPES];
 
+	/* Active load balancing */
+	unsigned long alb_cnt;
+	unsigned long alb_failed;
+	unsigned long alb_pushed;
+
 	/* sched_balance_exec() stats */
 	unsigned long sbe_attempts;
 	unsigned long sbe_pushed;
 
 	/* try_to_wake_up() stats */
+	unsigned long ttwu_wake_remote;
 	unsigned long ttwu_wake_affine;
 	unsigned long ttwu_wake_balance;
 #endif
@@ -409,7 +400,7 @@ static inline void task_rq_unlock(runque
  * bump this up when changing the output format or the meaning of an existing
  * format, so that tools can adapt (or abort)
  */
-#define SCHEDSTAT_VERSION 10
+#define SCHEDSTAT_VERSION 11
 
 static int show_schedstat(struct seq_file *seq, void *v)
 {
@@ -427,17 +418,12 @@ static int show_schedstat(struct seq_fil
 
 		/* runqueue-specific stats */
 		seq_printf(seq,
-		    "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu "
-		    "%lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
+		    "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
 		    cpu, rq->yld_both_empty,
-		    rq->yld_act_empty, rq->yld_exp_empty,
-		    rq->yld_cnt, rq->sched_noswitch,
+		    rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt,
 		    rq->sched_switch, rq->sched_cnt, rq->sched_goidle,
-		    rq->alb_cnt, rq->alb_gained, rq->alb_lost,
-		    rq->alb_failed,
-		    rq->ttwu_cnt, rq->ttwu_moved, rq->ttwu_attempts,
-		    rq->wunt_cnt, rq->wunt_moved,
-		    rq->smt_cnt, rq->sbe_cnt, rq->rq_sched_info.cpu_time,
+		    rq->ttwu_cnt, rq->ttwu_remote,
+		    rq->rq_sched_info.cpu_time,
 		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt);
 
 		for (itype = IDLE; itype < MAX_IDLE_TYPES; itype++)
@@ -453,16 +439,20 @@ static int show_schedstat(struct seq_fil
 			cpumask_scnprintf(mask_str, NR_CPUS, sd->span);
 			seq_printf(seq, "domain%d %s", dcnt++, mask_str);
 			for (itype = IDLE; itype < MAX_IDLE_TYPES; itype++) {
-				seq_printf(seq, " %lu %lu %lu %lu %lu",
+				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu",
 				    sd->lb_cnt[itype],
+				    sd->lb_balanced[itype],
 				    sd->lb_failed[itype],
 				    sd->lb_imbalance[itype],
+				    sd->lb_pulled[itype],
+				    sd->lb_hot_pulled[itype],
 				    sd->lb_nobusyq[itype],
 				    sd->lb_nobusyg[itype]);
 			}
-			seq_printf(seq, " %lu %lu %lu %lu\n",
+			seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu\n",
+			    sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
 			    sd->sbe_pushed, sd->sbe_attempts,
-			    sd->ttwu_wake_affine, sd->ttwu_wake_balance);
+			    sd->ttwu_wake_remote, sd->ttwu_wake_affine, sd->ttwu_wake_balance);
 		}
 #endif
 	}
@@ -1058,6 +1048,10 @@ static int try_to_wake_up(task_t * p, un
 	unsigned long load, this_load;
 	struct sched_domain *sd;
 	int new_cpu;
+
+#ifdef CONFIG_SCHEDSTATS
+	struct sched_domain *stat_sd = NULL;
+#endif
 #endif
 
 	rq = task_rq_lock(p, &flags);
@@ -1076,8 +1070,19 @@ static int try_to_wake_up(task_t * p, un
 	if (unlikely(task_running(rq, p)))
 		goto out_activate;
 
-	new_cpu = cpu;
+#ifdef CONFIG_SCHEDSTATS
+	if (cpu != this_cpu) {
+		schedstat_inc(rq, ttwu_remote);
+		for_each_domain(this_cpu, stat_sd) {
+			if (cpu_isset(cpu, stat_sd->span)) {
+				schedstat_inc(stat_sd, ttwu_wake_remote);
+				break;
+			}
+		}
+	}
+#endif
 
+	new_cpu = cpu;
 	if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
 		goto out_set_cpu;
 
@@ -1103,30 +1108,32 @@ static int try_to_wake_up(task_t * p, un
 	 */
 	for_each_domain(this_cpu, sd) {
 		unsigned int imbalance;
-		/*
-		 * Start passive balancing when half the imbalance_pct
-		 * limit is reached.
-		 */
-		imbalance = sd->imbalance_pct + (sd->imbalance_pct - 100) / 2;
 
-		if ((sd->flags & SD_WAKE_AFFINE) &&
-				!task_hot(p, rq->timestamp_last_tick, sd)) {
+		if (cpu_isset(cpu, sd->span)) {
 			/*
-			 * This domain has SD_WAKE_AFFINE and p is cache cold
-			 * in this domain.
+			 * Start passive balancing when half the imbalance_pct
+			 * limit is reached.
 			 */
-			if (cpu_isset(cpu, sd->span)) {
-				schedstat_inc(sd, ttwu_wake_affine);
+			if ((sd->flags & SD_WAKE_AFFINE) &&
+				!task_hot(p, rq->timestamp_last_tick, sd)) {
+				/*
+				 * This domain has SD_WAKE_AFFINE and p is
+				 * cache cold in this domain.
+				 */
+				schedstat_inc(stat_sd, ttwu_wake_affine);
 				goto out_set_cpu;
 			}
-		} else if ((sd->flags & SD_WAKE_BALANCE) &&
+
+			imbalance = sd->imbalance_pct +
+					(sd->imbalance_pct - 100) / 2;
+
+			if ((sd->flags & SD_WAKE_BALANCE) &&
 				imbalance*this_load <= 100*load) {
-			/*
-			 * This domain has SD_WAKE_BALANCE and there is
-			 * an imbalance.
-			 */
-			if (cpu_isset(cpu, sd->span)) {
-				schedstat_inc(sd, ttwu_wake_balance);
+				/*
+				 * This domain has SD_WAKE_BALANCE and there is
+				 * an imbalance.
+				 */
+				schedstat_inc(stat_sd, ttwu_wake_balance);
 				goto out_set_cpu;
 			}
 		}
@@ -1134,10 +1141,8 @@ static int try_to_wake_up(task_t * p, un
 
 	new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
 out_set_cpu:
-	schedstat_inc(rq, ttwu_attempts);
 	new_cpu = wake_idle(new_cpu, p);
 	if (new_cpu != cpu && cpu_isset(new_cpu, p->cpus_allowed)) {
-		schedstat_inc(rq, ttwu_moved);
 		set_task_cpu(p, new_cpu);
 		task_rq_unlock(rq, &flags);
 		/* might preempt at this point */
@@ -1282,8 +1287,6 @@ void fastcall wake_up_new_task(task_t * 
 	this_cpu = smp_processor_id();
 	cpu = task_cpu(p);
 
-	schedstat_inc(rq, wunt_cnt);
-
 	array = rq->active;
 	if (unlikely(p->used_slice == -1)) {
 		p->used_slice = 0;
@@ -1329,8 +1332,6 @@ void fastcall wake_up_new_task(task_t * 
 		__activate_task(p, rq, array);
 		if (TASK_PREEMPTS_CURR(p, rq))
 			resched_task(rq->curr);
-
-		schedstat_inc(rq, wunt_moved);
 #endif
 	}
 	task_rq_unlock(rq, &flags);
@@ -1582,7 +1583,6 @@ static void sched_migrate_task(task_t *p
 	    || unlikely(cpu_is_offline(dest_cpu)))
 		goto out;
 
-	schedstat_inc(rq, smt_cnt);
 	/* force the process onto the specified CPU */
 	if (migrate_task(p, dest_cpu, &req)) {
 		/* Need to wait for migration thread (might exit: take ref). */
@@ -1610,7 +1610,6 @@ void sched_exec(void)
 	struct sched_domain *tmp, *sd = NULL;
 	int new_cpu, this_cpu = get_cpu();
 
-	schedstat_inc(this_rq(), sbe_cnt);
 	/* Prefer the current CPU if there's only this task running */
 	if (this_rq()->nr_running <= 1)
 		goto out;
@@ -1752,13 +1751,10 @@ skip_queue:
 		goto skip_bitmap;
 	}
 
-	/*
-	 * Right now, this is the only place pull_task() is called,
-	 * so we can safely collect pull_task() stats here rather than
-	 * inside pull_task().
-	 */
-	schedstat_inc(this_rq, pt_gained[idle]);
-	schedstat_inc(busiest, pt_lost[idle]);
+#ifdef CONFIG_SCHEDSTATS
+	if (task_hot(tmp, busiest->timestamp_last_tick, sd))
+		schedstat_inc(sd, lb_hot_pulled[idle]);
+#endif
 
 	pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu);
 	pulled++;
@@ -1771,6 +1767,15 @@ skip_queue:
 		goto skip_bitmap;
 	}
 out:
+	/*
+	 * Right now, this is the only place pull_task() is called,
+	 * so we can safely collect pull_task() stats here rather than
+	 * inside pull_task().
+	 */
+	schedstat_add(this_rq, pt_gained[idle], pulled);
+	schedstat_add(busiest, pt_lost[idle], pulled);
+	schedstat_add(sd, lb_pulled[idle], pulled);
+
 	return pulled;
 }
 
@@ -2025,6 +2030,8 @@ static int load_balance(int this_cpu, ru
 	return nr_moved;
 
 out_balanced:
+	schedstat_inc(sd, lb_balanced[idle]);
+
 	/* tune up the balancing interval */
 	if (sd->balance_interval < sd->max_interval)
 		sd->balance_interval *= 2;
@@ -2066,8 +2073,11 @@ static int load_balance_newidle(int this
 	schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance);
 	nr_moved = move_tasks(this_rq, this_cpu, busiest,
 					imbalance, sd, NEWLY_IDLE);
-	if (!nr_moved)
+	if (!nr_moved) {
 		schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
+	} else {
+		schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
+	}
 
 	spin_unlock(&busiest->lock);
 
@@ -2107,7 +2117,6 @@ static void active_load_balance(runqueue
 	struct sched_group *group, *busy_group;
 	int i;
 
-	schedstat_inc(busiest, alb_cnt);
 	if (busiest->nr_running <= 1)
 		return;
 
@@ -2117,6 +2126,8 @@ static void active_load_balance(runqueue
 	if (!sd)
 		return;
 
+	schedstat_inc(sd, alb_cnt);
+
 	group = sd->groups;
 	while (!cpu_isset(busiest_cpu, group->cpumask))
 		group = group->next;
@@ -2153,10 +2164,9 @@ static void active_load_balance(runqueue
 			goto next_group;
 		double_lock_balance(busiest, rq);
 		if (move_tasks(rq, push_cpu, busiest, 1, sd, IDLE)) {
-			schedstat_inc(busiest, alb_lost);
-			schedstat_inc(rq, alb_gained);
+			schedstat_inc(sd, alb_pushed);
 		} else {
-			schedstat_inc(busiest, alb_failed);
+			schedstat_inc(sd, alb_failed);
 		}
 		spin_unlock(&rq->lock);
 next_group:
@@ -2567,8 +2577,7 @@ go_idle:
 		rq->expired = array;
 		rq->expired->min_prio = MAX_PRIO;
 		array = rq->active;
-	} else
-		schedstat_inc(rq, sched_noswitch);
+	}
 
 	idx = sched_find_first_bit(array->bitmap);
 	queue = array->queue + idx;

_

             reply	other threads:[~2004-09-04  5:07 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-09-04  5:07 Nick Piggin [this message]
2004-09-04  8:26 ` [PATCH] schedstats additions Rafael J. Wysocki
2004-09-04  9:04   ` Nick Piggin
2004-09-08  8:09 ` Rick Lindsley
2004-09-08 22:56   ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=41394D79.40205@yahoo.com.au \
    --to=nickpiggin@yahoo.com.au \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ricklind@us.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.