public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH] sched: Add cpu based entries to debugfs
@ 2015-03-30  2:13 David Ahern
  2015-03-30  2:32 ` Mike Galbraith
  2015-03-30  7:44 ` Peter Zijlstra
  0 siblings, 2 replies; 15+ messages in thread
From: David Ahern @ 2015-03-30  2:13 UTC (permalink / raw)
  To: efault, peterz, mingo; +Cc: linux-kernel, David Ahern

Currently sched_debug can be added to the kernel command line to dump
domain information during boot. This method is not practical on systems
with a large number of CPUs.

This patch adds per-cpu entries to debugfs under a sched directory.
Reading the per-cpu file shows the domain information in a human-readable
format:

$ cat /sys/kernel/debug/sched/cpu0
domain 0 / SMT:
    flags: 0x2af:  load-balance new-idle exec fork affine cpu-capacity share-pkg-resources
    span: 0-7
    groups:
        0 (cpu_capacity = 147)
        1 (cpu_capacity = 147)
        2 (cpu_capacity = 147)
        3 (cpu_capacity = 147)
        4 (cpu_capacity = 147)
        5 (cpu_capacity = 147)
        6 (cpu_capacity = 147)
        7 (cpu_capacity = 147)

domain 2 / DIE:
    flags: 0x102f:  load-balance new-idle exec fork affine prefer-sibling
    span: 0-127
    groups:
        0-7 (cpu_capacity = 1176)
        8-15 (cpu_capacity = 1176)
        16-23 (cpu_capacity = 1176)
        24-31 (cpu_capacity = 1176)
        32-39 (cpu_capacity = 1176)
        40-47 (cpu_capacity = 1176)
        48-55 (cpu_capacity = 1176)
        56-63 (cpu_capacity = 1176)
        64-71 (cpu_capacity = 1176)
        72-79 (cpu_capacity = 1176)
        80-87 (cpu_capacity = 1176)
        88-95 (cpu_capacity = 1176)
        96-103 (cpu_capacity = 1176)
        104-111 (cpu_capacity = 1176)
        112-119 (cpu_capacity = 1176)
        120-127 (cpu_capacity = 1176)

domain 3 / NUMA:
    flags: 0x642f:  load-balance new-idle exec fork affine serialize overlap numa
    span: 0-1023
    groups:
        0-127 (cpu_capacity = 18816)
        128-255 (cpu_capacity = 18816)
        256-383 (cpu_capacity = 18816)
        384-511 (cpu_capacity = 18816)
        512-639 (cpu_capacity = 18816)
        640-767 (cpu_capacity = 18816)
        768-895 (cpu_capacity = 18816)
        896-1023 (cpu_capacity = 18816)

Before spending too much time formalizing this, I wanted to see if you guys
would entertain the idea of making this info available via debugfs. The patch
does move the existing sched_features file to sched/features -- I am not sure
how acceptable it is to move files in debugfs.

TO-DO: handle hotplug

Signed-off-by: David Ahern <david.ahern@oracle.com>
---
 kernel/sched/core.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 164 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 62671f53202a..b4d8d0c8260e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -268,12 +268,173 @@ static const struct file_operations sched_feat_fops = {
 	.release	= single_release,
 };
 
+/*
+ * Printable names for the bits of sd->flags, indexed by bit position:
+ * entry i is emitted when bit i is set. The empty strings are placeholders
+ * for bit positions that have no name in this table.
+ */
+static const char * const sd_flag_names[] = {
+	[0]	= "load-balance",
+	[1]	= "new-idle",
+	[2]	= "exec",
+	[3]	= "fork",
+	[4]	= "wake",
+	[5]	= "affine",
+	[6]	= "",
+	[7]	= "cpu-capacity",
+	[8]	= "power-domain",
+	[9]	= "share-pkg-resources",
+	[10]	= "serialize",
+	[11]	= "asym-packing",
+	[12]	= "prefer-sibling",
+	[13]	= "overlap",
+	[14]	= "numa",
+	[15]	= "",
+};
+
+/*
+ * Print one scheduling domain of @cpu to the seq_file @m: its level and
+ * name, the decoded flag bits, its CPU span and every group found on the
+ * circular sd->groups list. Consistency problems detected on the way are
+ * reported inline as "ERROR:" lines; nothing is returned to the caller.
+ *
+ * NOTE(review): groupmask is a full struct cpumask on the stack; with a
+ * large CONFIG_NR_CPUS that is a sizeable frame -- consider cpumask_var_t.
+ */
+static void sched_cpu_domain_show(struct seq_file *m, struct sched_domain *sd,
+				  int cpu)
+{
+	struct cpumask groupmask;
+	struct sched_group *group = sd->groups;
+	int i;
+
+	/* Accumulates the CPUs of all groups seen so far. */
+	cpumask_clear(&groupmask);
+
+	seq_printf(m, "domain %d / %s:\n", sd->level, sd->name);
+	seq_printf(m, "    flags: 0x%x: ", sd->flags);
+
+	/* Decode each set flag bit through the name table. */
+	for (i = 0; i < ARRAY_SIZE(sd_flag_names); ++i) {
+		if (sd->flags & (1 << i))
+			seq_printf(m, " %s", sd_flag_names[i]);
+	}
+	seq_puts(m, "\n");
+
+	if (!(sd->flags & SD_LOAD_BALANCE) && sd->parent)
+		seq_puts(m, "           ERROR: !SD_LOAD_BALANCE domain has parent\n");
+
+	seq_printf(m, "    span: %*pbl\n",
+		   cpumask_pr_args(sched_domain_span(sd)));
+
+	if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
+		seq_printf(m, "    ERROR: domain->span does not contain CPU%d\n", cpu);
+
+	/*
+	 * A NULL group list is diagnosed by the loop below; it must not be
+	 * dereferenced here first.
+	 */
+	if (group && !cpumask_test_cpu(cpu, sched_group_cpus(group)))
+		seq_printf(m, "    ERROR: domain->groups does not contain CPU%d\n", cpu);
+
+	seq_puts(m, "    groups:\n");
+	do {
+		if (!group) {
+			seq_puts(m, "            ERROR: group is NULL\n");
+			break;
+		}
+
+		/*
+		 * Even though we initialize ->capacity to something semi-sane,
+		 * we leave capacity_orig unset. This allows us to detect if
+		 * domain iteration is still funny without causing /0 traps.
+		 */
+		if (!group->sgc->capacity_orig) {
+			seq_puts(m, "        ERROR: domain->cpu_capacity not set\n");
+			break;
+		}
+
+		if (!cpumask_weight(sched_group_cpus(group))) {
+			seq_puts(m, "        ERROR: empty group\n");
+			break;
+		}
+
+		/* Groups may only share CPUs when the domain is SD_OVERLAP. */
+		if (!(sd->flags & SD_OVERLAP) &&
+		    cpumask_intersects(&groupmask, sched_group_cpus(group))) {
+			seq_puts(m, "        ERROR: repeated CPUs\n");
+			break;
+		}
+
+		cpumask_or(&groupmask, &groupmask, sched_group_cpus(group));
+
+		seq_printf(m, "        %*pbl",
+			   cpumask_pr_args(sched_group_cpus(group)));
+
+		/* The capacity is only shown when it differs from the default scale. */
+		if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
+			seq_printf(m, " (cpu_capacity = %d)",
+				   group->sgc->capacity);
+		}
+		seq_puts(m, "\n");
+
+		group = group->next;
+	} while (group != sd->groups);
+
+	/* The union of all groups must be exactly the domain span ... */
+	if (!cpumask_equal(sched_domain_span(sd), &groupmask))
+		seq_puts(m, "    ERROR: groups don't span domain->span\n");
+
+	/* ... and must be contained in the parent's span, if there is one. */
+	if (sd->parent &&
+	    !cpumask_subset(&groupmask, sched_domain_span(sd->parent))) {
+		seq_puts(m, "    ERROR: parent span is not a superset of domain->span\n");
+	}
+}
+
+/*
+ * seq_file show callback for a per-cpu debugfs file. The CPU number is
+ * carried in m->private as an integer cast to a pointer. Every scheduling
+ * domain of that CPU is printed, each followed by a blank line.
+ *
+ * Always returns 0; an out-of-range CPU number is reported in the output
+ * instead of as an error code.
+ */
+static int sched_cpu_show(struct seq_file *m, void *unused)
+{
+	struct sched_domain *sd;
+	int cpu = (int) ((long) m->private);
+
+	/* Valid CPU numbers are 0 .. CONFIG_NR_CPUS - 1. */
+	if (cpu < 0 || cpu >= CONFIG_NR_CPUS) {
+		seq_printf(m, "invalid CPU, %d\n", cpu);
+		return 0;
+	}
+
+	/*
+	 * The domain tree is RCU-protected and may be rebuilt while it is
+	 * being walked, so hold the read-side lock across the traversal.
+	 */
+	rcu_read_lock();
+	for_each_domain(cpu, sd) {
+		sched_cpu_domain_show(m, sd, cpu);
+		seq_puts(m, "\n");
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/*
+ * open() handler for the per-cpu files: sets up a single-record seq_file
+ * driven by sched_cpu_show() and hands it the inode's private pointer.
+ */
+static int sched_cpu_open(struct inode *inode, struct file *filp)
+{
+	void *cpu_cookie = inode->i_private;
+
+	return single_open(filp, sched_cpu_show, cpu_cookie);
+}
+/* File operations for the per-cpu debugfs files; opened via sched_cpu_open(). */
+static const struct file_operations sched_cpu_fops = {
+	.open		= sched_cpu_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+/* Dentry of the debugfs "sched" directory, set in sched_init_debug(). */
+static struct dentry *d_sched_debug;
+/* Per-cpu file dentries, indexed by CPU number; set by sched_debugfs_add_cpu(). */
+static struct dentry *d_sched_cpu[CONFIG_NR_CPUS];
+
+/*
+ * Create the read-only debugfs file "cpu<N>" for @cpu below the sched
+ * directory and record its dentry in d_sched_cpu[]. The CPU number is
+ * passed to the file as its private data. A NULL result from debugfs is
+ * only warned about; the function returns 0 in every case.
+ */
+static int sched_debugfs_add_cpu(int cpu)
+{
+	char name[32];
+	struct dentry *entry;
+
+	snprintf(name, sizeof(name), "cpu%d", cpu);
+
+	entry = debugfs_create_file(name, 0444, d_sched_debug,
+				    (void *)(long)cpu, &sched_cpu_fops);
+	d_sched_cpu[cpu] = entry;
+	if (!entry)
+		pr_warn("Failed to create debugfs entry for cpu %d\n", cpu);
+
+	return 0;
+}
+
 static __init int sched_init_debug(void)
 {
-	debugfs_create_file("sched_features", 0644, NULL, NULL,
+	int err = 0;
+	int cpu;
+
+	/* All scheduler debugfs files live below the "sched" directory. */
+	d_sched_debug = debugfs_create_dir("sched", NULL);
+	if (d_sched_debug == NULL) {
+		pr_warn("Could not create debugfs 'sched' entry\n");
+		return 0;
+	}
+
+	debugfs_create_file("features", 0644, d_sched_debug, NULL,
 			&sched_feat_fops);
 
-	return 0;
+	/* One file per CPU that is online now; stop at the first error. */
+	for_each_online_cpu(cpu) {
+		err = sched_debugfs_add_cpu(cpu);
+		if (err)
+			break;
+	}
+
+	return err;
 }
 late_initcall(sched_init_debug);
 #endif /* CONFIG_SCHED_DEBUG */
@@ -6689,7 +6850,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 
 		if (!cpumask_subset(sched_domain_span(child),
 				    sched_domain_span(sd))) {
-			pr_err("BUG: arch topology borken\n");
+			pr_err("BUG: arch topology broken\n");
 #ifdef CONFIG_SCHED_DEBUG
 			pr_err("     the %s domain not a subset of the %s domain\n",
 					child->name, sd->name);
-- 
2.3.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2015-04-06  4:04 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-03-30  2:13 [RFC PATCH] sched: Add cpu based entries to debugfs David Ahern
2015-03-30  2:32 ` Mike Galbraith
2015-03-30  2:48   ` David Ahern
2015-03-30  3:08     ` Mike Galbraith
2015-03-30 13:03       ` David Ahern
2015-03-30 14:45         ` Mike Galbraith
2015-03-30  7:44 ` Peter Zijlstra
2015-03-30  8:26   ` Mike Galbraith
2015-03-30  8:28     ` Peter Zijlstra
2015-03-30  8:43       ` Mike Galbraith
2015-03-30  9:18         ` Peter Zijlstra
2015-03-31  9:13           ` Ingo Molnar
2015-04-06  2:10             ` David Ahern
2015-04-06  4:04               ` Mike Galbraith
2015-03-30 13:02   ` David Ahern

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox