All of lore.kernel.org
 help / color / mirror / Atom feed
From: Michael Wang <wangyun@linux.vnet.ibm.com>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>, Ingo Molnar <mingo@kernel.org>
Subject: [RFC PATCH] sched: dynamically schedule domain configuration
Date: Mon, 16 Jul 2012 17:16:39 +0800	[thread overview]
Message-ID: <5003DBF7.9090401@linux.vnet.ibm.com> (raw)

From: Michael Wang <wangyun@linux.vnet.ibm.com>

This patch tries to provide a way for users to dynamically change
the behaviour of load balancing by setting flags of the schedule domains.

Currently it relies on the cpu cgroup and only SD_LOAD_BALANCE is
implemented. Usage:

1. /sys/fs/cgroup/domain/domain.config_level
	the default config_level is 0, which means we currently configure
	the sibling domain for all cpus, we can use:
		echo 'number' > /sys/fs/cgroup/domain/domain.config_level
	to change the level.

2. /sys/fs/cgroup/domain/domain.topology
	this shows the SD_LOAD_BALANCE status of every domain level of
	every cpu, we can use:
		cat /sys/fs/cgroup/domain/domain.topology

3. /sys/fs/cgroup/domain/domain.SD_LOAD_BALANCE
	this changes the SD_LOAD_BALANCE bit in the flags of the
	schedule domains at level 'config_level', we can use:
		echo 1 > /sys/fs/cgroup/domain/domain.SD_LOAD_BALANCE
	to enable this bit, and:
		echo 0 > /sys/fs/cgroup/domain/domain.SD_LOAD_BALANCE
	to disable it.

It may not work well yet (it may not even work at all, as I can't see any
changes on my server even after disabling SD_LOAD_BALANCE on all domains),
but it is interesting and should be welcomed by people who want a
way to 'kill' the load balance by their own hands, if we can implement it.

Comments and questions are very welcome ;-)

Signed-off-by: Michael Wang <wangyun@linux.vnet.ibm.com>
---
 include/linux/cgroup_subsys.h |    1 +
 kernel/sched/core.c           |  143 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 144 insertions(+), 0 deletions(-)

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 0bd390c..25eb842 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -21,6 +21,7 @@ SUBSYS(debug)
 
 #ifdef CONFIG_CGROUP_SCHED
 SUBSYS(cpu_cgroup)
+SUBSYS(domain_cgroup)
 #endif
 
 /* */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3987b9d..544bf78 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8423,6 +8423,149 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.early_init	= 1,
 };
 
+static struct cgroup_subsys_state domain_cgroup_css;	/* single static state object handed out by create() */
+static struct cgroup_subsys_state *domain_cgroup_create(struct cgroup *cgrp)
+{
+	if (!cgrp->parent) {	/* no parent: this is the top cgroup */
+		/* This is early initialization for the top cgroup */
+		return &domain_cgroup_css;
+	}
+
+	return ERR_PTR(-EPERM);	/* any cgroup that has a parent is refused */
+}
+
+static void domain_cgroup_destroy(struct cgroup *cgrp)
+{
+	return;	/* no-op: create() only hands out a static object */
+}
+
+static int domain_cgroup_can_attach(struct cgroup *cgrp,
+				    struct cgroup_taskset *tset)
+{
+	return -EINVAL;	/* always fails; presumably so no task can be attached */
+}
+
+static void domain_cgroup_attach(struct cgroup *cgrp,
+				 struct cgroup_taskset *tset)
+{
+	return;	/* no-op; NOTE(review): likely unreachable since can_attach always fails */
+}
+
+static void domain_cgroup_exit(struct cgroup *cgrp,
+			       struct cgroup *old_cgrp,
+			       struct task_struct *task)
+{
+	return;	/* no-op: none of the arguments are used */
+}
+
+static int domain_config_level;	/* compared against sd->level by the SD_LOAD_BALANCE handlers */
+
+static int domain_cl_write_u64(struct cgroup *cgrp,
+			       struct cftype *cftype,
+			       u64 shareval)
+{
+	domain_config_level = shareval;	/* NOTE(review): u64 stored into an int with no range check */
+	return 0;	/* always reports success */
+}
+
+static u64 domain_cl_read_u64(struct cgroup *cgrp, struct cftype *cft)
+{
+	return (u64)domain_config_level;	/* stored level, converted from int to u64 */
+}
+
+static int domain_slb_write_u64(struct cgroup *cgrp,
+				struct cftype *cftype,
+				u64 shareval)
+{
+	int cpu;
+	struct sched_domain *sd;
+	if (shareval != 0 && shareval != 1)	/* only 0 (clear) and 1 (set) are accepted */
+		return -EINVAL;
+
+	mutex_lock(&sched_domains_mutex);	/* held across the whole walk below */
+	for_each_cpu(cpu, cpu_active_mask) {
+		for (sd = cpu_rq(cpu)->sd; sd; sd = sd->parent) {	/* start at cpu_rq(cpu)->sd, follow ->parent */
+			if (sd->level == domain_config_level) {	/* only the selected level is modified */
+				if (shareval)
+					sd->flags |= SD_LOAD_BALANCE;
+				else
+					sd->flags &= ~SD_LOAD_BALANCE;
+			}
+		}
+	}
+	mutex_unlock(&sched_domains_mutex);
+	return 0;	/* success even when no domain matched the level */
+}
+
+static u64 domain_slb_read_u64(struct cgroup *cgrp, struct cftype *cft)
+{
+	int cpu, ret = 0;	/* ret stays 0 unless the flag is found set */
+	struct sched_domain *sd;
+	mutex_lock(&sched_domains_mutex);
+	for_each_cpu(cpu, cpu_active_mask) {
+		for (sd = cpu_rq(cpu)->sd; sd; sd = sd->parent) {
+			if (sd->level == domain_config_level) {
+				if (sd->flags & SD_LOAD_BALANCE)
+					ret = 1;
+				goto out;	/* NOTE(review): result comes from the first matching domain only */
+			}
+		}
+	}
+out:
+	mutex_unlock(&sched_domains_mutex);	/* reached both via goto and when nothing matched */
+	return ret;
+}
+
+static int domain_topology_show(struct cgroup *cgrp,
+				struct cftype *cft,
+				struct cgroup_map_cb *cb)
+{
+	int cpu;
+	struct sched_domain *sd;
+	mutex_lock(&sched_domains_mutex);
+	for_each_cpu(cpu, cpu_active_mask) {
+		cb->fill(cb, "cpu", cpu);	/* one "cpu" entry per cpu in cpu_active_mask */
+		for (sd = cpu_rq(cpu)->sd; sd; sd = sd->parent) {
+			cb->fill(cb, "\tlevel", sd->level);	/* one entry per domain on the ->parent chain */
+			cb->fill(cb, "\t\tSD_LOAD_BALANCE",
+						sd->flags & SD_LOAD_BALANCE);	/* masked flag bit, not normalized to 0/1 */
+		}
+	}
+	mutex_unlock(&sched_domains_mutex);
+
+	return 0;
+}
+
+static struct cftype domain_files[] = {	/* control files referenced by domain_cgroup_subsys.base_cftypes */
+	{
+		.name = "config_level",	/* reads/writes domain_config_level */
+		.read_u64 = domain_cl_read_u64,
+		.write_u64 = domain_cl_write_u64,
+	},
+	{
+		.name = "SD_LOAD_BALANCE",	/* reads/sets the flag at the configured level */
+		.read_u64 = domain_slb_read_u64,
+		.write_u64 = domain_slb_write_u64,
+	},
+	{
+		.name = "topology",	/* only a read handler is set for this file */
+		.read_map = domain_topology_show,
+	},
+	{ }	/* terminate */
+};
+
+struct cgroup_subsys domain_cgroup_subsys = {	/* wires the callbacks and files of this patch into one subsystem */
+	.name		= "domain",
+	.create		= domain_cgroup_create,	/* succeeds only for a cgroup without a parent */
+	.destroy	= domain_cgroup_destroy,
+	.can_attach	= domain_cgroup_can_attach,	/* always returns -EINVAL */
+	.attach		= domain_cgroup_attach,
+	.exit		= domain_cgroup_exit,
+	.subsys_id	= domain_cgroup_subsys_id,	/* presumably generated from SUBSYS(domain_cgroup) - TODO confirm */
+	.base_cftypes	= domain_files,
+	.early_init	= 1,
+};
+
 #endif	/* CONFIG_CGROUP_SCHED */
 
 #ifdef CONFIG_CGROUP_CPUACCT
-- 
1.7.4.1


             reply	other threads:[~2012-07-16  9:16 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-16  9:16 Michael Wang [this message]
     [not found] ` <5003DBF7.9090401-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2012-07-17  2:46   ` [RFC PATCH] sched: dynamically schedule domain configuration Michael Wang
2012-07-17  2:46     ` Michael Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5003DBF7.9090401@linux.vnet.ibm.com \
    --to=wangyun@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.