[PATCH 0/9] CPU controller

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 0/9] CPU controller
@ 2006-04-28  1:37 MAEDA Naoaki
  2006-04-28  1:37 ` [PATCH 1/9] CPU controller - Add class load estimation support MAEDA Naoaki
                   ` (9 more replies)
  0 siblings, 10 replies; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  1:37 UTC (permalink / raw)
  To: akpm, linux-kernel, ckrm-tech; +Cc: MAEDA Naoaki

Andrew,

This patchset adds a CPU resource controller on top of Resource Groups. 
The CPU resource controller manages CPU resources by scaling timeslice
allocated for each task without changing the algorithm of the O(1)
scheduler.

Please consider these for inclusion in -mm tree.
--
Patch descriptions:

1/9: cpurc_load_estimation
	- Adds class load estimation support

2/9: cpurc_hungry_detection
	- Adds class hungry detection support

3/9: cpurc_timeslice_scaling
	- Adds CPU resource control by scaling timeslice

4/9: cpurc_interface
	- Adds interface functions to CKRM CPU controller

5/9: cpurc_docs
	- Documents how the CPU resource controller works

6/9: cpu_init
	- Adds the basic functions and registers the CPU controller
	  on top of Resource Groups

7/9: cpu_shares_n_stats
	- Adds routines to change share values and show statistics

8/9: cpu_hotplug
	- Adds cpu hotplug support 

9/9: cpu_docs
	- Documents how to use the CPU controller

Thanks,
MAEDA Naoaki

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 1/9] CPU controller - Add class load estimation support
  2006-04-28  1:37 [PATCH 0/9] CPU controller MAEDA Naoaki
@ 2006-04-28  1:37 ` MAEDA Naoaki
  2006-04-28  1:37 ` [PATCH 2/9] CPU controller - Add class hungry detection support MAEDA Naoaki
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  1:37 UTC (permalink / raw)
  To: akpm, linux-kernel, ckrm-tech; +Cc: MAEDA Naoaki

1/9: cpurc_load_estimation

This patch corresponds to section 1 in Documentation/res_groups/cpurc-internals,
adding load estimation for the tasks in a resource group, which are
grouped by the cpu_rc structure.  Load estimation is necessary for controlling
the CPU resource because the CPU resource controller needs to know whether
the resource assigned to a resource group is sufficient or not.

Signed-off-by: Kurosawa Takahiro <kurosawa@valinux.co.jp>
Signed-off-by: MAEDA Naoaki <maeda.naoaki@jp.fujitsu.com>

 include/linux/cpu_rc.h |   65 ++++++++++++++++++++++++++++++++++++++++
 include/linux/sched.h  |    5 +++
 init/Kconfig           |    9 +++++
 kernel/Makefile        |    1 
 kernel/cpu_rc.c        |   79 +++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/exit.c          |    2 +
 kernel/sched.c         |   14 ++++++++
 7 files changed, 175 insertions(+)

Index: linux-2.6.17-rc3/include/linux/cpu_rc.h
===================================================================
--- /dev/null
+++ linux-2.6.17-rc3/include/linux/cpu_rc.h
@@ -0,0 +1,65 @@
+#ifndef _LINUX_CPU_RC_H_
+#define _LINUX_CPU_RC_H_
+/*
+ *  CPU resource controller interface
+ *
+ *  Copyright 2005-2006 FUJITSU LIMITED
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of the Linux
+ *  distribution for more details.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_CPU_RC
+
+#define CPU_RC_SPREAD_PERIOD	(10 * HZ)
+#define CPU_RC_LOAD_SCALE	(2 * CPU_RC_SPREAD_PERIOD)
+#define CPU_RC_GUAR_SCALE	100
+
+struct cpu_rc_domain {
+	spinlock_t lock;
+	unsigned long timestamp;
+	cpumask_t cpus;
+	int numcpus;
+	int numcrs;
+};
+
+struct cpu_rc {
+	struct cpu_rc_domain *rcd;
+	struct {
+		unsigned long timestamp;
+		unsigned int load;
+	} stat[NR_CPUS];	/* XXX  need alignment */
+};
+
+extern struct cpu_rc *cpu_rc_get(task_t *);
+extern unsigned int cpu_rc_load(struct cpu_rc *);
+extern void cpu_rc_account(task_t *, unsigned long);
+
+static inline void cpu_rc_record_allocation(task_t *tsk,
+					    unsigned int slice,
+					    unsigned long now)
+{
+	if (slice == 0) {
+		/* minimal allocated time_slice is 1 (see sched_fork()). */
+		slice = 1;
+	}
+
+	tsk->last_slice = slice;
+	tsk->ts_alloced = now;
+}
+
+#else /* CONFIG_CPU_RC */
+
+static inline void cpu_rc_account(task_t *tsk, unsigned long now) {}
+static inline void cpu_rc_record_allocation(task_t *tsk,
+					    unsigned int slice,
+					    unsigned long now) {}
+
+#endif /* CONFIG_CPU_RC */
+
+#endif /* _LINUX_CPU_RC_H_ */
+
Index: linux-2.6.17-rc3/include/linux/sched.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/sched.h
+++ linux-2.6.17-rc3/include/linux/sched.h
@@ -892,6 +892,11 @@ struct task_struct {
 	struct resource_group *res_group;
 	struct list_head member_list; /* list of tasks in the resource group */
 #endif /* CONFIG_RES_GROUPS */
+#ifdef CONFIG_CPU_RC
+	unsigned int last_slice;
+	unsigned long ts_alloced;
+#endif
+
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
Index: linux-2.6.17-rc3/init/Kconfig
===================================================================
--- linux-2.6.17-rc3.orig/init/Kconfig
+++ linux-2.6.17-rc3/init/Kconfig
@@ -261,6 +261,15 @@ config RELAY
 
 	  If unsure, say N.
 
+config CPU_RC
+	bool "CPU resource controller"
+	depends on RES_GROUPS_RES_CPU
+	help
+	  This option lets you control the CPU resource by scaling
+	  the timeslice allocated to each task.
+
+	  Say N if unsure.
+
 source "usr/Kconfig"
 
 config UID16
Index: linux-2.6.17-rc3/kernel/Makefile
===================================================================
--- linux-2.6.17-rc3.orig/kernel/Makefile
+++ linux-2.6.17-rc3/kernel/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
+obj-$(CONFIG_CPU_RC) += cpu_rc.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
Index: linux-2.6.17-rc3/kernel/cpu_rc.c
===================================================================
--- /dev/null
+++ linux-2.6.17-rc3/kernel/cpu_rc.c
@@ -0,0 +1,79 @@
+/*
+ *  kernel/cpu_rc.c
+ *
+ *  CPU resource controller by scaling time_slice of the task.
+ *
+ *  Copyright 2005-2006 FUJITSU LIMITED
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of the Linux
+ *  distribution for more details.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/cpu_rc.h>
+
+/*
+ * cpu_rc_load() calculates a resource group load
+ */
+unsigned int cpu_rc_load(struct cpu_rc *cr)
+{
+	unsigned int load;
+	int i, n;
+
+	BUG_ON(!cr);
+
+	load = 0;
+	n = 0;
+
+	/* Just reading the value, so no locking... */
+	for_each_cpu_mask(i, cr->rcd->cpus) {
+		if (jiffies - cr->stat[i].timestamp <= CPU_RC_SPREAD_PERIOD)
+			load += cr->stat[i].load;
+		n++;
+	}
+
+	return load / n * CPU_RC_GUAR_SCALE / CPU_RC_LOAD_SCALE;
+}
+
+/*
+ * cpu_rc_account() calculates the task load when the timeslice is expired
+ */
+void cpu_rc_account(task_t *tsk, unsigned long now)
+{
+	struct cpu_rc *cr;
+	int cpu = get_cpu();
+	unsigned long last;
+	unsigned int resgrp_load, tsk_load;
+	unsigned long base, update;
+
+	if (tsk == idle_task(task_cpu(tsk)))
+		goto out;
+
+	cr = cpu_rc_get(tsk);
+	if (!cr)
+		goto out;
+
+	base = now - tsk->ts_alloced;
+	if (base == 0)
+		goto out;  /* duration too small. can not collect statistics. */
+
+	tsk_load = CPU_RC_LOAD_SCALE * (tsk->last_slice - tsk->time_slice)
+			+ (CPU_RC_LOAD_SCALE / 2);
+	if (base > CPU_RC_SPREAD_PERIOD)
+		tsk_load = CPU_RC_SPREAD_PERIOD * tsk_load / base;
+
+	last = cr->stat[cpu].timestamp;
+	update = now - last;
+	if (update > CPU_RC_SPREAD_PERIOD)
+		resgrp_load = 0;  /* statistics data obsolete. */
+	else
+		resgrp_load = cr->stat[cpu].load
+			 * (CPU_RC_SPREAD_PERIOD - update);
+
+	cr->stat[cpu].timestamp = now;
+	cr->stat[cpu].load = (resgrp_load + tsk_load) / CPU_RC_SPREAD_PERIOD;
+out:
+	put_cpu();
+}
Index: linux-2.6.17-rc3/kernel/sched.c
===================================================================
--- linux-2.6.17-rc3.orig/kernel/sched.c
+++ linux-2.6.17-rc3/kernel/sched.c
@@ -43,6 +43,7 @@
 #include <linux/rcupdate.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/cpu_rc.h>
 #include <linux/percpu.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
@@ -1377,6 +1378,7 @@ int fastcall wake_up_state(task_t *p, un
 void fastcall sched_fork(task_t *p, int clone_flags)
 {
 	int cpu = get_cpu();
+	unsigned long now;
 
 #ifdef CONFIG_SMP
 	cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
@@ -1416,6 +1418,9 @@ void fastcall sched_fork(task_t *p, int 
 	p->first_time_slice = 1;
 	current->time_slice >>= 1;
 	p->timestamp = sched_clock();
+	now = jiffies;
+	cpu_rc_record_allocation(current, current->time_slice, now);
+	cpu_rc_record_allocation(p, p->time_slice, now);
 	if (unlikely(!current->time_slice)) {
 		/*
 		 * This case is rare, it happens when the parent has only
@@ -1533,6 +1538,8 @@ void fastcall sched_exit(task_t *p)
 		p->parent->time_slice += p->time_slice;
 		if (unlikely(p->parent->time_slice > task_timeslice(p)))
 			p->parent->time_slice = task_timeslice(p);
+		cpu_rc_record_allocation(p->parent,
+					 p->parent->time_slice, jiffies);
 	}
 	if (p->sleep_avg < p->parent->sleep_avg)
 		p->parent->sleep_avg = p->parent->sleep_avg /
@@ -2617,6 +2624,7 @@ void scheduler_tick(void)
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
 	unsigned long long now = sched_clock();
+	unsigned long jnow;
 
 	update_cpu_clock(p, rq, now);
 
@@ -2651,6 +2659,9 @@ void scheduler_tick(void)
 			p->time_slice = task_timeslice(p);
 			p->first_time_slice = 0;
 			set_tsk_need_resched(p);
+#ifdef CONFIG_CPU_RC
+			/* XXX  need accounting even for rt_task? */
+#endif
 
 			/* put it at the end of the queue: */
 			requeue_task(p, rq->active);
@@ -2660,9 +2671,12 @@ void scheduler_tick(void)
 	if (!--p->time_slice) {
 		dequeue_task(p, rq->active);
 		set_tsk_need_resched(p);
+		jnow = jiffies;
+		cpu_rc_account(p, jnow);
 		p->prio = effective_prio(p);
 		p->time_slice = task_timeslice(p);
 		p->first_time_slice = 0;
+		cpu_rc_record_allocation(p, p->time_slice, jnow);
 
 		if (!rq->expired_timestamp)
 			rq->expired_timestamp = jiffies;
Index: linux-2.6.17-rc3/kernel/exit.c
===================================================================
--- linux-2.6.17-rc3.orig/kernel/exit.c
+++ linux-2.6.17-rc3/kernel/exit.c
@@ -36,6 +36,7 @@
 #include <linux/compat.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/res_group.h>
+#include <linux/cpu_rc.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -852,6 +853,7 @@ fastcall NORET_TYPE void do_exit(long co
 	int group_dead;
 
 	profile_task_exit(tsk);
+	cpu_rc_account(tsk, jiffies);
 
 	WARN_ON(atomic_read(&tsk->fs_excl));
 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 2/9] CPU controller - Add class hungry detection support
  2006-04-28  1:37 [PATCH 0/9] CPU controller MAEDA Naoaki
  2006-04-28  1:37 ` [PATCH 1/9] CPU controller - Add class load estimation support MAEDA Naoaki
@ 2006-04-28  1:37 ` MAEDA Naoaki
  2006-04-28  1:37 ` [PATCH 3/9] CPU controller - Add timeslice scaling support MAEDA Naoaki
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  1:37 UTC (permalink / raw)
  To: akpm, linux-kernel, ckrm-tech; +Cc: MAEDA Naoaki

2/9: cpurc_hungry_detection

This patch corresponds to section 2 in Documentation/res_groups/cpurc-internals,
adding the detection code that checks whether a task group needs more CPU
resource or not.  The CPU resource controller has to distinguish whether
tasks in the group actually need more resource or are just sleepy.
If they need more resource, the resource controller must give them more;
otherwise it must not.

Signed-off-by: Kurosawa Takahiro <kurosawa@valinux.co.jp>
Signed-off-by: MAEDA Naoaki <maeda.naoaki@jp.fujitsu.com>

 include/linux/cpu_rc.h |   17 ++++++++
 include/linux/sched.h  |    1 
 kernel/cpu_rc.c        |   96 +++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c         |    5 ++
 4 files changed, 118 insertions(+), 1 deletion(-)

Index: linux-2.6.17-rc3/include/linux/cpu_rc.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/cpu_rc.h
+++ linux-2.6.17-rc3/include/linux/cpu_rc.h
@@ -17,10 +17,14 @@
 
 #define CPU_RC_SPREAD_PERIOD	(10 * HZ)
 #define CPU_RC_LOAD_SCALE	(2 * CPU_RC_SPREAD_PERIOD)
-#define CPU_RC_GUAR_SCALE	100
+#define CPU_RC_SHARE_SCALE	100
+#define CPU_RC_TSFACTOR_MAX	CPU_RC_SHARE_SCALE
+#define CPU_RC_HCOUNT_INC	2
+#define CPU_RC_RECALC_INTERVAL	HZ
 
 struct cpu_rc_domain {
 	spinlock_t lock;
+	unsigned int hungry_count;
 	unsigned long timestamp;
 	cpumask_t cpus;
 	int numcpus;
@@ -28,16 +32,25 @@ struct cpu_rc_domain {
 };
 
 struct cpu_rc {
+	int share;
+	int is_hungry;
 	struct cpu_rc_domain *rcd;
 	struct {
 		unsigned long timestamp;
 		unsigned int load;
+		int maybe_hungry;
 	} stat[NR_CPUS];	/* XXX  need alignment */
 };
 
 extern struct cpu_rc *cpu_rc_get(task_t *);
 extern unsigned int cpu_rc_load(struct cpu_rc *);
 extern void cpu_rc_account(task_t *, unsigned long);
+extern void cpu_rc_detect_hunger(task_t *);
+
+static inline void cpu_rc_record_activated(task_t *tsk, unsigned long now)
+{
+	tsk->last_activated = now;
+}
 
 static inline void cpu_rc_record_allocation(task_t *tsk,
 					    unsigned int slice,
@@ -55,6 +68,8 @@ static inline void cpu_rc_record_allocat
 #else /* CONFIG_CPU_RC */
 
 static inline void cpu_rc_account(task_t *tsk, unsigned long now) {}
+static inline void cpu_rc_detect_hunger(task_t *tsk) {}
+static inline void cpu_rc_record_activated(task_t *tsk, unsigned long now) {}
 static inline void cpu_rc_record_allocation(task_t *tsk,
 					    unsigned int slice,
 					    unsigned long now) {}
Index: linux-2.6.17-rc3/include/linux/sched.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/sched.h
+++ linux-2.6.17-rc3/include/linux/sched.h
@@ -895,6 +895,7 @@ struct task_struct {
 #ifdef CONFIG_CPU_RC
 	unsigned int last_slice;
 	unsigned long ts_alloced;
+	unsigned long last_activated;
 #endif
 
 };
Index: linux-2.6.17-rc3/kernel/cpu_rc.c
===================================================================
--- linux-2.6.17-rc3.orig/kernel/cpu_rc.c
+++ linux-2.6.17-rc3/kernel/cpu_rc.c
@@ -14,6 +14,72 @@
 #include <linux/sched.h>
 #include <linux/cpu_rc.h>
 
+static inline int cpu_rc_is_hungry(struct cpu_rc *cr)
+{
+	return cr->is_hungry;
+}
+
+static inline void cpu_rc_set_hungry(struct cpu_rc *cr)
+{
+	cr->is_hungry++;
+	cr->rcd->hungry_count += CPU_RC_HCOUNT_INC;
+}
+
+static inline void cpu_rc_set_satisfied(struct cpu_rc *cr)
+{
+	cr->is_hungry = 0;
+}
+
+static inline int cpu_rc_is_anyone_hungry(struct cpu_rc *cr)
+{
+	return cr->rcd->hungry_count > 0;
+}
+
+/*
+ * cpu_rc_recalc_tsfactor() updates the timeslice scale factor
+ */
+static inline void cpu_rc_recalc_tsfactor(struct cpu_rc *cr)
+{
+	unsigned long now = jiffies;
+	unsigned long interval = now - cr->rcd->timestamp;
+	unsigned int load;
+	int maybe_hungry;
+	int i, n;
+
+	n = 0;
+	load = 0;
+	maybe_hungry = 0;
+
+	cpu_rcd_lock(cr);
+	if (cr->rcd->timestamp == 0)	{
+		cr->rcd->timestamp = now;
+	} else	if (interval > CPU_RC_SPREAD_PERIOD) {
+		cr->rcd->hungry_count = 0;
+		cr->rcd->timestamp = now;
+	} else if (interval > CPU_RC_RECALC_INTERVAL) {
+		cr->rcd->hungry_count >>= 1;
+		cr->rcd->timestamp = now;
+	}
+
+	for_each_cpu_mask(i, cr->rcd->cpus) {
+		load += cr->stat[i].load;
+		maybe_hungry += cr->stat[i].maybe_hungry;
+		cr->stat[i].maybe_hungry = 0;
+		n++;
+	}
+
+	BUG_ON(n == 0);
+	load = load / n;
+
+	if ((load * CPU_RC_SHARE_SCALE >= cr->share * CPU_RC_LOAD_SCALE) ||
+	    !maybe_hungry)
+		cpu_rc_set_satisfied(cr);
+	else
+		cpu_rc_set_hungry(cr);
+
+	cpu_rcd_unlock(cr);
+}
+
 /*
  * cpu_rc_load() calculates a resource group load
  */
@@ -77,3 +143,33 @@ void cpu_rc_account(task_t *tsk, unsigne
 out:
 	put_cpu();
 }
+
+/*
+ * cpu_rc_detect_hunger() judges whether the resource group may be hungry
+ */
+void cpu_rc_detect_hunger(task_t *tsk)
+{
+	struct cpu_rc *cr;
+	unsigned long wait;
+	int cpu = smp_processor_id();
+
+	if (tsk == idle_task(task_cpu(tsk)))
+		return;
+
+	if (tsk->last_activated == 0)
+		return;
+
+	cr = cpu_rc_get(tsk);
+	if (!cr) {
+		tsk->last_activated = 0;
+		return;
+	}
+
+	BUG_ON(tsk->last_slice == 0);
+	wait = jiffies - tsk->last_activated;
+	if (CPU_RC_GUAR_SCALE * tsk->last_slice	/ (wait + tsk->last_slice)
+			< cr->share)
+		cr->stat[cpu].maybe_hungry++;
+
+	tsk->last_activated = 0;
+}
Index: linux-2.6.17-rc3/kernel/sched.c
===================================================================
--- linux-2.6.17-rc3.orig/kernel/sched.c
+++ linux-2.6.17-rc3/kernel/sched.c
@@ -716,6 +716,7 @@ static void __activate_task(task_t *p, r
 
 	if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
 		target = rq->expired;
+	cpu_rc_record_activated(p, jiffies);
 	enqueue_task(p, target);
 	rq->nr_running++;
 }
@@ -1478,6 +1479,7 @@ void fastcall wake_up_new_task(task_t *p
 				p->array = current->array;
 				p->array->nr_active++;
 				rq->nr_running++;
+				cpu_rc_record_activated(p, jiffies);
 			}
 			set_need_resched();
 		} else
@@ -2686,6 +2688,8 @@ void scheduler_tick(void)
 				rq->best_expired_prio = p->static_prio;
 		} else
 			enqueue_task(p, rq->active);
+
+		cpu_rc_record_activated(p, jnow);
 	} else {
 		/*
 		 * Prevent a too long timeslice allowing a task to monopolize
@@ -3079,6 +3083,7 @@ switch_tasks:
 	rcu_qsctr_inc(task_cpu(prev));
 
 	update_cpu_clock(prev, rq, now);
+	cpu_rc_detect_hunger(next);
 
 	prev->sleep_avg -= run_time;
 	if ((long)prev->sleep_avg <= 0)

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 3/9] CPU controller - Add timeslice scaling support
  2006-04-28  1:37 [PATCH 0/9] CPU controller MAEDA Naoaki
  2006-04-28  1:37 ` [PATCH 1/9] CPU controller - Add class load estimation support MAEDA Naoaki
  2006-04-28  1:37 ` [PATCH 2/9] CPU controller - Add class hungry detection support MAEDA Naoaki
@ 2006-04-28  1:37 ` MAEDA Naoaki
  2006-04-28  1:37 ` [PATCH 4/9] CPU controller - Add interface functions MAEDA Naoaki
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  1:37 UTC (permalink / raw)
  To: akpm, linux-kernel, ckrm-tech; +Cc: MAEDA Naoaki

3/9: cpurc_timeslice_scaling

This patch corresponds to section 3 in Documentation/res_groups/cpurc-internals,
adding CPU resource control by scaling the timeslices given to each task.
The scaling factors of the timeslices are adjusted based on the difference
between the share of the resource and the actual load.

Signed-off-by: Kurosawa Takahiro <kurosawa@valinux.co.jp>
Signed-off-by: MAEDA Naoaki <maeda.naoaki@jp.fujitsu.com>

 include/linux/cpu_rc.h |   12 +++++++++
 kernel/cpu_rc.c        |   63 +++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/sched.c         |   11 +++++++-
 3 files changed, 82 insertions(+), 4 deletions(-)

Index: linux-2.6.17-rc3/include/linux/cpu_rc.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/cpu_rc.h
+++ linux-2.6.17-rc3/include/linux/cpu_rc.h
@@ -17,8 +17,11 @@
 
 #define CPU_RC_SPREAD_PERIOD	(10 * HZ)
 #define CPU_RC_LOAD_SCALE	(2 * CPU_RC_SPREAD_PERIOD)
+#define CPU_RC_LOAD_MARGIN	1
 #define CPU_RC_SHARE_SCALE	100
 #define CPU_RC_TSFACTOR_MAX	CPU_RC_SHARE_SCALE
+#define CPU_RC_TSFACTOR_INC_HI	5
+#define CPU_RC_TSFACTOR_INC_LO	2
 #define CPU_RC_HCOUNT_INC	2
 #define CPU_RC_RECALC_INTERVAL	HZ
 
@@ -34,6 +37,8 @@ struct cpu_rc_domain {
 struct cpu_rc {
 	int share;
 	int is_hungry;
+	unsigned int ts_factor;
+	unsigned long last_recalc;
 	struct cpu_rc_domain *rcd;
 	struct {
 		unsigned long timestamp;
@@ -44,6 +49,7 @@ struct cpu_rc {
 
 extern struct cpu_rc *cpu_rc_get(task_t *);
 extern unsigned int cpu_rc_load(struct cpu_rc *);
+extern unsigned int cpu_rc_scale_timeslice(task_t *, unsigned int);
 extern void cpu_rc_account(task_t *, unsigned long);
 extern void cpu_rc_detect_hunger(task_t *);
 
@@ -74,6 +80,12 @@ static inline void cpu_rc_record_allocat
 					    unsigned int slice,
 					    unsigned long now) {}
 
+static inline unsigned int cpu_rc_scale_timeslice(task_t *tsk,
+						  unsigned int slice)
+{
+	return slice;
+}
+
 #endif /* CONFIG_CPU_RC */
 
 #endif /* _LINUX_CPU_RC_H_ */
Index: linux-2.6.17-rc3/kernel/cpu_rc.c
===================================================================
--- linux-2.6.17-rc3.orig/kernel/cpu_rc.c
+++ linux-2.6.17-rc3/kernel/cpu_rc.c
@@ -14,6 +14,16 @@
 #include <linux/sched.h>
 #include <linux/cpu_rc.h>
 
+static inline void cpu_rcd_lock(struct cpu_rc *cr)
+{
+	spin_lock(&cr->rcd->lock);
+}
+
+static inline void cpu_rcd_unlock(struct cpu_rc *cr)
+{
+	spin_unlock(&cr->rcd->lock);
+}
+
 static inline int cpu_rc_is_hungry(struct cpu_rc *cr)
 {
 	return cr->is_hungry;
@@ -77,6 +87,33 @@ static inline void cpu_rc_recalc_tsfacto
 	else
 		cpu_rc_set_hungry(cr);
 
+	if (!cpu_rc_is_anyone_hungry(cr)) {
+		/* Everyone satisfied.  Extend time_slice. */
+		cr->ts_factor += CPU_RC_TSFACTOR_INC_HI;
+	} else {
+		if (cpu_rc_is_hungry(cr)) {
+			/* Extend time_slice a little. */
+			cr->ts_factor += CPU_RC_TSFACTOR_INC_LO;
+		} else if (load * CPU_RC_SHARE_SCALE >
+			   (cr->share + CPU_RC_LOAD_MARGIN)
+				* CPU_RC_LOAD_SCALE) {
+			/*
+			 * scale time_slice only when load is higher than
+			 * the share.
+			 */
+			cr->ts_factor = cr->ts_factor * cr->share
+				* CPU_RC_LOAD_SCALE
+				/ (load * CPU_RC_SHARE_SCALE);
+		}
+	}
+
+	if (cr->ts_factor == 0)
+		cr->ts_factor = 1;
+	else if (cr->ts_factor > CPU_RC_TSFACTOR_MAX)
+		cr->ts_factor = CPU_RC_TSFACTOR_MAX;
+
+	cr->last_recalc = now;
+
 	cpu_rcd_unlock(cr);
 }
 
@@ -100,7 +137,29 @@ unsigned int cpu_rc_load(struct cpu_rc *
 		n++;
 	}
 
-	return load / n * CPU_RC_GUAR_SCALE / CPU_RC_LOAD_SCALE;
+	return load / n * CPU_RC_SHARE_SCALE / CPU_RC_LOAD_SCALE;
+}
+
+/*
+ * cpu_rc_scale_timeslice scales the task timeslice based on the scale factor
+ */
+unsigned int cpu_rc_scale_timeslice(task_t *tsk, unsigned int slice)
+{
+	struct cpu_rc *cr;
+	unsigned int scaled;
+
+	cr = cpu_rc_get(tsk);
+	if (!cr)
+		return slice;
+
+	if (jiffies - cr->last_recalc > CPU_RC_RECALC_INTERVAL)
+		cpu_rc_recalc_tsfactor(cr);
+
+	scaled = slice * cr->ts_factor / CPU_RC_TSFACTOR_MAX;
+	if (scaled == 0)
+		scaled = 1;
+
+	return scaled;
 }
 
 /*
@@ -167,7 +226,7 @@ void cpu_rc_detect_hunger(task_t *tsk)
 
 	BUG_ON(tsk->last_slice == 0);
 	wait = jiffies - tsk->last_activated;
-	if (CPU_RC_GUAR_SCALE * tsk->last_slice	/ (wait + tsk->last_slice)
+	if (CPU_RC_SHARE_SCALE * tsk->last_slice / (wait + tsk->last_slice)
 			< cr->share)
 		cr->stat[cpu].maybe_hungry++;
 
Index: linux-2.6.17-rc3/kernel/sched.c
===================================================================
--- linux-2.6.17-rc3.orig/kernel/sched.c
+++ linux-2.6.17-rc3/kernel/sched.c
@@ -173,10 +173,17 @@
 
 static unsigned int task_timeslice(task_t *p)
 {
+	unsigned int timeslice;
+
 	if (p->static_prio < NICE_TO_PRIO(0))
-		return SCALE_PRIO(DEF_TIMESLICE*4, p->static_prio);
+		timeslice = SCALE_PRIO(DEF_TIMESLICE*4, p->static_prio);
 	else
-		return SCALE_PRIO(DEF_TIMESLICE, p->static_prio);
+		timeslice = SCALE_PRIO(DEF_TIMESLICE, p->static_prio);
+
+	if (!TASK_INTERACTIVE(p))
+		timeslice = cpu_rc_scale_timeslice(p, timeslice);
+
+	return timeslice;
 }
 #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)	\
 				< (long long) (sd)->cache_hot_time)

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 4/9] CPU controller - Add interface functions
  2006-04-28  1:37 [PATCH 0/9] CPU controller MAEDA Naoaki
                   ` (2 preceding siblings ...)
  2006-04-28  1:37 ` [PATCH 3/9] CPU controller - Add timeslice scaling support MAEDA Naoaki
@ 2006-04-28  1:37 ` MAEDA Naoaki
  2006-04-28  1:37 ` [PATCH 5/9] CPU controller - Documentation how the controller works MAEDA Naoaki
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  1:37 UTC (permalink / raw)
  To: akpm, linux-kernel, ckrm-tech; +Cc: MAEDA Naoaki

4/9: cpurc_interface

Adds interface functions to resource group CPU controller.

Signed-off-by: Kurosawa Takahiro <kurosawa@valinux.co.jp>
Signed-off-by: MAEDA Naoaki <maeda.naoaki@jp.fujitsu.com>

 include/linux/cpu_rc.h |    6 ++++++
 kernel/cpu_rc.c        |   45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

Index: linux-2.6.17-rc3/kernel/cpu_rc.c
===================================================================
--- linux-2.6.17-rc3.orig/kernel/cpu_rc.c
+++ linux-2.6.17-rc3/kernel/cpu_rc.c
@@ -232,3 +232,48 @@ void cpu_rc_detect_hunger(task_t *tsk)
 
 	tsk->last_activated = 0;
 }
+
+void cpu_rc_clear_stat(struct cpu_rc *cr, int cpu)
+{
+	cr->stat[cpu].timestamp = 0;
+	cr->stat[cpu].load = 0;
+	cr->stat[cpu].maybe_hungry = 0;
+}
+
+void cpu_rc_init_cr(struct cpu_rc *cr, struct cpu_rc_domain *rcd)
+{
+	cr->rcd = rcd;
+	cr->share = 0;
+	cr->ts_factor = CPU_RC_TSFACTOR_MAX;
+}
+
+void cpu_rc_get_cr(struct cpu_rc *cr)
+{
+	cpu_rcd_lock(cr);
+	cr->rcd->numcrs++;
+	cpu_rcd_unlock(cr);
+}
+
+void cpu_rc_put_cr(struct cpu_rc *cr)
+{
+	cpu_rcd_lock(cr);
+	cr->is_hungry = 0;
+	cr->rcd->numcrs--;
+	cpu_rcd_unlock(cr);
+}
+
+void cpu_rc_init_rcd(struct cpu_rc_domain *rcd)
+{
+	rcd->cpus = cpu_online_map;
+	spin_lock_init(&rcd->lock);
+	rcd->hungry_count = 0;
+	rcd->numcpus = cpus_weight(cpu_online_map);
+	rcd->numcrs = 0;
+}
+
+void cpu_rc_set_share(struct cpu_rc *cr, int val)
+{
+	cpu_rcd_lock(cr);
+	cr->share = val;
+	cpu_rcd_unlock(cr);
+}
Index: linux-2.6.17-rc3/include/linux/cpu_rc.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/cpu_rc.h
+++ linux-2.6.17-rc3/include/linux/cpu_rc.h
@@ -52,6 +52,12 @@ extern unsigned int cpu_rc_load(struct c
 extern unsigned int cpu_rc_scale_timeslice(task_t *, unsigned int);
 extern void cpu_rc_account(task_t *, unsigned long);
 extern void cpu_rc_detect_hunger(task_t *);
+extern void cpu_rc_clear_stat(struct cpu_rc *, int);
+extern void cpu_rc_init_cr(struct cpu_rc *, struct cpu_rc_domain *);
+extern void cpu_rc_get_cr(struct cpu_rc *);
+extern void cpu_rc_put_cr(struct cpu_rc *);
+extern void cpu_rc_init_rcd(struct cpu_rc_domain *);
+extern void cpu_rc_set_share(struct cpu_rc *, int);
 
 static inline void cpu_rc_record_activated(task_t *tsk, unsigned long now)
 {

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 5/9] CPU controller - Documentation how the controller works
  2006-04-28  1:37 [PATCH 0/9] CPU controller MAEDA Naoaki
                   ` (3 preceding siblings ...)
  2006-04-28  1:37 ` [PATCH 4/9] CPU controller - Add interface functions MAEDA Naoaki
@ 2006-04-28  1:37 ` MAEDA Naoaki
  2006-04-28  1:38 ` [PATCH 6/9] CPU controller - Add basic functions and registering the controller MAEDA Naoaki
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  1:37 UTC (permalink / raw)
  To: akpm, linux-kernel, ckrm-tech; +Cc: MAEDA Naoaki

5/9: cpurc_docs

Documentation that describes how the CPU resource controller works.

Signed-off-by: Kurosawa Takahiro <kurosawa@valinux.co.jp>
Signed-off-by: MAEDA Naoaki <maeda.naoaki@jp.fujitsu.com>

 Documentation/res_groups/cpurc-internals |  167 +++++++++++++++++++++++++++++++
 1 files changed, 167 insertions(+)

Index: linux-2.6.17-rc3/Documentation/res_groups/cpurc-internals
===================================================================
--- /dev/null
+++ linux-2.6.17-rc3/Documentation/res_groups/cpurc-internals
@@ -0,0 +1,167 @@
+CPU resource controller internals
+
+ There are 3 components in the CPU resource controller:
+
+ (1)  load estimation
+ (2)  hungry detection
+ (3)  timeslice scaling
+
+ We need to estimate the resource group load in order to check whether
+ the share is satisfied or not.  Resource group load also gets lower than
+ the share when all the tasks in the resource group tend to sleep.  We need to
+ check whether the resource group needs to be scheduled more by hungry
+ detection.  If a resource group needs to be scheduled more, timeslices of
+ tasks are scaled by timeslice scaling.
+
+1. Load estimation
+
+ We calculate the resource group load as the accumulation of task loads in the
+ resource group.  We need to calculate the task load first, then calculate the
+ resource group load from the task loads.
+
+ Task load estimation
+
+  Task load is estimated as the ratio of:
+   * the timeslice value allocated to the task (Ts)
+  to:
+   * the time that is taken for the task to run out the allocated timeslice
+     (Tr).
+  If a task can use all the CPU time, Ts / Tr becomes 1 for example.
+
+  The detailed procedure of the calculation is as follows:
+  (1) Record the timeslice (Ts) and the time when the timeslice is
+      allocated to the task (by calling cpu_rc_record_allocation()).
+      * The timeslice value is recorded to task->last_slice ( = Ts).
+      * The time is recorded to task->ts_alloced.
+  (2) Calculate the task load when the timeslice is expired
+      (by calling cpu_rc_account()).
+      Tr is calculated as:
+       Tr = jiffies - task->ts_alloced
+      Then task load (Ts / Tr) becomes:
+       Ts / Tr = task->last_slice / (jiffies - task->ts_alloced)
+
+      The load value is scaled by CPU_RC_LOAD_SCALE.
+      If the load value equals to CPU_RC_LOAD_SCALE, it indicates 100%
+      CPU usage.
+
+          task->ts_alloced   task scheduled             now
+             v               v                          v
+             |---------------===========================|
+
+                             |<------------------------>|
+                               Ts ( = task->last_slice)
+
+             |<---------------------------------------->|
+                Tr ( = now - task->ts_alloced)
+
+             |<------------->|
+               the time that the task isn't scheduled
+
+
+      Note that task load calculation is also needed for strict
+      accuracy when a task forks or exits, because timeslice is
+      changed on fork and exit.  But we don't do that in order to
+      simplify the code and in order not to introduce overhead on fork
+      and exit.  Probably we can get enough accurate number without
+      calculating the task load on fork/exit.
+
+ Resource group load estimation:
+
+  Resource group load is the accumulation of load values of tasks in
+  the resource group in the duration of CPU_RC_SPREAD_PERIOD.
+  Per-CPU resource group load is recalculated each time the task load is
+  calculated in the cpu_rc_account() function.
+  Then, at CPU_RC_RECALC_INTERVAL intervals, the resource group load value
+  is calculated as the average of the per-CPU resource group loads.
+
+  Task load is accumulated to the per-CPU resource group load as if the resource
+  group uses Ts/Tr of the CPU time from task->ts_alloced to now (the time
+  the timeslice expired).
+
+  So the time that the task has used the CPU from (now - CPU_RC_SPREAD_PERIOD)
+  to now (Ttsk) should be:
+
+   if task->ts_alloced < now - CPU_RC_SPREAD_PERIOD:
+     Ts/Tr * CPU_RC_SPREAD_PERIOD
+     (We assume that the task has used the CPU at the constant rate of Ts/Tr.)
+
+                    now-CPU_RC_SPREAD_PERIOD                now
+                    v                                       v
+                    |---------------------------------------|
+         |==================================================| load: Ts/Tr
+         ^
+         task->ts_alloced
+
+   else:
+     Ts
+
+                    now-CPU_RC_SPREAD_PERIOD                now
+                    v                                       v
+                    |---------------------------------------|
+                               |============================| load: Ts/Tr
+                               ^
+                               task->ts_alloced
+
+  Also, we assume that the resource group uses the CPU at the rate of
+  the resource group load from (now - CPU_RC_SPREAD_PERIOD) to the last time
+  the per-CPU resource group load was calculated 
+  (stored in struct cpu_rc::stat[cpu].timestamp).  
+  If cpu_rc::stat[cpu].timestamp < now - CPU_RC_SPREAD_PERIOD, we assume that
+  the resource group doesn't use the CPU from (now - CPU_RC_SPREAD_PERIOD) to
+  task->ts_alloced.
+
+  So the time that the resource group uses the CPU from
+  (now - CPU_RC_SPREAD_PERIOD) to now (Trgrp) should be:
+   if cpu_rc::stat[cpu].timestamp < now - CPU_RC_SPREAD_PERIOD:
+     0
+   else:
+     cpu_rc::stat[cpu].load * (cpu_rc::stat[cpu].timestamp - (now - CPU_RC_SPREAD_PERIOD))
+
+  The new per-CPU resource group load that will be assigned to
+  cpu_rc::stat[cpu].load is calculated as:
+    (Ttsk + Trgrp) / CPU_RC_SPREAD_PERIOD
+
+2. Hungry detection
+
+ When the resource group load is less than the share, there are 2 cases:
+  (a) the share is enough and tasks in the resource group have time for sleep
+  (b) tasks in other resource groups overuse the CPU
+
+ We should not scale the timeslice in case (a) even if the resource group load
+ is lower than the share.  In order to distinguish case (b) from
+ case (a), we measure the time (Tsch) from when a task is activated
+ (stored in task->last_activated) till when the task is actually
+ scheduled.  If the resource group load is lower than the share but tasks
+ in the resource group are quickly scheduled, it can be classified to case (a).
+ If timeslice / (Tsch + timeslice) of a task is lower than the share, the
+ resource group that has the task is marked as "maybe hungry."  If the load of
+ the resource group that is marked as "maybe hungry" is lower than the
+ share, it is treated as hungry and the timeslices of tasks in
+ other resource groups will be scaled down.
+
+
+3. Timeslice scaling
+
+ If there are hungry resource groups, we need to adjust timeslices to satisfy
+ the share.  To scale timeslices, we introduce a scaling factor
+ used for scaling timeslices.  The scaling factor is associated with
+ the resource group (stored in the cpu_rc structure) and adaptively adjusted
+ according to the resource group load and the share.
+
+ If some resource groups are hungry, the scaling factor of the resource group
+ that is not hungry is calculated as follows (note: F is the scaling factor):
+   F_new = F * share / resource_group_load
+
+ And the scaling factor of the hungry resource group is calculated as:
+   F_new = F + CPU_RC_TSFACTOR_INC_LO   (CPU_RC_TSFACTOR_INC_LO is defined as 2)
+
+ When no resource group is hungry, the scaling factor is calculated
+ as follows in order to recover the timeslices:
+   F_new = F + CPU_RC_TSFACTOR_INC_HI   (CPU_RC_TSFACTOR_INC_HI is defined as 5)
+
+ Note that the maximum value of F is limited to CPU_RC_TSFACTOR_MAX.
+ The timeslice assigned to each task is:
+   timeslice_scaled = timeslice_orig * F / CPU_RC_TSFACTOR_MAX
+
+ where timeslice_orig is the value that is calculated by the conventional
+ O(1) scheduler.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 6/9] CPU controller - Add basic functions and registering the controller
  2006-04-28  1:37 [PATCH 0/9] CPU controller MAEDA Naoaki
                   ` (4 preceding siblings ...)
  2006-04-28  1:37 ` [PATCH 5/9] CPU controller - Documentation how the controller works MAEDA Naoaki
@ 2006-04-28  1:38 ` MAEDA Naoaki
  2006-04-28  1:38 ` [PATCH 7/9] CPU controller - Add routines to change share values and show stat MAEDA Naoaki
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  1:38 UTC (permalink / raw)
  To: akpm, linux-kernel, ckrm-tech; +Cc: MAEDA Naoaki

6/9: cpu_init

Adds the basic functions and registers the CPU controller with Resource Groups.

Signed-off-by: MAEDA Naoaki <maeda.naoaki@jp.fujitsu.com>
Signed-off-by: Kurosawa Takahiro <kurosawa@valinux.co.jp>

 init/Kconfig              |   10 +++
 kernel/res_group/Makefile |    1 
 kernel/res_group/cpu.c    |  142 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 153 insertions(+)

Index: linux-2.6.17-rc3/init/Kconfig
===================================================================
--- linux-2.6.17-rc3.orig/init/Kconfig
+++ linux-2.6.17-rc3/init/Kconfig
@@ -185,6 +185,16 @@ config RES_GROUPS_NUMTASKS
 
 	  Say N if unsure, Y to use the feature.
 
+config RES_GROUPS_CPU
+	bool "CPU Resource Controller"
+	select CPU_RC
+	depends on RES_GROUPS
+	default y
+	help
+	  Provides a CPU Resource Controller for Resource Groups.
+
+	  Say N if unsure, Y to use the feature.
+
 endmenu
 config SYSCTL
 	bool "Sysctl support"
Index: linux-2.6.17-rc3/kernel/res_group/Makefile
===================================================================
--- linux-2.6.17-rc3.orig/kernel/res_group/Makefile
+++ linux-2.6.17-rc3/kernel/res_group/Makefile
@@ -1,3 +1,4 @@
 obj-y = res_group.o shares.o task.o
 obj-$(CONFIG_RES_GROUPS_NUMTASKS) += numtasks.o
+obj-$(CONFIG_RES_GROUPS_CPU) += cpu.o
 obj-$(CONFIG_RGCS) += rgcs.o
Index: linux-2.6.17-rc3/kernel/res_group/cpu.c
===================================================================
--- /dev/null
+++ linux-2.6.17-rc3/kernel/res_group/cpu.c
@@ -0,0 +1,142 @@
+/*
+ *  kernel/res_group/cpu.c
+ *
+ *  CPU resource controller for Resource Groups
+ *
+ *  Copyright 2005-2006 FUJITSU LIMITED
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of the Linux
+ *  distribution for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/cpu_rc.h>
+#include <linux/res_group_rc.h>
+
+static const char res_ctlr_name[] = "cpu";
+
+struct cpu_res {
+	struct resource_group *rgroup;	/* resource group I belong to */
+	struct res_shares shares;	/* embedded; get_shares_cpu() maps it back via container_of() */
+	struct cpu_rc	cpu_rc;	/* cpu resource controller */
+	int 	cnt_total_min_shares;/* total min_shares behind the res_group */
+};
+
+static struct cpu_rc_domain grcd; /* system wide resource controller domain */
+struct res_controller cpu_ctlr;
+
+static struct cpu_res *get_shares_cpu(struct res_shares *shares)
+{
+	if (shares)
+		return container_of(shares, struct cpu_res, shares);
+	return NULL;
+}
+
+static struct cpu_res *get_res_group_cpu(struct resource_group *rgroup)
+{
+	return get_shares_cpu(get_controller_shares(rgroup, &cpu_ctlr));
+}
+
+struct cpu_rc *cpu_rc_get(task_t *tsk)
+{
+	struct resource_group *rgroup = tsk->res_group;
+	struct cpu_res *res;
+
+	/* controller is not registered, or the task has no resource group */
+	if ((cpu_ctlr.ctlr_id == NO_RES_ID) || (rgroup == NULL))
+		return NULL;
+
+	res = get_res_group_cpu(rgroup);
+	/* cpu controller is not available for this resource group */
+	if (!res)
+		return NULL;
+
+	return &res->cpu_rc;
+}
+
+static void cpu_res_init_one(struct cpu_res *cpu_res)
+{
+	cpu_res->shares.min_shares = 0;
+	cpu_res->shares.max_shares = SHARE_UNSUPPORTED;
+	cpu_res->shares.child_shares_divisor = SHARE_DEFAULT_DIVISOR;
+	cpu_res->shares.unused_min_shares = SHARE_DEFAULT_DIVISOR;
+
+	cpu_res->cnt_total_min_shares = 0;
+	cpu_rc_init_cr(&cpu_res->cpu_rc, &grcd);
+	cpu_rc_get_cr(&cpu_res->cpu_rc);
+}
+
+static struct res_shares *cpu_alloc_shares_struct(
+						struct resource_group *rgroup)
+{
+	struct cpu_res *res;
+	/* size the allocation from the pointee (struct cpu_res), not struct cpu */
+	res = kzalloc(sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return NULL;
+	res->rgroup = rgroup;
+	cpu_res_init_one(res);
+	if (is_res_group_root(rgroup))	{
+		res->cpu_rc.share = SHARE_DEFAULT_DIVISOR;
+		res->cnt_total_min_shares = SHARE_DEFAULT_DIVISOR;
+		res->shares.min_shares = SHARE_DONT_CARE;
+		res->shares.max_shares = SHARE_DONT_CARE;
+	}
+	return &res->shares;
+}
+
+static void cpu_free_shares_struct(struct res_shares *my_res)
+{
+	struct cpu_res *res, *parres;
+	u64	temp = 0;
+
+	res = get_shares_cpu(my_res);
+	if (!res)
+		return;
+
+	parres = get_res_group_cpu(res->rgroup->parent);
+	/* return child's min_shares to parent resource group */
+	spin_lock(&parres->rgroup->group_lock);
+	if (parres->shares.child_shares_divisor) {
+		temp = (u64) parres->shares.unused_min_shares
+				* parres->cnt_total_min_shares;
+		do_div(temp, parres->shares.child_shares_divisor);
+	}
+	cpu_rc_set_share(&parres->cpu_rc, (int)temp);
+	spin_unlock(&parres->rgroup->group_lock);
+
+	cpu_rc_put_cr(&res->cpu_rc);
+	kfree(res);
+}
+
+struct res_controller cpu_ctlr = {
+	.name = res_ctlr_name,
+	.depth_supported = 3,
+	.ctlr_id = NO_RES_ID,
+	.alloc_shares_struct = cpu_alloc_shares_struct,
+	.free_shares_struct = cpu_free_shares_struct,
+};
+
+int __init init_cpu_res(void)
+{
+	if (cpu_ctlr.ctlr_id != NO_RES_ID)
+		return -EBUSY; /* already registered */
+	cpu_rc_init_rcd(&grcd);
+	return register_controller(&cpu_ctlr);
+}
+
+void __exit exit_cpu_res(void)
+{
+	int rc;
+	do {
+		rc = unregister_controller(&cpu_ctlr);
+	} while (rc == -EBUSY);
+	BUG_ON(rc != 0);
+}
+
+module_init(init_cpu_res)
+module_exit(exit_cpu_res)

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 7/9] CPU controller - Add routines to change share values and show stat
  2006-04-28  1:37 [PATCH 0/9] CPU controller MAEDA Naoaki
                   ` (5 preceding siblings ...)
  2006-04-28  1:38 ` [PATCH 6/9] CPU controller - Add basic functions and registering the controller MAEDA Naoaki
@ 2006-04-28  1:38 ` MAEDA Naoaki
  2006-04-28  1:38 ` [PATCH 8/9] CPU controller - Add cpu hotplug support MAEDA Naoaki
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  1:38 UTC (permalink / raw)
  To: akpm, linux-kernel, ckrm-tech; +Cc: MAEDA Naoaki

7/9: cpu_shares_n_stats

Adds routines to change share values and show statistics.

Signed-off-by: MAEDA Naoaki <maeda.naoaki@jp.fujitsu.com>
Signed-off-by: Kurosawa Takahiro <kurosawa@valinux.co.jp>

 kernel/res_group/cpu.c |  120 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 120 insertions(+)

Index: linux-2.6.17-rc3/kernel/res_group/cpu.c
===================================================================
--- linux-2.6.17-rc3.orig/kernel/res_group/cpu.c
+++ linux-2.6.17-rc3/kernel/res_group/cpu.c
@@ -113,12 +113,132 @@ static void cpu_free_shares_struct(struc
 	kfree(res);
 }
 
+static int recalc_shares(int self_shares, int parent_shares, int parent_divisor)
+{
+	u64 numerator;
+
+	if (parent_divisor == 0)
+		return 0;
+	numerator = (u64) self_shares * parent_shares;
+	do_div(numerator, parent_divisor);
+	return numerator;
+}
+
+static int recalc_unused_shares(int self_cnt_min_shares,
+				int self_unused_min_shares, int self_divisor)
+{
+	u64 numerator;
+
+	if (self_divisor == 0)
+		return 0;
+	numerator = (u64) self_unused_min_shares * self_cnt_min_shares;
+	do_div(numerator, self_divisor);
+	return numerator;
+}
+
+static void recalc_self(struct cpu_res *res, struct cpu_res *parres)
+{
+	struct res_shares *par = &parres->shares;
+	struct res_shares *self = &res->shares;
+	u64 cnt_total, cnt_min_shares;
+
+	/* total scales by parent's divisor; the unused part by our own */
+	cnt_total = recalc_shares(self->min_shares,
+					parres->cnt_total_min_shares,
+					par->child_shares_divisor);
+	cnt_min_shares = recalc_unused_shares((int) cnt_total,
+					self->unused_min_shares,
+					self->child_shares_divisor);
+	cpu_rc_set_share(&res->cpu_rc, (int) cnt_min_shares);
+	res->cnt_total_min_shares = (int) cnt_total;
+}
+
+static void
+recalc_and_propagate(struct cpu_res *res)
+{
+	struct resource_group *child = NULL;
+	struct cpu_res *parres, *childres;
+
+	parres = get_res_group_cpu(res->rgroup->parent);
+
+	if (parres)
+		recalc_self(res, parres);
+
+	/* propagate to children */
+	spin_lock(&res->rgroup->group_lock);
+	for_each_child(child, res->rgroup) {
+		childres = get_res_group_cpu(child);
+		if (childres)
+			recalc_and_propagate(childres);
+	}
+	spin_unlock(&res->rgroup->group_lock);
+	return;
+}
+
+static void cpu_shares_changed(struct res_shares *my_res)
+{
+	struct cpu_res *parres, *res;
+	struct res_shares *cur, *par;
+	u64    temp = 0;
+
+	res = get_shares_cpu(my_res);
+	if (!res)
+		return;
+	cur = &res->shares;
+
+	if (!is_res_group_root(res->rgroup)) {
+		spin_lock(&res->rgroup->parent->group_lock);
+		parres = get_res_group_cpu(res->rgroup->parent);
+		par = &parres->shares;
+	} else {
+		par = NULL;
+		parres = NULL;
+	}
+
+	if (parres) {
+		/* adjust parent's unused min_shares */
+		temp = recalc_unused_shares(parres->cnt_total_min_shares,
+					par->unused_min_shares,
+					par->child_shares_divisor);
+		cpu_rc_set_share(&parres->cpu_rc, temp);
+	} else {
+		/* adjust root resouce group's unused min_shares */
+		temp = recalc_unused_shares(SHARE_DEFAULT_DIVISOR,
+					cur->unused_min_shares,
+					cur->child_shares_divisor);
+		cpu_rc_set_share(&res->cpu_rc, temp);
+	}
+	recalc_and_propagate(res);
+
+	if (!is_res_group_root(res->rgroup))
+		spin_unlock(&res->rgroup->parent->group_lock);
+}
+
+static ssize_t cpu_show_stats(struct res_shares *my_res, char *buf,
+							size_t buf_size)
+{
+	struct cpu_res *res;
+	unsigned int load = 0;
+	ssize_t	i;
+
+	res = get_shares_cpu(my_res);
+	if (!res)
+		return -EINVAL;
+
+	load = cpu_rc_load(&res->cpu_rc);
+	i = snprintf(buf, buf_size, "%s:effective_min_shares=%d, load=%d\n",
+				res_ctlr_name, res->cpu_rc.share, load);
+	return i;
+}
+
 struct res_controller cpu_ctlr = {
 	.name = res_ctlr_name,
 	.depth_supported = 3,
 	.ctlr_id = NO_RES_ID,
 	.alloc_shares_struct = cpu_alloc_shares_struct,
 	.free_shares_struct = cpu_free_shares_struct,
+	.shares_changed = cpu_shares_changed,
+	.show_stats = cpu_show_stats,
 };
 
 int __init init_cpu_res(void)

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 8/9] CPU controller - Add cpu hotplug support
  2006-04-28  1:37 [PATCH 0/9] CPU controller MAEDA Naoaki
                   ` (6 preceding siblings ...)
  2006-04-28  1:38 ` [PATCH 7/9] CPU controller - Add routines to change share values and show stat MAEDA Naoaki
@ 2006-04-28  1:38 ` MAEDA Naoaki
  2006-04-28  1:38 ` [PATCH 9/9] CPU controller - Documentation how to use the controller MAEDA Naoaki
  2006-04-28  5:25 ` [PATCH 0/9] CPU controller Mike Galbraith
  9 siblings, 0 replies; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  1:38 UTC (permalink / raw)
  To: akpm, linux-kernel, ckrm-tech; +Cc: MAEDA Naoaki

8/9: cpu_hotplug

Adds a cpu hotplug notifier for the Resource Groups CPU controller.

Signed-off-by: MAEDA Naoaki <maeda.naoaki@jp.fujitsu.com>
Signed-off-by: Kurosawa Takahiro <kurosawa@valinux.co.jp>

 kernel/res_group/cpu.c |   47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 47 insertions(+)

Index: linux-2.6.17-rc3/kernel/res_group/cpu.c
===================================================================
--- linux-2.6.17-rc3.orig/kernel/res_group/cpu.c
+++ linux-2.6.17-rc3/kernel/res_group/cpu.c
@@ -231,6 +231,50 @@ static ssize_t cpu_show_stats(struct res
 	return i;
 }
 
+static void clear_stat_and_propagate(struct cpu_res * res, int cpu)
+{
+	struct resource_group *child = NULL;
+	struct cpu_res *childres;
+
+	cpu_rc_clear_stat(&res->cpu_rc, cpu);
+
+	/* propagate to children */
+	spin_lock(&res->rgroup->group_lock);
+	for_each_child(child, res->rgroup) {
+		childres = get_res_group_cpu(child);
+		if (childres)
+			clear_stat_and_propagate(childres, cpu);
+	}
+	spin_unlock(&res->rgroup->group_lock);
+}
+
+static int __devinit cpu_notify(struct notifier_block *self,
+				unsigned long action, void *hcpu)
+{
+	struct resource_group *root = &default_res_group;
+	struct cpu_res *res;
+	int	cpu = (long) hcpu;
+
+	switch (action)	{
+
+	case CPU_DEAD:
+		res = get_res_group_cpu(root);
+		clear_stat_and_propagate(res, cpu);
+		/* FALL THROUGH */
+	case CPU_ONLINE:
+		grcd.cpus = cpu_online_map;
+		grcd.numcpus = cpus_weight(cpu_online_map);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpu_nb = {
+	.notifier_call	= cpu_notify,
+};
+
 struct res_controller cpu_ctlr = {
 	.name = res_ctlr_name,
 	.depth_supported = 3,
@@ -246,6 +290,8 @@ int __init init_cpu_res(void)
 	if (cpu_ctlr.ctlr_id != NO_RES_ID)
 		return -EBUSY; /* already registered */
 	cpu_rc_init_rcd(&grcd);
+ 	/* Register notifier for hot plugged/unplugged CPUs */
+ 	register_cpu_notifier(&cpu_nb);
 	return register_controller(&cpu_ctlr);
 }
 
@@ -256,6 +302,7 @@ void __exit exit_cpu_res(void)
 		rc = unregister_controller(&cpu_ctlr);
 	} while (rc == -EBUSY);
 	BUG_ON(rc != 0);
+	unregister_cpu_notifier(&cpu_nb);
 }
 
 module_init(init_cpu_res)

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 9/9] CPU controller - Documentation how to use the controller
  2006-04-28  1:37 [PATCH 0/9] CPU controller MAEDA Naoaki
                   ` (7 preceding siblings ...)
  2006-04-28  1:38 ` [PATCH 8/9] CPU controller - Add cpu hotplug support MAEDA Naoaki
@ 2006-04-28  1:38 ` MAEDA Naoaki
  2006-04-28  5:25 ` [PATCH 0/9] CPU controller Mike Galbraith
  9 siblings, 0 replies; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  1:38 UTC (permalink / raw)
  To: akpm, linux-kernel, ckrm-tech; +Cc: MAEDA Naoaki

9/9: cpu_docs

Documentation on how to use the CPU controller.

Signed-off-by: MAEDA Naoaki <maeda.naoaki@jp.fujitsu.com>
Signed-off-by: Kurosawa Takahiro <kurosawa@valinux.co.jp>

 Documentation/res_groups/cpurc |   72 +++++++++++++++++++++++++++++++++++++++++
 1 files changed, 72 insertions(+)

Index: linux-2.6.17-rc3/Documentation/res_groups/cpurc
===================================================================
--- /dev/null
+++ linux-2.6.17-rc3/Documentation/res_groups/cpurc
@@ -0,0 +1,73 @@
+Introduction
+------------
+
+The CPU resource controller enables a user/sysadmin to control the percentage
+of CPU time given to the tasks in a resource group. It adjusts the time_slice
+of tasks based on feedback from the difference between the target value and
+the current usage, in order to steer the CPU usage toward the target value.
+
+Installation
+------------
+
+1. Configure "CPU Resource Controller" under RES_GROUPS. Currently, this cannot
+be configured as a module.
+
+2. Reboot the system with the new kernel.
+
+3. Verify that the CPU resource controller is present by reading
+the file /config/res_groups/shares (should show a line with res=cpu).
+
+Assigning shares
+----------------
+
+Follows the general approach of setting shares for a resource group in Resource
+Groups.
+
+# echo "res=cpu,min_shares=val" > shares
+
+sets the min_shares of a resource group.
+
+The CPU resource controller calculates an effective min_shares in percent
+for each resource group. The following is an example of resource group
+min_shares settings and the resulting effective min_shares.
+
+			/
+			  effective_min_shares
+			  = 100% - 50% - 30%
+			  = 20%
+	+---------------+---------------+
+	/A min_shares=50%		/B min_shares=30%
+	   effective_min_shares		   effective_min_shares
+	   = 50% - 10% - 25%	    	   = 30% - 0%
+	   = 15%			   = 30%
++---------------+---------------+
+/C min_shares=20%		/D min_shares=50%
+effective_min_shares		   effective_min_shares
+= 20% of 50% - 0% = 10%	   = 50% of 50% - 0 %
+= 10%			   = 25%
+
+If the min_shares of the resource group /A is changed from 50% to 40% in the
+above example, the effective_min_shares of the resource groups /A, /C and /D
+are automatically changed to 12%, 8% and 20% respectively.
+
+Although the child_shares_divisor can be changed, the effective_min_shares is
+always calculated in percent.
+
+Note that the CPU resource controller doesn't support the limit, so assigning
+the limit for "res=cpu" will have no effect.
+
+Monitoring
+----------
+
+The stats file shows the effective min_shares and the current CPU usage of a
+resource group as percentages.
+
+# cat stats
+cpu:effective_min_shares=50, load=40
+
+That means the effective min_shares of the resource group is 50% and the current
+load average of the resource group is 40%.
+
+Since the tasks in the resource group do not always try to consume CPU,
+the load could be less or greater than the effective_min_shares. Both cases
+are normal.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/9] CPU controller
  2006-04-28  1:37 [PATCH 0/9] CPU controller MAEDA Naoaki
                   ` (8 preceding siblings ...)
  2006-04-28  1:38 ` [PATCH 9/9] CPU controller - Documentation how to use the controller MAEDA Naoaki
@ 2006-04-28  5:25 ` Mike Galbraith
  2006-04-28  5:48   ` MAEDA Naoaki
  2006-04-28  5:56   ` Kirill Korotaev
  9 siblings, 2 replies; 34+ messages in thread
From: Mike Galbraith @ 2006-04-28  5:25 UTC (permalink / raw)
  To: MAEDA Naoaki; +Cc: akpm, linux-kernel, ckrm-tech

On Fri, 2006-04-28 at 10:37 +0900, MAEDA Naoaki wrote:
> Andrew,
> 
> This patchset adds a CPU resource controller on top of Resource Groups. 
> The CPU resource controller manages CPU resources by scaling timeslice
> allocated for each task without changing the algorithm of the O(1)
> scheduler.
> 
> Please consider these for inclusion in -mm tree.

This patch set professes to be a resource controller, yet 100% of high
priority tasks are uncontrolled.  Distribution of CPU among high
priority tasks isn't important, but distribution of what they leave
behind is?

	-Mike


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/9] CPU controller
  2006-04-28  5:25 ` [PATCH 0/9] CPU controller Mike Galbraith
@ 2006-04-28  5:48   ` MAEDA Naoaki
  2006-04-28  6:59     ` Mike Galbraith
  2006-04-28  5:56   ` Kirill Korotaev
  1 sibling, 1 reply; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  5:48 UTC (permalink / raw)
  To: Mike Galbraith; +Cc: akpm, linux-kernel, ckrm-tech

Hi Mike,

On Fri, 28 Apr 2006 07:25:35 +0200
Mike Galbraith <efault@gmx.de> wrote:

> On Fri, 2006-04-28 at 10:37 +0900, MAEDA Naoaki wrote:
> > Andrew,
> > 
> > This patchset adds a CPU resource controller on top of Resource Groups. 
> > The CPU resource controller manages CPU resources by scaling timeslice
> > allocated for each task without changing the algorithm of the O(1)
> > scheduler.
> > 
> > Please consider these for inclusion in -mm tree.
> 
> This patch set professes to be a resource controller, yet 100% of high
> priority tasks are uncontrolled.  Distribution of CPU among high
> priority tasks isn't important, but distribution of what they leave
> behind is?

Do you mean niced tasks are uncontrolled by the controller? 
TASK_INTERACTIVEs are left untouched intentionally, but niced tasks
are also controlled.

Thanks,
MAEDA Naoaki

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28  5:25 ` [PATCH 0/9] CPU controller Mike Galbraith
  2006-04-28  5:48   ` MAEDA Naoaki
@ 2006-04-28  5:56   ` Kirill Korotaev
  2006-04-28  7:11     ` Mike Galbraith
  2006-04-28  8:28     ` MAEDA Naoaki
  1 sibling, 2 replies; 34+ messages in thread
From: Kirill Korotaev @ 2006-04-28  5:56 UTC (permalink / raw)
  To: Mike Galbraith; +Cc: MAEDA Naoaki, akpm, linux-kernel, ckrm-tech

>>Andrew,
>>
>>This patchset adds a CPU resource controller on top of Resource Groups. 
>>The CPU resource controller manages CPU resources by scaling timeslice
>>allocated for each task without changing the algorithm of the O(1)
>>scheduler.
>>
>>Please consider these for inclusion in -mm tree.
> 
> 
> This patch set professes to be a resource controller, yet 100% of high
> priority tasks are uncontrolled.  Distribution of CPU among high
> priority tasks isn't important, but distribution of what they leave
> behind is?

Also, as it turned out, this doesn't do good fair scheduling under some 
circumstances (with busy loops on SMP) :(. Which was reported to MAEDA.
And it doesn't provide limits. As Andrew noticed already, the 
infrastructure is ok, but without much content (or at least a good plan) we 
can end up with only the infrastructure.

I'm also pretty sure that a CPU controller based on timeslice tricks 
behaves poorly on burstable load patterns as well and with interactive 
tasks. So before committing I propose to perform good testing on 
different load patterns.

Thanks,
Kirill

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/9] CPU controller
  2006-04-28  5:48   ` MAEDA Naoaki
@ 2006-04-28  6:59     ` Mike Galbraith
  2006-04-28  7:26       ` MAEDA Naoaki
  0 siblings, 1 reply; 34+ messages in thread
From: Mike Galbraith @ 2006-04-28  6:59 UTC (permalink / raw)
  To: MAEDA Naoaki; +Cc: akpm, linux-kernel, ckrm-tech

On Fri, 2006-04-28 at 14:48 +0900, MAEDA Naoaki wrote:
> Hi Mike,
> 
> On Fri, 28 Apr 2006 07:25:35 +0200
> Mike Galbraith <efault@gmx.de> wrote:
> 
> > On Fri, 2006-04-28 at 10:37 +0900, MAEDA Naoaki wrote:
> > > Andrew,
> > > 
> > > This patchset adds a CPU resource controller on top of Resource Groups. 
> > > The CPU resource controller manages CPU resources by scaling timeslice
> > > allocated for each task without changing the algorithm of the O(1)
> > > scheduler.
> > > 
> > > Please consider these for inclusion in -mm tree.
> > 
> > This patch set professes to be a resource controller, yet 100% of high
> > priority tasks are uncontrolled.  Distribution of CPU among high
> > priority tasks isn't important, but distribution of what they leave
> > behind is?
> 
> Do you mean niced tasks are uncontrolled by the controller? 
> TASK_INTERACTIVEs are left untouched intentionally, but niced tasks
> are also controlled.

Until they attain interactive status.  Note that attaining this status
requires only one sleep, and once attained, it can be sustained.  I
don't know what the current exact numbers are, but until recently, the
numbers were that once sleep_avg became full, a non-niced task could
sustain ~95% cpu indefinitely.

You simply cannot ignore interactive tasks.  At the very least, you have
to disallow requeue if the resource limit has been exceeded, otherwise,
this patch set is non-functional.

	-Mike


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28  5:56   ` Kirill Korotaev
@ 2006-04-28  7:11     ` Mike Galbraith
  2006-04-28  7:46       ` Mike Galbraith
  2006-04-28  8:28     ` MAEDA Naoaki
  1 sibling, 1 reply; 34+ messages in thread
From: Mike Galbraith @ 2006-04-28  7:11 UTC (permalink / raw)
  To: Kirill Korotaev; +Cc: MAEDA Naoaki, akpm, linux-kernel, ckrm-tech

On Fri, 2006-04-28 at 09:56 +0400, Kirill Korotaev wrote:
> I'm also pretty sure, that CPU controller based on timeslice tricks 
> behaves poorly on burstable load patterns as well and with interactive 
> tasks. So before commiting I propose to perform a good testing on 
> different load patterns.

Yes, it can only react very slowly.

	-Mike


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/9] CPU controller
  2006-04-28  6:59     ` Mike Galbraith
@ 2006-04-28  7:26       ` MAEDA Naoaki
  2006-04-28  7:41         ` Mike Galbraith
  0 siblings, 1 reply; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  7:26 UTC (permalink / raw)
  To: Mike Galbraith; +Cc: akpm, linux-kernel, ckrm-tech, maeda.naoaki

On Fri, 28 Apr 2006 08:59:49 +0200
Mike Galbraith <efault@gmx.de> wrote:

> On Fri, 2006-04-28 at 14:48 +0900, MAEDA Naoaki wrote:
> > Hi Mike,
> > 
> > On Fri, 28 Apr 2006 07:25:35 +0200
> > Mike Galbraith <efault@gmx.de> wrote:
> > 
> > > On Fri, 2006-04-28 at 10:37 +0900, MAEDA Naoaki wrote:
> > > > Andrew,
> > > > 
> > > > This patchset adds a CPU resource controller on top of Resource Groups. 
> > > > The CPU resource controller manages CPU resources by scaling timeslice
> > > > allocated for each task without changing the algorithm of the O(1)
> > > > scheduler.
> > > > 
> > > > Please consider these for inclusion in -mm tree.
> > > 
> > > This patch set professes to be a resource controller, yet 100% of high
> > > priority tasks are uncontrolled.  Distribution of CPU among high
> > > priority tasks isn't important, but distribution of what they leave
> > > behind is?
> > 
> > Do you mean niced tasks are uncontrolled by the controller? 
> > TASK_INTERACTIVEs are left untouched intentionally, but niced tasks
> > are also controlled.
> 
> Until they attain interactive status.  Note that attaining this status
> requires only one sleep, and once attained, it can be sustained.  I
> don't know what the current exact numbers are, but until recently, the
> numbers were that once sleep_avg became full, a non-niced task could
> sustain ~95% cpu indefinitely.
> 
> You simply cannot ignore interactive tasks.  At the very least, you have
> to disallow requeue if the resource limit has been exceeded, otherwise,
> this patch set is non-functional.

It can be easily implemented on top of the current code. Do you know a good
sample program that is judged as interactive but consumes lots of cpu?

Thanks,
MAEDA Naoaki



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/9] CPU controller
  2006-04-28  7:26       ` MAEDA Naoaki
@ 2006-04-28  7:41         ` Mike Galbraith
  2006-04-28  7:56           ` [ckrm-tech] " MAEDA Naoaki
  0 siblings, 1 reply; 34+ messages in thread
From: Mike Galbraith @ 2006-04-28  7:41 UTC (permalink / raw)
  To: MAEDA Naoaki; +Cc: akpm, linux-kernel, ckrm-tech

On Fri, 2006-04-28 at 16:26 +0900, MAEDA Naoaki wrote:
> On Fri, 28 Apr 2006 08:59:49 +0200
> Mike Galbraith <efault@gmx.de> wrote:
> > You simply cannot ignore interactive tasks.  At the very least, you have
> > to disallow requeue if the resource limit has been exceeded, otherwise,
> > this patch set is non-functional.
> 
> It can be easily implemented on top of the current code. Do you know a good
> sample program that is judged as interactive but consumes lots of cpu?

X sometimes, Mozilla sometimes,... KDE konsole when scrolling,...
anything that on average sleeps more than roughly 5% of its slice can
starve you to death either alone, or (worse) with peers.

	-Mike


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28  7:11     ` Mike Galbraith
@ 2006-04-28  7:46       ` Mike Galbraith
  2006-04-28  8:13         ` Kirill Korotaev
  2006-04-28 10:09         ` Con Kolivas
  0 siblings, 2 replies; 34+ messages in thread
From: Mike Galbraith @ 2006-04-28  7:46 UTC (permalink / raw)
  To: Kirill Korotaev; +Cc: MAEDA Naoaki, akpm, linux-kernel, ckrm-tech

On Fri, 2006-04-28 at 09:11 +0200, Mike Galbraith wrote:
> On Fri, 2006-04-28 at 09:56 +0400, Kirill Korotaev wrote:
> > I'm also pretty sure, that CPU controller based on timeslice tricks 
> > behaves poorly on burstable load patterns as well and with interactive 
> > tasks. So before commiting I propose to perform a good testing on 
> > different load patterns.
> 
> Yes, it can only react very slowly.

Actually, this might not be that much of a problem.  I know I can
traverse queue heads periodically very cheaply.  Traversing both active
and expired arrays to requeue starving tasks once every 100ms costs max
4usecs (3GHz P4) for a typical distribution.

	-Mike


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28  7:41         ` Mike Galbraith
@ 2006-04-28  7:56           ` MAEDA Naoaki
  2006-04-28  9:29             ` Mike Galbraith
  0 siblings, 1 reply; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  7:56 UTC (permalink / raw)
  To: Mike Galbraith; +Cc: akpm, linux-kernel, ckrm-tech

On Fri, 28 Apr 2006 09:41:09 +0200
Mike Galbraith <efault@gmx.de> wrote:

> On Fri, 2006-04-28 at 16:26 +0900, MAEDA Naoaki wrote:
> > On Fri, 28 Apr 2006 08:59:49 +0200
> > Mike Galbraith <efault@gmx.de> wrote:
> > > You simply cannot ignore interactive tasks.  At the very least, you have
> > > to disallow requeue if the resource limit has been exceeded, otherwise,
> > > this patch set is non-functional.
> > 
> > It can be easily implemented on top of the current code. Do you know a good
> > sample program that is judged as interactive but consumes lots of cpu?
> 
> X sometimes, Mozilla sometimes,... KDE konsole when scrolling,...
> anything that on average sleeps more than roughly 5% of it's slice can
> starve you to death either alone, or (worse) with peers.

They are true interactive tasks, aren't they? 
Oh! I should say "that is not interactive, but judged as interactive
and consumes lots of cpu". 

Thanks,
MAEDA Naoaki

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28  7:46       ` Mike Galbraith
@ 2006-04-28  8:13         ` Kirill Korotaev
  2006-04-28  9:35           ` Mike Galbraith
  2006-04-28 10:09         ` Con Kolivas
  1 sibling, 1 reply; 34+ messages in thread
From: Kirill Korotaev @ 2006-04-28  8:13 UTC (permalink / raw)
  To: Mike Galbraith; +Cc: MAEDA Naoaki, akpm, linux-kernel, ckrm-tech

>>>I'm also pretty sure, that CPU controller based on timeslice tricks 
>>>behaves poorly on burstable load patterns as well and with interactive 
>>>tasks. So before commiting I propose to perform a good testing on 
>>>different load patterns.
>>
>>Yes, it can only react very slowly.
> 
> 
> Actually, this might not be that much of a problem.  I know I can
> traverse queue heads periodically very cheaply.  Traversing both active
> and expired arrays to requeue starving tasks once every 100ms costs max
> 4usecs (3GHz P4) for a typical distribution.

With fair scheduling this can be a big problem, as tasks running for less 
than a tick are hard to account for :/

Thanks,
Kirill



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28  5:56   ` Kirill Korotaev
  2006-04-28  7:11     ` Mike Galbraith
@ 2006-04-28  8:28     ` MAEDA Naoaki
  1 sibling, 0 replies; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28  8:28 UTC (permalink / raw)
  To: Kirill Korotaev; +Cc: efault, akpm, linux-kernel, ckrm-tech, maeda.naoaki

On Fri, 28 Apr 2006 09:56:52 +0400
Kirill Korotaev <dev@sw.ru> wrote:

> Also, as it turned out these doesn't do good fair scheduling under some 
> curcemstances (with busy loops on SMP) :(. Which was reported to MAEDA.

Although it has buggy behaviour under some circumstances, 
the fundamental problem of load unfairness on SMP comes from
the fact that a single task cannot use more than one CPU at a time.
When there are not enough runnable tasks on SMP, the achievable
shares depend heavily on how tasks are allocated to CPUs.

So a few busy loops on SMP is a tough case. It is alleviated
by increasing the number of runnable tasks.

Thanks,
MAEDA Naoaki

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28  7:56           ` [ckrm-tech] " MAEDA Naoaki
@ 2006-04-28  9:29             ` Mike Galbraith
  2006-04-28 10:01               ` Mike Galbraith
  2006-04-28 10:11               ` Con Kolivas
  0 siblings, 2 replies; 34+ messages in thread
From: Mike Galbraith @ 2006-04-28  9:29 UTC (permalink / raw)
  To: MAEDA Naoaki; +Cc: akpm, linux-kernel, ckrm-tech

On Fri, 2006-04-28 at 16:56 +0900, MAEDA Naoaki wrote:
> On Fri, 28 Apr 2006 09:41:09 +0200
> Mike Galbraith <efault@gmx.de> wrote:
> 
> > On Fri, 2006-04-28 at 16:26 +0900, MAEDA Naoaki wrote:
> > > On Fri, 28 Apr 2006 08:59:49 +0200
> > > Mike Galbraith <efault@gmx.de> wrote:
> > > > You simply cannot ignore interactive tasks.  At the very least, you have
> > > > to disallow requeue if the resource limit has been exceeded, otherwise,
> > > > this patch set is non-functional.
> > > 
> > > It can be easily implemented on top of the current code. Do you know a good
> > > sample program that is judged as interactive but consumes lots of cpu?
> > 
> > X sometimes, Mozilla sometimes,... KDE konsole when scrolling,...
> > anything that on average sleeps more than roughly 5% of it's slice can
> > starve you to death either alone, or (worse) with peers.
> 
> They are true interactive tasks, aren't they? 
> Oh! I should say "that is not interactive, but judged as interactive
> and consumes lots of cpu". 

Why do you care?  There is only one thing that matters, and that is the
fact that cpu can be used and remain utterly uncontrolled.  This renders
your system non-functional for resource management.  Period.  All stop.

	-Mike


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28  8:13         ` Kirill Korotaev
@ 2006-04-28  9:35           ` Mike Galbraith
  0 siblings, 0 replies; 34+ messages in thread
From: Mike Galbraith @ 2006-04-28  9:35 UTC (permalink / raw)
  To: Kirill Korotaev; +Cc: MAEDA Naoaki, akpm, linux-kernel, ckrm-tech

On Fri, 2006-04-28 at 12:13 +0400, Kirill Korotaev wrote:
> >>>I'm also pretty sure, that CPU controller based on timeslice tricks 
> >>>behaves poorly on burstable load patterns as well and with interactive 
> >>>tasks. So before commiting I propose to perform a good testing on 
> >>>different load patterns.
> >>
> >>Yes, it can only react very slowly.
> > 
> > 
> > Actually, this might not be that much of a problem.  I know I can
> > traverse queue heads periodically very cheaply.  Traversing both active
> > and expired arrays to requeue starving tasks once every 100ms costs max
> > 4usecs (3GHz P4) for a typical distribution.
> 
> with fair scheduling this can be a big problem, as tasks working less
> than a tick are hard to account for :/

Yeah, tasks dodging the timer interrupt can steal considerable time.  I
instrumented this once, and caught tasks stealing in excess of 30% of
the timeslice of their more lethargic brothers.  Generally, they get
caught often enough that statistics ~evens the playing field.

	-Mike


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28  9:29             ` Mike Galbraith
@ 2006-04-28 10:01               ` Mike Galbraith
  2006-04-28 10:11               ` Con Kolivas
  1 sibling, 0 replies; 34+ messages in thread
From: Mike Galbraith @ 2006-04-28 10:01 UTC (permalink / raw)
  To: MAEDA Naoaki; +Cc: akpm, linux-kernel, ckrm-tech

On Fri, 2006-04-28 at 11:29 +0200, Mike Galbraith wrote:
> On Fri, 2006-04-28 at 16:56 +0900, MAEDA Naoaki wrote:
> > On Fri, 28 Apr 2006 09:41:09 +0200
> > Mike Galbraith <efault@gmx.de> wrote:
> > 
> > > On Fri, 2006-04-28 at 16:26 +0900, MAEDA Naoaki wrote:
> > > > On Fri, 28 Apr 2006 08:59:49 +0200
> > > > Mike Galbraith <efault@gmx.de> wrote:
> > > > > You simply cannot ignore interactive tasks.  At the very least, you have
> > > > > to disallow requeue if the resource limit has been exceeded, otherwise,
> > > > > this patch set is non-functional.
> > > > 
> > > > It can be easily implemented on top of the current code. Do you know a good
> > > > sample program that is judged as interactive but consumes lots of cpu?
> > > 
> > > X sometimes, Mozilla sometimes,... KDE konsole when scrolling,...
> > > anything that on average sleeps more than roughly 5% of it's slice can
> > > starve you to death either alone, or (worse) with peers.
> > 
> > They are true interactive tasks, aren't they? 
> > Oh! I should say "that is not interactive, but judged as interactive
> > and consumes lots of cpu". 
> 
> Why do you care?  There is only one thing that matters, and that is the
> fact that cpu can be used and remain utterly uncontrolled.  This renders
> your system non-functional for resource management.  Period.  All stop.

Here's an example: this is a snippet of me doing a modest parallel
kernel compile in an nfs mounted localhost directory.  What part of this
is truly interactive, and what part do you think should be excluded from
resource management?

14467 mikeg     16   0 17052  13m 3712 R 20.7  1.3   0:00.33 cc1
14498 mikeg     16   0 17052  13m 3708 S 19.7  1.3   0:00.35 cc1
14523 mikeg     16   0 14852  11m 3532 D 13.1  1.1   0:00.26 cc1
14445 mikeg     16   0 14716  11m 3692 S 12.2  1.2   0:00.25 cc1
14492 mikeg     16   0 15912  11m 3096 R  8.5  1.2   0:00.21 cc1
14469 mikeg     15   0 14892  10m 1988 D  7.5  1.0   0:00.19 cc1
14513 mikeg     25   0 11480 6508 1976 R  3.8  0.6   0:00.10 cc1
14579 mikeg     25   0  9128 4260 1908 R  3.8  0.4   0:00.04 cc1
14532 mikeg     15   0     0    0    0 Z  1.9  0.0   0:00.02 as




^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28  7:46       ` Mike Galbraith
  2006-04-28  8:13         ` Kirill Korotaev
@ 2006-04-28 10:09         ` Con Kolivas
  2006-04-28 10:16           ` Mike Galbraith
  1 sibling, 1 reply; 34+ messages in thread
From: Con Kolivas @ 2006-04-28 10:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mike Galbraith, Kirill Korotaev, MAEDA Naoaki, akpm, ckrm-tech

On Friday 28 April 2006 17:46, Mike Galbraith wrote:
> On Fri, 2006-04-28 at 09:11 +0200, Mike Galbraith wrote:
> > On Fri, 2006-04-28 at 09:56 +0400, Kirill Korotaev wrote:
> > > I'm also pretty sure, that CPU controller based on timeslice tricks
> > > behaves poorly on burstable load patterns as well and with interactive
> > > tasks. So before commiting I propose to perform a good testing on
> > > different load patterns.
> >
> > Yes, it can only react very slowly.
>
> Actually, this might not be that much of a problem.  I know I can
> traverse queue heads periodically very cheaply.  Traversing both active
> and expired arrays to requeue starving tasks once every 100ms costs max
> 4usecs (3GHz P4) for a typical distribution.

How many tasks? Your function was O(n) so the more tasks the longer that max 
value was.

-- 
-ck

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28  9:29             ` Mike Galbraith
  2006-04-28 10:01               ` Mike Galbraith
@ 2006-04-28 10:11               ` Con Kolivas
  2006-04-28 12:07                 ` MAEDA Naoaki
  1 sibling, 1 reply; 34+ messages in thread
From: Con Kolivas @ 2006-04-28 10:11 UTC (permalink / raw)
  To: linux-kernel; +Cc: Mike Galbraith, MAEDA Naoaki, akpm, ckrm-tech

On Friday 28 April 2006 19:29, Mike Galbraith wrote:
> On Fri, 2006-04-28 at 16:56 +0900, MAEDA Naoaki wrote:
> > On Fri, 28 Apr 2006 09:41:09 +0200
> >
> > Mike Galbraith <efault@gmx.de> wrote:
> > > On Fri, 2006-04-28 at 16:26 +0900, MAEDA Naoaki wrote:
> > > > On Fri, 28 Apr 2006 08:59:49 +0200
> > > >
> > > > Mike Galbraith <efault@gmx.de> wrote:
> > > > > You simply cannot ignore interactive tasks.  At the very least, you
> > > > > have to disallow requeue if the resource limit has been exceeded,
> > > > > otherwise, this patch set is non-functional.
> > > >
> > > > It can be easily implemented on top of the current code. Do you know
> > > > a good sample program that is judged as interactive but consumes lots
> > > > of cpu?
> > >
> > > X sometimes, Mozilla sometimes,... KDE konsole when scrolling,...
> > > anything that on average sleeps more than roughly 5% of it's slice can
> > > starve you to death either alone, or (worse) with peers.
> >
> > They are true interactive tasks, aren't they?
> > Oh! I should say "that is not interactive, but judged as interactive
> > and consumes lots of cpu".
>
> Why do you care?  There is only one thing that matters, and that is the
> fact that cpu can be used and remain utterly uncontrolled.  This renders
> your system non-functional for resource management.  Period.  All stop.

I agree with Mike here. It's either global resource management or it isn't. If 
one user is using all interactive tasks and the other user none it's unfair 
resource management.

-- 
-ck

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28 10:09         ` Con Kolivas
@ 2006-04-28 10:16           ` Mike Galbraith
  2006-04-28 10:26             ` Con Kolivas
  0 siblings, 1 reply; 34+ messages in thread
From: Mike Galbraith @ 2006-04-28 10:16 UTC (permalink / raw)
  To: Con Kolivas; +Cc: linux-kernel, Kirill Korotaev, MAEDA Naoaki, akpm, ckrm-tech

On Fri, 2006-04-28 at 20:09 +1000, Con Kolivas wrote:
> On Friday 28 April 2006 17:46, Mike Galbraith wrote:
> > On Fri, 2006-04-28 at 09:11 +0200, Mike Galbraith wrote:
> > > On Fri, 2006-04-28 at 09:56 +0400, Kirill Korotaev wrote:
> > > > I'm also pretty sure, that CPU controller based on timeslice tricks
> > > > behaves poorly on burstable load patterns as well and with interactive
> > > > tasks. So before commiting I propose to perform a good testing on
> > > > different load patterns.
> > >
> > > Yes, it can only react very slowly.
> >
> > Actually, this might not be that much of a problem.  I know I can
> > traverse queue heads periodically very cheaply.  Traversing both active
> > and expired arrays to requeue starving tasks once every 100ms costs max
> > 4usecs (3GHz P4) for a typical distribution.
> 
> How many tasks? Your function was O(n) so the more tasks the longer that max 
> value was.

Nope.  It's not O(tasks), it's O(occupied_queues).  Occupied queues is
generally not a large number.

	-Mike


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28 10:16           ` Mike Galbraith
@ 2006-04-28 10:26             ` Con Kolivas
  2006-04-28 10:42               ` Mike Galbraith
  0 siblings, 1 reply; 34+ messages in thread
From: Con Kolivas @ 2006-04-28 10:26 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: linux-kernel, Kirill Korotaev, MAEDA Naoaki, akpm, ckrm-tech

On Friday 28 April 2006 20:16, Mike Galbraith wrote:
> On Fri, 2006-04-28 at 20:09 +1000, Con Kolivas wrote:
> > On Friday 28 April 2006 17:46, Mike Galbraith wrote:
> > > On Fri, 2006-04-28 at 09:11 +0200, Mike Galbraith wrote:
> > > > On Fri, 2006-04-28 at 09:56 +0400, Kirill Korotaev wrote:
> > > > > I'm also pretty sure, that CPU controller based on timeslice tricks
> > > > > behaves poorly on burstable load patterns as well and with
> > > > > interactive tasks. So before commiting I propose to perform a good
> > > > > testing on different load patterns.
> > > >
> > > > Yes, it can only react very slowly.
> > >
> > > Actually, this might not be that much of a problem.  I know I can
> > > traverse queue heads periodically very cheaply.  Traversing both active
> > > and expired arrays to requeue starving tasks once every 100ms costs max
> > > 4usecs (3GHz P4) for a typical distribution.
> >
> > How many tasks? Your function was O(n) so the more tasks the longer that
> > max value was.
>
> Nope.  It's not O(tasks), it's O(occupied_queues).  Occupied queues is
> generally not a large number.

Ok well that P4 does about 700,000 context switches per second so 4us sounds 
large to me.

-- 
-ck

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28 10:26             ` Con Kolivas
@ 2006-04-28 10:42               ` Mike Galbraith
  0 siblings, 0 replies; 34+ messages in thread
From: Mike Galbraith @ 2006-04-28 10:42 UTC (permalink / raw)
  To: Con Kolivas; +Cc: linux-kernel, Kirill Korotaev, MAEDA Naoaki, akpm, ckrm-tech

On Fri, 2006-04-28 at 20:26 +1000, Con Kolivas wrote:
> On Friday 28 April 2006 20:16, Mike Galbraith wrote:
> > > How many tasks? Your function was O(n) so the more tasks the longer that
> > > max value was.
> >
> > Nope.  It's not O(tasks), it's O(occupied_queues).  Occupied queues is
> > generally not a large number.
> 
> Ok well that P4 does about 700,000 context switches per second so 4us sounds 
> large to me.

I'm not always calling it now, only when necessary.  In any case, I'd
much rather pay 4us (it averages 1) every 100ms when at 100% cpu than
take a multi-second latency hit for high priority tasks as now occurs
with a heavy load when the array switch is forced.  This hit is more
likely with my (unfortunately necessary) change to wake tasks on the
expired array.  That's why I started trying to eliminate the switch.

	-Mike

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28 10:11               ` Con Kolivas
@ 2006-04-28 12:07                 ` MAEDA Naoaki
  2006-04-28 13:09                   ` Con Kolivas
  0 siblings, 1 reply; 34+ messages in thread
From: MAEDA Naoaki @ 2006-04-28 12:07 UTC (permalink / raw)
  To: Con Kolivas; +Cc: linux-kernel, Mike Galbraith, akpm, ckrm-tech

Con Kolivas wrote:

> I agree with Mike here. It's either global resource management or it isn't. If 
> one user is using all interactive tasks and the other user none it's unfair 
> resource management.

My intention was not to hurt interactive tasks' response, but it seems
that just ignoring interactive tasks is not good. I'll consider
regulating interactive tasks as well.

Thanks,
MAEDA Naoaki





^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28 12:07                 ` MAEDA Naoaki
@ 2006-04-28 13:09                   ` Con Kolivas
  2006-04-28 13:55                     ` Hirokazu Takahashi
  0 siblings, 1 reply; 34+ messages in thread
From: Con Kolivas @ 2006-04-28 13:09 UTC (permalink / raw)
  To: MAEDA Naoaki; +Cc: linux-kernel, Mike Galbraith, akpm, ckrm-tech

On Friday 28 April 2006 22:07, MAEDA Naoaki wrote:
> Con Kolivas wrote:
> > I agree with Mike here. It's either global resource management or it
> > isn't. If one user is using all interactive tasks and the other user none
> > it's unfair resource management.
>
> My intention was not to hurt interactive task's response, but it seems
> that just ignoring interactive tasks is not good. I'll consider
> regulating interactive tasks also.

I appreciate the gesture of concern over interactive tasks :-) Unfortunately 
it doesn't change the fact that interactive tasks can also consume large 
proportions of the resources, and that any interactivity estimator will get 
it wrong on occasion and flag a non interactive task as interactive.

-- 
-ck

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28 13:09                   ` Con Kolivas
@ 2006-04-28 13:55                     ` Hirokazu Takahashi
  2006-04-28 14:55                       ` Con Kolivas
  2006-04-28 15:39                       ` Mike Galbraith
  0 siblings, 2 replies; 34+ messages in thread
From: Hirokazu Takahashi @ 2006-04-28 13:55 UTC (permalink / raw)
  To: kernel; +Cc: maeda.naoaki, linux-kernel, efault, akpm, ckrm-tech

Hi,

> On Friday 28 April 2006 22:07, MAEDA Naoaki wrote:
> > Con Kolivas wrote:
> > > I agree with Mike here. It's either global resource management or it
> > > isn't. If one user is using all interactive tasks and the other user none
> > > it's unfair resource management.
> >
> > My intention was not to hurt interactive task's response, but it seems
> > that just ignoring interactive tasks is not good. I'll consider
> > regulating interactive tasks also.
> 
> I appreciate the gesture of concern over interactive tasks :-) Unfortunately 
> it doesn't change the fact that interactive tasks can also consume large 
> proportions of the resources, and that any interactivity estimator will get 
> it wrong on occasion and flag a non interactive task as interactive.

I think you can introduce some threshold to estimate whether
a process should be treated as an interactive process or not,
whereas the vanilla kernel defines it statically.
This would make it hard for processes in a resource group that consumes
a lot of CPU time to be treated as interactive processes.


Thanks,
Hirokazu Takahashi.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28 13:55                     ` Hirokazu Takahashi
@ 2006-04-28 14:55                       ` Con Kolivas
  2006-04-28 15:39                       ` Mike Galbraith
  1 sibling, 0 replies; 34+ messages in thread
From: Con Kolivas @ 2006-04-28 14:55 UTC (permalink / raw)
  To: Hirokazu Takahashi; +Cc: maeda.naoaki, linux-kernel, efault, akpm, ckrm-tech

On Friday 28 April 2006 23:55, Hirokazu Takahashi wrote:
> I think you can introduce some threshold to estimate whether
> a process should be treated as an interactive process or not
> while vanilla kernel defines it statically.

The static definition (TASK_INTERACTIVE) used is based on what the cpu 
scheduler already knows about the tasks so although it's static, it is based 
on the dynamic behaviour and most recent sleep/run data. Unfortunately we 
can't define it any clearer than that. We have no better metric that states 
clearly that anything is definitely interactive. Thus there is no clearly 
defined threshold we can use either. If it was that simple the estimator 
would be simpler and we wouldn't have half a dozen alternative cpu schedulers 
available all looking to tackle much the same thing.

-- 
-ck

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [ckrm-tech] Re: [PATCH 0/9] CPU controller
  2006-04-28 13:55                     ` Hirokazu Takahashi
  2006-04-28 14:55                       ` Con Kolivas
@ 2006-04-28 15:39                       ` Mike Galbraith
  1 sibling, 0 replies; 34+ messages in thread
From: Mike Galbraith @ 2006-04-28 15:39 UTC (permalink / raw)
  To: Hirokazu Takahashi; +Cc: kernel, maeda.naoaki, linux-kernel, akpm, ckrm-tech

On Fri, 2006-04-28 at 22:55 +0900, Hirokazu Takahashi wrote:
> I think you can introduce some threshold to estimate whether
> a process should be treated as an interactive process or not
> while vanilla kernel defines it statically.

Hmm.  What do you mean by static?  It is dynamic to my eyes.  The entire
mechanism is a dynamic priority mechanism.

If you know of anything that might help differentiate interactive tasks,
please speak up :)  It's really a bugger of a problem.

	-Mike


^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2006-04-28 15:39 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-04-28  1:37 [PATCH 0/9] CPU controller MAEDA Naoaki
2006-04-28  1:37 ` [PATCH 1/9] CPU controller - Add class load estimation support MAEDA Naoaki
2006-04-28  1:37 ` [PATCH 2/9] CPU controller - Add class hungry detection support MAEDA Naoaki
2006-04-28  1:37 ` [PATCH 3/9] CPU controller - Add timeslice scaling support MAEDA Naoaki
2006-04-28  1:37 ` [PATCH 4/9] CPU controller - Add interface functions MAEDA Naoaki
2006-04-28  1:37 ` [PATCH 5/9] CPU controller - Documentation how the controller works MAEDA Naoaki
2006-04-28  1:38 ` [PATCH 6/9] CPU controller - Add basic functions and registering the controller MAEDA Naoaki
2006-04-28  1:38 ` [PATCH 7/9] CPU controller - Add routines to change share values and show stat MAEDA Naoaki
2006-04-28  1:38 ` [PATCH 8/9] CPU controller - Add cpu hotplug support MAEDA Naoaki
2006-04-28  1:38 ` [PATCH 9/9] CPU controller - Documentation how to use the controller MAEDA Naoaki
2006-04-28  5:25 ` [PATCH 0/9] CPU controller Mike Galbraith
2006-04-28  5:48   ` MAEDA Naoaki
2006-04-28  6:59     ` Mike Galbraith
2006-04-28  7:26       ` MAEDA Naoaki
2006-04-28  7:41         ` Mike Galbraith
2006-04-28  7:56           ` [ckrm-tech] " MAEDA Naoaki
2006-04-28  9:29             ` Mike Galbraith
2006-04-28 10:01               ` Mike Galbraith
2006-04-28 10:11               ` Con Kolivas
2006-04-28 12:07                 ` MAEDA Naoaki
2006-04-28 13:09                   ` Con Kolivas
2006-04-28 13:55                     ` Hirokazu Takahashi
2006-04-28 14:55                       ` Con Kolivas
2006-04-28 15:39                       ` Mike Galbraith
2006-04-28  5:56   ` Kirill Korotaev
2006-04-28  7:11     ` Mike Galbraith
2006-04-28  7:46       ` Mike Galbraith
2006-04-28  8:13         ` Kirill Korotaev
2006-04-28  9:35           ` Mike Galbraith
2006-04-28 10:09         ` Con Kolivas
2006-04-28 10:16           ` Mike Galbraith
2006-04-28 10:26             ` Con Kolivas
2006-04-28 10:42               ` Mike Galbraith
2006-04-28  8:28     ` MAEDA Naoaki

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.