All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ed Tomlinson <tomlins@cam.org>
To: elenstev@mesatop.com, Andrew Morton <akpm@digeo.com>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: 2.5.65-mm2
Date: Wed, 19 Mar 2003 23:27:45 -0500	[thread overview]
Message-ID: <200303192327.45883.tomlins@cam.org> (raw)
In-Reply-To: <1048117516.1602.6.camel@spc1.esa.lanl.gov>

[-- Attachment #1: Type: text/plain, Size: 1395 bytes --]

On March 19, 2003 06:45 pm, Steven P. Cole wrote:
> On Wed, 2003-03-19 at 17:33, Andrew Morton wrote:
> > "Steven P. Cole" <elenstev@mesatop.com> wrote:
> > > > Summary: using ext3, the simple window shake and scrollbar wiggle
> > > > tests were much improved, but really using Evolution left much to be
> > > > desired.
> > >
> > > Replying to myself for a followup,
> > >
> > > I repeated the tests with 2.5.65-mm2 elevator=deadline and the
> > > situation was similar to elevator=as.  Running dbench on ext3, the
> > > response to desktop switches and window wiggles was improved over
> > > running dbench on reiserfs, but typing in Evolution was subject to long
> > > delays with dbench clients greater than 16.
> >
> > OK, final question before I get off my butt and find a way to reproduce
> > this:
> >
> > Does reverting
> >
> > http://www.zip.com.au/~akpm/linux/patches/2.5/2.5.65/2.5.65-mm2/broken-ou
> >t/sched-2.5.64-D3.patch
> >
> > help?
>
> Sorry, didn't have much time for a lot of testing, but no miracles
> occurred.  With 5 minutes of testing 2.5.65-mm2 and dbench 24 on ext3
> and that patch reverted (first hunk had to be manually fixed), I don't
> see any improvement.  Still the same long long delays in trying to use
> Evolution.

Steven,

Do things improve with the patch below applied?  You have to backout the 
schedule-tuneables patch before appling it.

Ed Tomlinson

[-- Attachment #2: ptg-D3-mm2 --]
[-- Type: text/plain, Size: 9225 bytes --]

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	1.1087  -> 1.1088 
#	include/linux/sched.h	1.137   -> 1.138  
#	       kernel/fork.c	1.112   -> 1.113  
#	       kernel/user.c	1.6     -> 1.7    
#	      kernel/sched.c	1.163   -> 1.164  
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/03/07	ed@oscar.et.ca	1.1088
# Add user and thread group governors to prevent either from monoplizing
# the system.  The governors work by limiting the sum of the timeslices
# of active tasks in a group to <n> timeslices.  The defaults set <n> to
# 1.5 for thread groups and to 10 for user tasks.
# 
# For numa systems the governors are per node.  Idealy the storage
# should also be local to the node however, we do not have dynamic per
# node storage yet...
# --------------------------------------------
#
diff -Nru a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h	Sat Mar  8 14:18:48 2003
+++ b/include/linux/sched.h	Sat Mar  8 14:18:48 2003
@@ -178,6 +178,11 @@
 
 #include <linux/aio.h>
 
+struct ptg_struct {		/* pseudo thread groups */
+	atomic_t active[MAX_NUMNODES];
+        atomic_t count;         /* number of refs */
+};
+
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
@@ -278,6 +283,7 @@
 struct user_struct {
 	atomic_t __count;	/* reference count */
 	atomic_t processes;	/* How many processes does this user have? */
+	atomic_t active[MAX_NUMNODES];
 	atomic_t files;		/* How many open files does this user have? */
 
 	/* Hash table maintenance information */
@@ -344,6 +350,8 @@
 	struct list_head ptrace_list;
 
 	struct mm_struct *mm, *active_mm;
+	struct ptg_struct * ptgroup;		/* pseudo thread group for this task */
+	atomic_t *governor;			/* the atomic_t that governs this task */
 
 /* task state */
 	struct linux_binfmt *binfmt;
diff -Nru a/kernel/fork.c b/kernel/fork.c
--- a/kernel/fork.c	Sat Mar  8 14:18:48 2003
+++ b/kernel/fork.c	Sat Mar  8 14:18:48 2003
@@ -96,12 +96,24 @@
 	}
 }
 
+void free_ptgroup(struct task_struct *tsk)
+{
+	if (tsk->ptgroup && atomic_sub_and_test(1,&tsk->ptgroup->count)) {
+                kfree(tsk->ptgroup);
+                tsk->ptgroup = NULL;
+                tsk->governor = &tsk->user->active[cpu_to_node(task_cpu(tsk))];
+                if (tsk == current)
+                        atomic_inc(tsk->governor);
+        }
+}
+
 void __put_task_struct(struct task_struct *tsk)
 {
 	WARN_ON(!(tsk->state & (TASK_DEAD | TASK_ZOMBIE)));
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	free_ptgroup(tsk);
 	security_task_free(tsk);
 	free_uid(tsk->user);
 	free_task_struct(tsk);
@@ -469,6 +481,7 @@
 
 	tsk->mm = NULL;
 	tsk->active_mm = NULL;
+	tsk->ptgroup = NULL;
 
 	/*
 	 * Are we cloning a kernel thread?
@@ -734,6 +747,32 @@
 	p->flags = new_flags;
 }
 
+static inline int setup_governor(unsigned long clone_flags, struct task_struct *p)
+{
+	if ( ((clone_flags & CLONE_VM) && (clone_flags & CLONE_FILES)) ||
+	     (clone_flags & CLONE_THREAD)) {
+		if (current->ptgroup)
+			atomic_inc(&current->ptgroup->count);
+		else {
+			int i;
+			current->ptgroup = kmalloc(sizeof(struct ptg_struct), GFP_ATOMIC);
+			if (!current->ptgroup)
+				return 1;
+			/* printk(KERN_INFO "ptgroup - pid %u\n",current->pid); */
+			atomic_set(&current->ptgroup->count,2);
+			for(i=0; i < MAX_NUMNODES; i++)
+				atomic_set(&current->ptgroup->active[i], 0);
+			atomic_set(&current->ptgroup->active[numa_node_id()], 1);
+			atomic_dec(current->governor);
+			current->governor = &current->ptgroup->active[numa_node_id()];
+		}
+		p->ptgroup = current->ptgroup;
+		p->governor = &p->ptgroup->active[numa_node_id()];
+	} else
+		p->governor = &p->user->active[numa_node_id()];
+	return 0;
+}
+
 asmlinkage int sys_set_tid_address(int *tidptr)
 {
 	current->clear_child_tid = tidptr;
@@ -876,6 +915,12 @@
 		goto bad_fork_cleanup_mm;
 	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
 	if (retval)
+		goto bad_fork_cleanup_namespace;
+	/*
+	 * Setup the governor pointer for the new process, allocating a new ptg as
+	 * required if the process is a thread. 
+	 */
+	if (setup_governor(clone_flags, p))
 		goto bad_fork_cleanup_namespace;
 
 	if (clone_flags & CLONE_CHILD_SETTID)
diff -Nru a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c	Sat Mar  8 14:18:48 2003
+++ b/kernel/sched.c	Sat Mar  8 14:18:48 2003
@@ -68,6 +68,9 @@
 #define MAX_SLEEP_AVG		(10*HZ)
 #define STARVATION_LIMIT	(10*HZ)
 #define NODE_THRESHOLD		125
+#define THREAD_GOVERNOR		15      /* allow threads groups 1.5 full timeslices */
+#define USER_GOVERNOR		100     /* allow user 10 full timeslices */
+
 
 /*
  * If a task is 'interactive' then we reinsert it in the active
@@ -123,7 +126,26 @@
 
 static inline unsigned int task_timeslice(task_t *p)
 {
-	return BASE_TIMESLICE(p);
+	int slice = BASE_TIMESLICE(p);
+	int threads = atomic_read(p->governor) * 10;
+	int govern = threads;
+	if (p->user->uid)
+		govern = (p->ptgroup) ? THREAD_GOVERNOR : USER_GOVERNOR;
+	if (threads > govern) {
+		slice = (slice * govern) / threads;
+		slice = (slice > MIN_TIMESLICE) ? slice : MIN_TIMESLICE;
+	}
+#if 0
+	{
+		static int next;
+		if (time_after(jiffies, next)) {
+			printk(KERN_INFO "uid %d pid %d nod %d ptg %x gov %x threads %d lim %d slice %d\n",
+			  p->uid, p->pid, numa_node_id(), p->ptgroup, p->governor, threads/10, govern, slice);
+			next = jiffies + HZ*300;
+		}
+	}
+#endif
+	return slice;
 }
 
 /*
@@ -197,16 +219,18 @@
 	rq->node_nr_running = &node_nr_running[0];
 }
 
-static inline void nr_running_inc(runqueue_t *rq)
+static inline void nr_running_inc(task_t *p, runqueue_t *rq)
 {
 	atomic_inc(rq->node_nr_running);
 	rq->nr_running++;
+	atomic_inc(p->governor);
 }
 
-static inline void nr_running_dec(runqueue_t *rq)
+static inline void nr_running_dec(task_t *p, runqueue_t *rq)
 {
 	atomic_dec(rq->node_nr_running);
 	rq->nr_running--;
+	atomic_dec(p->governor);
 }
 
 __init void node_nr_running_init(void)
@@ -220,8 +244,8 @@
 #else /* !CONFIG_NUMA */
 
 # define nr_running_init(rq)   do { } while (0)
-# define nr_running_inc(rq)    do { (rq)->nr_running++; } while (0)
-# define nr_running_dec(rq)    do { (rq)->nr_running--; } while (0)
+# define nr_running_inc(p, rq)    do { (rq)->nr_running++; atomic_inc((p)->governor); } while (0)
+# define nr_running_dec(p, rq)    do { (rq)->nr_running--; atomic_dec((p)->governor); } while (0)
 
 #endif /* CONFIG_NUMA */
 
@@ -326,7 +350,7 @@
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task(p, rq->active);
-	nr_running_inc(rq);
+	nr_running_inc(p, rq);
 }
 
 /*
@@ -387,7 +411,7 @@
  */
 static inline void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-	nr_running_dec(rq);
+	nr_running_dec(p, rq);
 	if (p->state == TASK_UNINTERRUPTIBLE)
 		rq->nr_uninterruptible++;
 	dequeue_task(p, p->array);
@@ -586,7 +610,7 @@
 		list_add_tail(&p->run_list, &current->run_list);
 		p->array = current->array;
 		p->array->nr_active++;
-		nr_running_inc(rq);
+		nr_running_inc(p, rq);
 	}
 	task_rq_unlock(rq, &flags);
 }
@@ -1004,9 +1028,15 @@
 static inline void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, runqueue_t *this_rq, int this_cpu)
 {
 	dequeue_task(p, src_array);
-	nr_running_dec(src_rq);
+	nr_running_dec(p, src_rq);
 	set_task_cpu(p, this_cpu);
-	nr_running_inc(this_rq);
+#ifdef CONFIG_NUMA
+        if (p->ptgroup)
+                p->governor = &p->ptgroup->active[cpu_to_node(this_cpu)];
+        else
+                p->governor = &p->user->active[cpu_to_node(this_cpu)];
+#endif
+	nr_running_inc(p, this_rq);
 	enqueue_task(p, this_rq->active);
 	/*
 	 * Note that idle threads have a prio of MAX_PRIO, for this test
@@ -2521,6 +2551,8 @@
 	rq->curr = current;
 	rq->idle = current;
 	set_task_cpu(current, smp_processor_id());
+        current->governor = &current->user->active[numa_node_id()];
+	atomic_inc(current->governor);
 	wake_up_forked_process(current);
 	current->prio = MAX_PRIO;
 
diff -Nru a/kernel/user.c b/kernel/user.c
--- a/kernel/user.c	Sat Mar  8 14:18:48 2003
+++ b/kernel/user.c	Sat Mar  8 14:18:48 2003
@@ -30,6 +30,7 @@
 struct user_struct root_user = {
 	.__count	= ATOMIC_INIT(1),
 	.processes	= ATOMIC_INIT(1),
+	.active		= {[0 ...MAX_NUMNODES-1] = ATOMIC_INIT(0)},
 	.files		= ATOMIC_INIT(0)
 };
 
@@ -89,6 +90,7 @@
 
 	if (!up) {
 		struct user_struct *new;
+		int i;
 
 		new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
 		if (!new)
@@ -96,6 +98,8 @@
 		new->uid = uid;
 		atomic_set(&new->__count, 1);
 		atomic_set(&new->processes, 0);
+		for(i=0; i < MAX_NUMNODES; i++)
+			atomic_set(&new->active[i], 0);
 		atomic_set(&new->files, 0);
 
 		/*
@@ -130,6 +134,11 @@
 	atomic_inc(&new_user->processes);
 	atomic_dec(&old_user->processes);
 	current->user = new_user;
+	if (!current->ptgroup) {
+		atomic_dec(current->governor);
+		current->governor = &current->user->active[numa_node_id()];
+		atomic_inc(current->governor);
+	}
 	free_uid(old_user);
 }
 

  reply	other threads:[~2003-03-20  4:16 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-03-19  9:21 2.5.65-mm2 Andrew Morton
2003-03-19  9:21 ` 2.5.65-mm2 Andrew Morton
2003-03-19 10:07 ` 2.5.65-mm2 Alexander Hoogerhuis
2003-03-19 10:16 ` 2.5.65-mm2 Alexander Hoogerhuis
2003-03-19 10:16   ` 2.5.65-mm2 Alexander Hoogerhuis
2003-03-19 10:26 ` 2.5.65-mm2 Danny ter Haar
2003-03-19 19:51 ` 2.5.65-mm2 Steven Cole
2003-03-19 19:51   ` 2.5.65-mm2 Steven Cole
2003-03-19 20:10   ` 2.5.65-mm2 Andrew Morton
2003-03-19 20:10     ` 2.5.65-mm2 Andrew Morton
2003-03-19 20:57     ` 2.5.65-mm2 Steven P. Cole
2003-03-19 20:57       ` 2.5.65-mm2 Steven P. Cole
2003-03-19 22:02       ` 2.5.65-mm2 Steven P. Cole
2003-03-19 22:02         ` 2.5.65-mm2 Steven P. Cole
2003-03-19 22:17         ` 2.5.65-mm2 jjs
2003-03-19 22:51           ` 2.5.65-mm2 Steven P. Cole
2003-03-19 23:04             ` 2.5.65-mm2 Robert Love
2003-03-19 23:19               ` 2.5.65-mm2 Steven P. Cole
2003-03-20  4:50               ` 2.5.65-mm2 Mike Galbraith
2003-03-19 23:09             ` 2.5.65-mm2 jjs
2003-03-20  0:33         ` 2.5.65-mm2 Andrew Morton
2003-03-20  0:33           ` 2.5.65-mm2 Andrew Morton
2003-03-19 23:45           ` 2.5.65-mm2 Steven P. Cole
2003-03-19 23:45             ` 2.5.65-mm2 Steven P. Cole
2003-03-20  4:27             ` Ed Tomlinson [this message]
2003-03-20  5:04               ` 2.5.65-mm2 Steven Cole
2003-03-20  5:04                 ` 2.5.65-mm2 Steven Cole
2003-03-20 14:36               ` 2.5.65-mm2 Steven Cole
2003-03-20 14:36                 ` 2.5.65-mm2 Steven Cole
2003-03-20 19:48                 ` 2.5.65-mm2 Mike Galbraith
2003-03-20 19:48                   ` 2.5.65-mm2 Mike Galbraith
2003-03-20 20:12                   ` 2.5.65-mm2 Steven P. Cole
2003-03-20 20:12                     ` 2.5.65-mm2 Steven P. Cole
2003-03-20 21:07                     ` 2.5.65-mm2 Mike Galbraith
2003-03-20 21:15                       ` 2.5.65-mm2 Steven P. Cole
2003-03-20 21:15                         ` 2.5.65-mm2 Steven P. Cole
2003-03-21  5:20                         ` 2.5.65-mm2 Mike Galbraith
2003-03-21  6:06                   ` 2.5.65-mm2 Ingo Molnar
2003-03-21  6:06                     ` 2.5.65-mm2 Ingo Molnar
2003-03-21  6:16                   ` 2.5.65-mm2 Ingo Molnar
2003-03-21  6:16                     ` 2.5.65-mm2 Ingo Molnar
     [not found]                   ` <Pine.LNX.4.44.0303210710490.2533-100000@localhost.localdom ain>
2003-03-22 19:50                     ` 2.5.65-mm2 Mike Galbraith
2003-03-22 19:50                       ` 2.5.65-mm2 Mike Galbraith
  -- strict thread matches above, loose matches on Subject: below --
2003-03-19 23:17 2.5.65-mm2 Charles Baylis
2003-03-20  1:38 ` 2.5.65-mm2 Andrew Morton
2003-03-20  4:56 ` 2.5.65-mm2 Mike Galbraith

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200303192327.45883.tomlins@cam.org \
    --to=tomlins@cam.org \
    --cc=akpm@digeo.com \
    --cc=elenstev@mesatop.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.