public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] oom killer (Core)
@ 2004-12-01  9:49 tglx
  2004-12-01 21:16 ` Andrea Arcangeli
                   ` (2 more replies)
  0 siblings, 3 replies; 66+ messages in thread
From: tglx @ 2004-12-01  9:49 UTC (permalink / raw)
  To: akpm; +Cc: andrea, marcelo.tosatti, linux-kernel

The oom killer has currently some strange effects when triggered.
It gets invoked multiple times and the selection of the task to kill
does not take processes into account which fork a lot of child processes.

The patch solves this by
- Preventing reentrancy
- Checking for memory threshold before selection and kill.
- Taking child processes into account when selecting the process to kill

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 oom_kill.c   |  113 ++++++++++++++++++++++++++++++++++++++++++-----------------
 page_alloc.c |    5 ++
 2 files changed, 86 insertions(+), 32 deletions(-)
---
diff -urN 2.6.10-rc2-mm4.orig/mm/oom_kill.c 2.6.10-rc2-mm4/mm/oom_kill.c
--- 2.6.10-rc2-mm4.orig/mm/oom_kill.c	2004-12-01 09:33:44.000000000 +0100
+++ 2.6.10-rc2-mm4/mm/oom_kill.c	2004-12-01 09:37:41.628396951 +0100
@@ -45,8 +45,10 @@
 static unsigned long badness(struct task_struct *p, unsigned long uptime)
 {
 	unsigned long points, cpu_time, run_time, s;
+        struct list_head *tsk;
 
-	if (!p->mm)
+	/* Ignore mm-less tasks and init */
+	if (!p->mm || p->pid == 1)
 		return 0;
 
 	if (p->flags & PF_MEMDIE)
@@ -57,6 +59,19 @@
 	points = p->mm->total_vm;
 
 	/*
+	 * Processes which fork a lot of child processes are likely 
+	 * a good choice. We add the vmsize of the childs if they
+	 * have an own mm. This prevents forking servers to flood the
+	 * machine with an endless amount of childs
+	 */
+	list_for_each(tsk, &p->children) {
+		struct task_struct *chld;
+		chld = list_entry(tsk, struct task_struct, sibling);
+		if (chld->mm != p->mm && chld->mm)
+			points += chld->mm->total_vm;
+	}
+
+	/*
 	 * CPU time is in tens of seconds and run time is in thousands
          * of seconds. There is no particular reason for this other than
          * that it turned out to work very well in practice.
@@ -176,6 +191,27 @@
 	return mm;
 }
 
+static struct mm_struct *oom_kill_process(task_t *p)
+{
+	struct mm_struct *mm;
+	struct task_struct *g, *q;
+
+	mm = oom_kill_task(p);
+	if (!mm)
+		return NULL;
+	/*
+	 * kill all processes that share the ->mm (i.e. all threads),
+	 * but are in a different thread group
+	 */
+	do_each_thread(g, q)
+		if (q->mm == mm && q->tgid != p->tgid)
+			__oom_kill_task(q);
+
+	while_each_thread(g, q);
+	if (!p->mm)
+		printk(KERN_INFO "Fixed up OOM kill of mm-less task\n");
+	return mm;
+}
 
 /**
  * oom_kill - kill the "best" process when we run out of memory
@@ -188,7 +224,9 @@
 void oom_kill(void)
 {
 	struct mm_struct *mm;
-	struct task_struct *g, *p, *q;
+	struct task_struct *c, *p;
+	struct list_head *tsk;
+	int mmcnt = 0;
 	
 	read_lock(&tasklist_lock);
 retry:
@@ -200,21 +238,32 @@
 		panic("Out of memory and no killable processes...\n");
 	}
 
-	mm = oom_kill_task(p);
-	if (!mm)
-		goto retry;
 	/*
-	 * kill all processes that share the ->mm (i.e. all threads),
-	 * but are in a different thread group
+	 * Kill the forked child processes first
 	 */
-	do_each_thread(g, q)
-		if (q->mm == mm && q->tgid != p->tgid)
-			__oom_kill_task(q);
-	while_each_thread(g, q);
-	if (!p->mm)
-		printk(KERN_INFO "Fixed up OOM kill of mm-less task\n");
+	list_for_each(tsk, &p->children) {
+		c = list_entry(tsk, struct task_struct, sibling);
+		/* Do not touch threads, as they get killed later */
+		if (c->mm == p->mm)
+			continue;
+		mm = oom_kill_process(c);
+		if (mm) {
+			mmcnt ++;
+			mmput(mm);
+		}
+	}
+
+	/*
+	 * If we managed to kill one or more child processes
+	 * then let the parent live for now
+	 */
+	if (!mmcnt) {
+		mm = oom_kill_process(p);
+		if (!mm)
+			goto retry;
+		mmput(mm);
+	}
 	read_unlock(&tasklist_lock);
-	mmput(mm);
 	return;
 }
 
@@ -224,14 +273,21 @@
 void out_of_memory(int gfp_mask)
 {
 	/*
-	 * oom_lock protects out_of_memory()'s static variables.
-	 * It's a global lock; this is not performance-critical.
-	 */
-	static DEFINE_SPINLOCK(oom_lock);
+ 	 * inprogress protects out_of_memory()'s static variables
+	 * and prevents reentrancy.
+  	 */
+ 	static unsigned long inprogress;
+ 	static unsigned int  freepages = 1000000;
 	static unsigned long first, last, count, lastkill;
 	unsigned long now, since;
 
-	spin_lock(&oom_lock);
+ 	if (test_and_set_bit(0, &inprogress))
+ 		return;
+ 	
+ 	/* Check, if memory was freed since the last oom kill */
+ 	if (freepages < nr_free_pages())
+ 		goto out_unlock;
+
 	now = jiffies;
 	since = now - last;
 	last = now;
@@ -271,12 +327,11 @@
 	 * Ok, really out of memory. Kill something.
 	 */
 	lastkill = now;
-
-	printk("oom-killer: gfp_mask=0x%x\n", gfp_mask);
+	printk(KERN_ERR "oom-killer: gfp_mask=0x%x\n", gfp_mask);
 	show_free_areas();
-
-	/* oom_kill() sleeps */
-	spin_unlock(&oom_lock);
+	dump_stack();
+	/* Store free pages  * 2 for the check above */
+	freepages = (nr_free_pages() << 1);
 	oom_kill();
 	/*
 	 * Make kswapd go out of the way, so "p" has a good chance of
@@ -284,17 +339,11 @@
 	 * for more memory.
 	 */
 	yield();
-	spin_lock(&oom_lock);
 
 reset:
-	/*
-	 * We dropped the lock above, so check to be sure the variable
-	 * first only ever increases to prevent false OOM's.
-	 */
-	if (time_after(now, first))
-		first = now;
+	first = jiffies;
 	count = 0;
 
 out_unlock:
-	spin_unlock(&oom_lock);
+	clear_bit(0, &inprogress);
 }
diff -urN 2.6.10-rc2-mm4.orig/mm/page_alloc.c 2.6.10-rc2-mm4/mm/page_alloc.c
--- 2.6.10-rc2-mm4.orig/mm/page_alloc.c	2004-12-01 09:33:44.000000000 +0100
+++ 2.6.10-rc2-mm4/mm/page_alloc.c	2004-12-01 09:36:55.010034604 +0100
@@ -872,6 +872,11 @@
 	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
 
+	/* Check if try_to_free_pages called out_of_memory and
+	 * if the current task is the sacrifice  */
+	if ((p->flags & PF_MEMDIE) && !in_interrupt())
+		goto nopage;
+
 	/* go through the zonelist yet one more time */
 	for (i = 0; (z = zones[i]) != NULL; i++) {
 		if (!zone_watermark_ok(z, order, z->pages_min,

^ permalink raw reply	[flat|nested] 66+ messages in thread
* Re: [PATCH] oom killer (Core)
@ 2004-12-01 10:21 tvrtko.ursulin
  0 siblings, 0 replies; 66+ messages in thread
From: tvrtko.ursulin @ 2004-12-01 10:21 UTC (permalink / raw)
  To: tglx; +Cc: akpm, andrea, linux-kernel, marcelo.tosatti

On 01/12/2004 09:49:03 linux-kernel-owner wrote:

>The oom killer has currently some strange effects when triggered.
>It gets invoked multiple times and the selection of the task to kill
>does not take processes into account which fork a lot of child processes.
>
>The patch solves this by
>- Preventing reentrancy
>- Checking for memory threshold before selection and kill.
>- Taking child processes into account when selecting the process to kill

Ah, again. :) Rusty Lynch and me tried something similar at leat twice but 
with no avail. 

We had a modular OOM killers infrastructure with two new killers. One 
would just panic on OOM situation, and other did account for parents which 
repeatedly spawned 'bad' children. Some people called it 'a bit right 
winged'. :)

Take a look at http://linux.ursulin.net if you are interested. I did not 
sync it with the latest kernel since nobody was interested. It worked for 
me. 


^ permalink raw reply	[flat|nested] 66+ messages in thread
* Re: [PATCH] oom killer (Core)
@ 2004-12-04  7:00 Voluspa
  2004-12-04  8:08 ` Andrea Arcangeli
  0 siblings, 1 reply; 66+ messages in thread
From: Voluspa @ 2004-12-04  7:00 UTC (permalink / raw)
  To: andrea; +Cc: linux-kernel


On 2004-12-03 23:08:55 Andrea Arcangeli wrote:

>You mean my patch is preventing your machine to boot? Then you're doing
>something else wrong because it's impossible my patch is preventing 
>your machine to boot.

Same experience as Thomas here. Full stop like his first log (no errors)
. PIII (Celeron) 900@1 gig, 256 meg mem, 1 gig swap, preempt enabled.

Tried your patch since the oom killer slaughtered a very important app 
here when another one ran amok. Not fork spawnings, just ram-eating. Was 
blender (3d renderer) in "Sequence Editor" mode when i hit alt-a (for 
animate) on a pretty large set of stills. Eventually blender got killed 
also, twice...

Kernel 2.6.9 with nick p-s? patch for the buggy kswapd (100 percent cpu, 
without using any swap).

Mvh
Mats Johannesson


^ permalink raw reply	[flat|nested] 66+ messages in thread
* Re: [PATCH] oom killer (Core)
@ 2004-12-04 12:42 Voluspa
  2004-12-04 16:43 ` Andrea Arcangeli
  0 siblings, 1 reply; 66+ messages in thread
From: Voluspa @ 2004-12-04 12:42 UTC (permalink / raw)
  To: andrea; +Cc: linux-kernel


On 2004-12-04 8:08:40 Andrea Arcangeli wrote:

> You can try to put back a might_slee_if(wait), but if it deadlocks 
with
> that change sure it's not a bug in my patch, it's instead a bug
> somewhere else that calls alloc_pages w/o GFP_ATOMIC. Ingo's
> lowlatency patch would expose the same bug too since they're aliasing
> the might_sleep to cond_resched.

Putting it back doesn't alter the outcome - hanging. And the original 
patch, (hope it was the right one) from:

http://marc.theaimsgroup.com/?l=linux-kernel&m=110204117506557&w=2

root:loke:/usr/src/linux-2.6.9-oomkill# patch -Np1 -i ../oomkill.patch 
patching file mm/oom_kill.c
patching file mm/page_alloc.c
Hunk #1 succeeded at 608 (offset -3 lines).
Hunk #3 succeeded at 681 (offset -3 lines).
patching file mm/swap_state.c
patching file mm/vmscan.c

has been tried with the following variations. With and without 
optimizing for size, with and without preempt, with and without kernel 
boot params (cfq, lapic), cold and hot starts, and then I threw in a smp 
compile for measure. All have the same behaviour:

[...]
Checking 'hlt' instruction... OK.

[10 minutes wait. Then a long callback trace
 scrolls off the screen ending like Thomas']

<0>Kernel panic - not syncing: Fatal exception in interrupt

My toolchain (well, the whole software system) is quite contemporary 
within the stable branches. Built from scratch with gcc-3.4.3, glibc-
20041011 (nptl) and binutils-2.15.92.0.2

No energy control, acpi-pm or whatever it's called, is used here. The 
machine is extremely stable. Running with 100 percent utilization 24/7.

Don't shoot the messenger ;)
Mats Johannesson


^ permalink raw reply	[flat|nested] 66+ messages in thread
* Re: [PATCH] oom killer (Core)
@ 2004-12-05  2:22 Voluspa
  0 siblings, 0 replies; 66+ messages in thread
From: Voluspa @ 2004-12-05  2:22 UTC (permalink / raw)
  To: tglx; +Cc: andrea, linux-kernel


On 2004-12-04 21:02:03 Thomas Gleixner wrote:

> Mats. I don't understand why this did not work for you. The change has
> to be reverted to the original line "might_sleep_if(wait)" !

Well, I'm no coder and therefore follow instructions to the letter, 
almost (; <-- ehum), which meant that "put back" became lines 613-615 in 
mm/page_alloc.c

might_sleep_if(wait);
if (wait)
        cond_resched();

Now interpreting it as the lines should be

might_sleep_if(wait);

/*

the kernel boots without problems. Have yet to test the oom-killer with 
my rogue app. Won't return unless there's problems.

Mvh
Mats Johannesson


^ permalink raw reply	[flat|nested] 66+ messages in thread
* Re: [PATCH] oom killer (Core)
@ 2004-12-05  8:32 Voluspa
  0 siblings, 0 replies; 66+ messages in thread
From: Voluspa @ 2004-12-05  8:32 UTC (permalink / raw)
  To: andrea; +Cc: tglx, linux-kernel


I know... Said that I wouldn't come back unless there was a problem. But 
with these results! The patch is fantastic.

My problem app, blender, allocated all remaining physical memory and 360 
megs (of the 1 gig) swap but remained running. No oom-kill involved at 
all. This is a first with that app (in such a mode, with such a large 
working set of pictures). 500 1.2 meg uncompressed targa pictures 
animated in the graphical window of "Sequence Editor" on my system (256 
meg real mem). Wow.

The oom-kill/swap/memory handling/whatever must have been really sick.

Thank You!
Mats Johannesson (time to pay back by trying 2.6.10-rc3)


^ permalink raw reply	[flat|nested] 66+ messages in thread

end of thread, other threads:[~2004-12-24  1:19 UTC | newest]

Thread overview: 66+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-12-01  9:49 [PATCH] oom killer (Core) tglx
2004-12-01 21:16 ` Andrea Arcangeli
2004-12-01 22:06   ` Thomas Gleixner
2004-12-01 22:33     ` Andrea Arcangeli
2004-12-02  3:36     ` Andrea Arcangeli
2004-12-02 11:09       ` Thomas Gleixner
2004-12-02 13:48         ` Thomas Gleixner
2004-12-02 16:47           ` Andrea Arcangeli
2004-12-02 16:55             ` Andrew Morton
2004-12-02 11:18               ` Marcelo Tosatti
2004-12-02 17:17               ` Thomas Gleixner
2004-12-02 17:27                 ` Andrew Morton
2004-12-02 18:08               ` Andrea Arcangeli
2004-12-02 18:29                 ` Andrew Morton
2004-12-02 19:01                   ` Thomas Gleixner
2004-12-02 18:55                 ` Thomas Gleixner
2004-12-02 19:07                   ` Andrew Morton
2004-12-02 19:08                     ` Thomas Gleixner
2004-12-02 19:22                       ` Andrew Morton
2004-12-02 19:24                         ` Thomas Gleixner
2004-12-02 20:11                           ` Andre Tomt
2004-12-03 22:45                             ` Thomas Gleixner
2004-12-02 23:47                           ` Andrea Arcangeli
2004-12-03 14:41                           ` Helge Hafting
2004-12-03 21:20                             ` Thomas Gleixner
2004-12-05 21:14                               ` Helge Hafting
2004-12-02 23:35                   ` Andrea Arcangeli
2004-12-03  2:28                     ` Andrea Arcangeli
2004-12-03 22:37                       ` Thomas Gleixner
2004-12-03 22:51                         ` Thomas Gleixner
2004-12-03 23:08                           ` Andrea Arcangeli
2004-12-10 16:36                       ` William Lee Irwin III
2004-12-10 17:35                         ` Andrea Arcangeli
2004-12-10 17:43                           ` William Lee Irwin III
2004-12-10 17:55                             ` Andrea Arcangeli
2004-12-10 18:00                               ` William Lee Irwin III
2004-12-10 18:15                                 ` Andrea Arcangeli
2004-12-10 18:19                                   ` William Lee Irwin III
2004-12-10 19:05                                     ` Andrea Arcangeli
2004-12-10 16:51                       ` William Lee Irwin III
2004-12-03 21:10                     ` Thomas Gleixner
2004-12-03 22:21                       ` Andrea Arcangeli
2004-12-05  2:52 ` William Lee Irwin III
2004-12-05 13:38   ` Thomas Gleixner
2004-12-05 15:22     ` Andrea Arcangeli
2004-12-10 16:32 ` William Lee Irwin III
2004-12-10 16:52   ` Thomas Gleixner
2004-12-10 17:43     ` William Lee Irwin III
2004-12-10 17:47     ` William Lee Irwin III
2004-12-10 17:49     ` Andrea Arcangeli
2004-12-10 17:57       ` William Lee Irwin III
2004-12-12  0:12         ` William Lee Irwin III
2004-12-24  1:18     ` Andrea Arcangeli
  -- strict thread matches above, loose matches on Subject: below --
2004-12-01 10:21 tvrtko.ursulin
2004-12-04  7:00 Voluspa
2004-12-04  8:08 ` Andrea Arcangeli
2004-12-04 12:42 Voluspa
2004-12-04 16:43 ` Andrea Arcangeli
2004-12-04 18:33   ` Thomas Gleixner
2004-12-04 21:02     ` Thomas Gleixner
2004-12-05  0:27       ` Andrea Arcangeli
2004-12-05 14:55         ` Thomas Gleixner
2004-12-05 15:34           ` Andrea Arcangeli
2004-12-05 16:29             ` Thomas Gleixner
2004-12-05  2:22 Voluspa
2004-12-05  8:32 Voluspa

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox