All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: Marcelo Tosatti <marcelo.tosatti@cyclades.com>
Cc: Andrew Morton <akpm@osdl.org>,
	Martin MOKREJ__ <mmokrejs@ribosome.natur.cuni.cz>,
	piggin@cyberone.com.au, chris@tebibyte.org, andrea@novell.com,
	LKML <linux-kernel@vger.kernel.org>,
	linux-mm@kvack.org, Rik van Riel <riel@redhat.com>
Subject: Re: [PATCH] fix spurious OOM kills
Date: Fri, 19 Nov 2004 17:17:22 +0100	[thread overview]
Message-ID: <1100881042.2635.140.camel@thomas> (raw)
In-Reply-To: <20041119080946.GA30845@logos.cnet>

[-- Attachment #1: Type: text/plain, Size: 682 bytes --]

On Fri, 2004-11-19 at 06:09 -0200, Marcelo Tosatti wrote:
> As Thomas Gleixner has investigated, the OOM killer selection is problematic.
> 
> When testing your ignore-page-referenced patch it first killed the memory hog
> then shortly afterwards the shell I was running it on.
> 
> You've seen Thomas emails, he has nice description there.

I had another go on 2.6.10-rc2-mm2. 

The reentrancy blocking and the additional test of freepages in
out_of_memory() make all the ugly time and counter checks superfluous.

I think they were necessary to make the spurious kill triggering less
obvious. :)

Can somebody else check with their own test cases whether the behaviour
is correct?

tglx



[-- Attachment #2: 2.6.10-rc2-mm2-oom.diff --]
[-- Type: text/x-patch, Size: 4954 bytes --]

diff -urN 2.6.10-rc2-mm2.orig/mm/oom_kill.c 2.6.10-rc2-mm2/mm/oom_kill.c
--- 2.6.10-rc2-mm2.orig/mm/oom_kill.c	2004-11-19 14:52:16.000000000 +0100
+++ 2.6.10-rc2-mm2/mm/oom_kill.c	2004-11-19 17:12:40.000000000 +0100
@@ -45,8 +45,10 @@
 static unsigned long badness(struct task_struct *p, unsigned long uptime)
 {
 	unsigned long points, cpu_time, run_time, s;
+        struct list_head *tsk;
 
-	if (!p->mm)
+	/* Ignore mm-less tasks and init */
+	if (!p->mm || p->pid == 1)
 		return 0;
 
 	if (p->flags & PF_MEMDIE)
@@ -57,6 +59,19 @@
 	points = p->mm->total_vm;
 
 	/*
+	 * Processes which fork a lot of child processes are likely 
+	 * a good choice. We add the vmsize of the childs if they
+	 * have an own mm. This prevents forking servers to flood the
+	 * machine with an endless amount of childs
+	 */
+	list_for_each(tsk, &p->children) {
+		struct task_struct *chld;
+		chld = list_entry(tsk, struct task_struct, sibling);
+		if (chld->mm != p->mm && chld->mm)
+			points += chld->mm->total_vm;
+	}
+
+	/*
 	 * CPU time is in tens of seconds and run time is in thousands
          * of seconds. There is no particular reason for this other than
          * that it turned out to work very well in practice.
@@ -176,6 +191,27 @@
 	return mm;
 }
 
+static struct mm_struct *oom_kill_process(task_t *p)
+{
+	struct mm_struct *mm;
+	struct task_struct *g, *q;
+
+	mm = oom_kill_task(p);
+	if (!mm)
+		return NULL;
+	/*
+	 * kill all processes that share the ->mm (i.e. all threads),
+	 * but are in a different thread group
+	 */
+	do_each_thread(g, q)
+		if (q->mm == mm && q->tgid != p->tgid)
+			__oom_kill_task(q);
+
+	while_each_thread(g, q);
+	if (!p->mm)
+		printk(KERN_INFO "Fixed up OOM kill of mm-less task\n");
+	return mm;
+}
 
 /**
  * oom_kill - kill the "best" process when we run out of memory
@@ -188,7 +224,9 @@
 void oom_kill(void)
 {
 	struct mm_struct *mm;
-	struct task_struct *g, *p, *q;
+	struct task_struct *c, *p;
+	struct list_head *tsk;
+	int mmcnt = 0;
 	
 	read_lock(&tasklist_lock);
 retry:
@@ -200,21 +238,25 @@
 		panic("Out of memory and no killable processes...\n");
 	}
 
-	mm = oom_kill_task(p);
-	if (!mm)
-		goto retry;
 	/*
-	 * kill all processes that share the ->mm (i.e. all threads),
-	 * but are in a different thread group
+	 * Kill the child processes first
 	 */
-	do_each_thread(g, q)
-		if (q->mm == mm && q->tgid != p->tgid)
-			__oom_kill_task(q);
-	while_each_thread(g, q);
-	if (!p->mm)
-		printk(KERN_INFO "Fixed up OOM kill of mm-less task\n");
+	list_for_each(tsk, &p->children) {
+		c = list_entry(tsk, struct task_struct, sibling);
+		if (c->mm == p->mm)
+			continue;
+		mm = oom_kill_process(c);
+		if (mm) {
+			mmcnt ++;
+			mmput(mm);
+		}
+	}
+	mm = oom_kill_process(p);
+	if (!mmcnt && !mm)
+		goto retry;
+	if (mm)
+		mmput(mm);
 	read_unlock(&tasklist_lock);
-	mmput(mm);
 	return;
 }
 
@@ -224,59 +266,23 @@
 void out_of_memory(int gfp_mask)
 {
 	/*
-	 * oom_lock protects out_of_memory()'s static variables.
-	 * It's a global lock; this is not performance-critical.
-	 */
-	static DEFINE_SPINLOCK(oom_lock);
-	static unsigned long first, last, count, lastkill;
-	unsigned long now, since;
-
-	spin_lock(&oom_lock);
-	now = jiffies;
-	since = now - last;
-	last = now;
-
-	/*
-	 * If it's been a long time since last failure,
-	 * we're not oom.
-	 */
-	if (since > 5*HZ)
-		goto reset;
-
-	/*
-	 * If we haven't tried for at least one second,
-	 * we're not really oom.
-	 */
-	since = now - first;
-	if (since < HZ)
-		goto out_unlock;
-
-	/*
-	 * If we have gotten only a few failures,
-	 * we're not really oom. 
-	 */
-	if (++count < 10)
-		goto out_unlock;
-
-	/*
-	 * If we just killed a process, wait a while
-	 * to give that task a chance to exit. This
-	 * avoids killing multiple processes needlessly.
-	 */
-	since = now - lastkill;
-	if (since < HZ*5)
-		goto out_unlock;
-
-	/*
-	 * Ok, really out of memory. Kill something.
-	 */
-	lastkill = now;
+ 	 * inprogress protects out_of_memory()'s static variables
+	 * and prevents reentrancy
+  	 */
+ 	static unsigned long inprogress;
+ 	static unsigned int  freepages = 1000000;
+
+ 	if (test_and_set_bit(0, &inprogress))
+ 		return;
+ 	
+ 	/* Check, if memory was freed since the last oom kill */
+ 	if (freepages < nr_free_pages())
+ 		goto out_unlock;
 
 	printk("oom-killer: gfp_mask=0x%x\n", gfp_mask);
 	show_free_areas();
-
-	/* oom_kill() sleeps */
-	spin_unlock(&oom_lock);
+	/* Store free pages  * 2 for the check above */
+	freepages = (nr_free_pages() << 1);
 	oom_kill();
 	/*
 	 * Make kswapd go out of the way, so "p" has a good chance of
@@ -284,17 +290,7 @@
 	 * for more memory.
 	 */
 	yield();
-	spin_lock(&oom_lock);
-
-reset:
-	/*
-	 * We dropped the lock above, so check to be sure the variable
-	 * first only ever increases to prevent false OOM's.
-	 */
-	if (time_after(now, first))
-		first = now;
-	count = 0;
-
+	
 out_unlock:
-	spin_unlock(&oom_lock);
+	clear_bit(0, &inprogress);
 }

  reply	other threads:[~2004-11-19 16:31 UTC|newest]

Thread overview: 126+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-11-11 11:29 [PATCH] fix spurious OOM kills Marcelo Tosatti
2004-11-11 11:29 ` Marcelo Tosatti
2004-11-11 15:42 ` Andrea Arcangeli
2004-11-11 15:42   ` Andrea Arcangeli
2004-11-11 12:38   ` Marcelo Tosatti
2004-11-11 12:38     ` Marcelo Tosatti
2004-11-11 16:50     ` Andrea Arcangeli
2004-11-11 16:50       ` Andrea Arcangeli
2004-11-11 13:56       ` Marcelo Tosatti
2004-11-11 13:56         ` Marcelo Tosatti
2004-11-11 21:45         ` Andrea Arcangeli
2004-11-11 21:45           ` Andrea Arcangeli
2004-11-11 19:19           ` Marcelo Tosatti
2004-11-11 19:19             ` Marcelo Tosatti
2004-11-11 17:42       ` Martin J. Bligh
2004-11-11 17:42         ` Martin J. Bligh
2004-11-11 21:50         ` Andrea Arcangeli
2004-11-11 21:50           ` Andrea Arcangeli
2004-11-12 11:13       ` fix for mpol mm corruption on tmpfs Andrea Arcangeli
2004-11-12 11:13         ` Andrea Arcangeli
2004-11-11 21:57 ` [PATCH] fix spurious OOM kills Chris Ross
2004-11-11 21:57   ` Chris Ross
2004-11-12 16:52   ` Chris Ross
2004-11-12 16:52     ` Chris Ross
2004-11-12 23:56     ` Nick Piggin
2004-11-12 23:56       ` Nick Piggin
2004-11-13 23:37     ` Andrea Arcangeli
2004-11-13 23:37       ` Andrea Arcangeli
2004-11-14  9:44       ` Marcelo Tosatti
2004-11-14  9:44         ` Marcelo Tosatti
2004-11-14 10:02         ` Marcelo Tosatti
2004-11-14 10:02           ` Marcelo Tosatti
2004-11-14 17:11           ` Andrea Arcangeli
2004-11-14 17:11             ` Andrea Arcangeli
2004-11-14 17:03         ` Andrea Arcangeli
2004-11-14 17:03           ` Andrea Arcangeli
2004-11-14 18:16           ` Martin J. Bligh
2004-11-14 18:16             ` Martin J. Bligh
2004-11-14 18:27             ` Andrea Arcangeli
2004-11-14 18:27               ` Andrea Arcangeli
2004-11-14 20:21           ` Marcelo Tosatti
2004-11-14 20:21             ` Marcelo Tosatti
2004-11-16 16:30             ` Chris Ross
2004-11-16 16:30               ` Chris Ross
2004-11-17  9:08               ` Chris Ross
2004-11-17  9:23                 ` Andrew Morton
2004-11-17  9:23                   ` Andrew Morton
2004-11-17  6:06                   ` Marcelo Tosatti
2004-11-17  6:06                     ` Marcelo Tosatti
2004-11-17  6:08                     ` Marcelo Tosatti
2004-11-17  6:08                       ` Marcelo Tosatti
2004-11-17  6:38                       ` Marcelo Tosatti
2004-11-17  6:38                         ` Marcelo Tosatti
2004-11-17 11:04                         ` Chris Ross
2004-11-17 11:04                           ` Chris Ross
2004-11-17 10:26                       ` Andrew Morton
2004-11-17 10:26                         ` Andrew Morton
2004-11-17 10:50                       ` Chris Ross
2004-11-17 10:50                         ` Chris Ross
2004-11-17  7:09                         ` Marcelo Tosatti
2004-11-17  7:09                           ` Marcelo Tosatti
2004-11-17 11:49                           ` Chris Ross
2004-11-17 11:49                             ` Chris Ross
2004-11-17 12:09                           ` Rik van Riel
2004-11-17 12:09                             ` Rik van Riel
2004-11-17 13:12                   ` Chris Ross
2004-11-17 13:12                     ` Chris Ross
     [not found]                   ` <419CD8C1.4030506@ribosome.natur.cuni.cz>
2004-11-18 21:16                     ` Andrew Morton
2004-11-18 21:16                       ` Andrew Morton
     [not found]                       ` <419D25B5.1060504@ribosome.natur.cuni.cz>
     [not found]                         ` <419D2987.8010305@cyberone.com.au>
2004-11-19  0:03                           ` Martin MOKREJŠ
2004-11-19  0:03                             ` Martin MOKREJŠ
2004-11-19  0:08                             ` Andrew Morton
2004-11-19  0:08                               ` Andrew Morton
2004-11-19  8:09                               ` Marcelo Tosatti
2004-11-19  8:09                                 ` Marcelo Tosatti
2004-11-19 16:17                                 ` Thomas Gleixner [this message]
     [not found]                               ` <419E821F.7010601@ribosome.natur.cuni.cz>
2004-11-20 10:23                                 ` Thomas Gleixner
2004-11-20 10:23                                   ` Thomas Gleixner
2004-11-20 10:45                                   ` Martin MOKREJŠ
2004-11-20 10:45                                     ` Martin MOKREJŠ
2004-11-20 11:29                                   ` Martin MOKREJŠ
2004-11-20 11:29                                     ` Martin MOKREJŠ
2004-11-20 13:29                                     ` Thomas Gleixner
2004-11-20 13:29                                       ` Thomas Gleixner
2004-11-20 21:19                                       ` Martin MOKREJŠ
2004-11-20 21:19                                         ` Martin MOKREJŠ
2004-11-21 11:53                                         ` Thomas Gleixner
2004-11-21 11:53                                           ` Thomas Gleixner
2004-11-21 12:17                                           ` Martin MOKREJŠ
2004-11-21 12:17                                             ` Martin MOKREJŠ
2004-11-21 13:57                                             ` Thomas Gleixner
2004-11-21 13:57                                               ` Thomas Gleixner
2004-11-22 10:55                                               ` Thomas Gleixner
2004-11-22 10:55                                                 ` Thomas Gleixner
2004-11-23  7:41                                                 ` Martin MOKREJŠ
2004-11-23  7:41                                                   ` Martin MOKREJŠ
2004-11-23 10:27                                                   ` Thomas Gleixner
2004-11-23 10:27                                                     ` Thomas Gleixner
2004-11-24 15:52                                                     ` Martin MOKREJŠ
2004-11-24 15:52                                                       ` Martin MOKREJŠ
2004-11-24 16:36                                                       ` Thomas Gleixner
2004-11-24 16:36                                                         ` Thomas Gleixner
2004-12-14 16:04                                                     ` Martin MOKREJŠ
2004-12-14 16:04                                                       ` Martin MOKREJŠ
2004-12-14 17:38                                                       ` Andrea Arcangeli
2004-12-14 17:38                                                         ` Andrea Arcangeli
2004-12-14 23:30                                                         ` Nick Piggin
2004-12-14 23:30                                                           ` Nick Piggin
2004-12-14 23:55                                                           ` Andrea Arcangeli
2004-12-14 23:55                                                             ` Andrea Arcangeli
2004-12-15  0:16                                                             ` Thomas Gleixner
2004-12-15  0:16                                                               ` Thomas Gleixner
2004-12-15  0:37                                                               ` Andrea Arcangeli
2004-12-15  0:37                                                                 ` Andrea Arcangeli
2004-12-15  0:48                                                                 ` Thomas Gleixner
2004-12-15  0:48                                                                   ` Thomas Gleixner
2004-11-21 19:01                   ` Chris Ross
2004-11-21 19:01                     ` Chris Ross
2004-11-22 12:15                     ` Chris Ross
2004-11-22 12:15                       ` Chris Ross
2004-11-22  8:35                       ` Marcelo Tosatti
2004-11-22  8:35                         ` Marcelo Tosatti
2004-11-16  8:37           ` Chris Ross
2004-11-16  8:37             ` Chris Ross
2004-11-17  3:45   ` Andrew Morton
2004-11-17  3:45     ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1100881042.2635.140.camel@thomas \
    --to=tglx@linutronix.de \
    --cc=akpm@osdl.org \
    --cc=andrea@novell.com \
    --cc=chris@tebibyte.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=marcelo.tosatti@cyclades.com \
    --cc=mmokrejs@ribosome.natur.cuni.cz \
    --cc=piggin@cyberone.com.au \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.