From: Andrea Arcangeli <andrea@suse.de>
To: linux-mm@kvack.org
Cc: David Rientjes <rientjes@google.com>
Subject: [PATCH 23 of 24] serialize for cpusets
Date: Wed, 22 Aug 2007 14:49:10 +0200 [thread overview]
Message-ID: <a3d679df54ebb1f977b9.1187786950@v2.random> (raw)
In-Reply-To: <patchbomb.1187786927@v2.random>
# HG changeset patch
# User David Rientjes <rientjes@google.com>
# Date 1187778125 -7200
# Node ID a3d679df54ebb1f977b97ab6b3e501134bf9e7ef
# Parent 8807a4d14b241b2d1132fde7f83834603b6cf093
serialize for cpusets
Adds a last_tif_memdie_jiffies field to struct cpuset to store the
jiffies value at the last OOM kill. This is used to detect deadlocks in the
CONSTRAINT_CPUSET case and to kill another task if one is detected.
Adds a CS_OOM bit to struct cpuset's flags field. This will be tested,
set, and cleared atomically to denote a cpuset that currently has an
attached task exiting as a result of the OOM killer. We are required to
take p->alloc_lock to dereference p->cpuset so this cannot be implemented
as a simple trylock.
As a result, we cannot allow the detachment of a task from a cpuset that
is currently OOM killing one of its tasks. If we did, we would end up
clearing the CS_OOM bit in the wrong cpuset upon that task's exit.
The panic_on_oom sysctl now only takes effect in the
non-cpuset-constrained case.
Cc: Andrea Arcangeli <andrea@suse.de>
Cc: Christoph Lameter <clameter@sgi.com>
Signed-off-by: David Rientjes <rientjes@google.com>
---
include/linux/cpuset.h | 19 ++++++++++++++
kernel/cpuset.c | 65 +++++++++++++++++++++++++++++++++++++++++++++---
kernel/exit.c | 1 +
mm/oom_kill.c | 21 ++++++++++++++-
4 files changed, 100 insertions(+), 6 deletions(-)
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -46,6 +46,12 @@ static int inline cpuset_zone_allowed_ha
}
extern int cpuset_excl_nodes_overlap(const struct task_struct *p);
+
+extern int cpuset_get_last_tif_memdie(struct task_struct *task);
+extern void cpuset_set_last_tif_memdie(struct task_struct *task,
+ unsigned long last_tif_memdie);
+extern int cpuset_set_oom(struct task_struct *task);
+extern void cpuset_clear_oom(struct task_struct *task);
#define cpuset_memory_pressure_bump() \
do { \
@@ -118,6 +124,19 @@ static inline int cpuset_excl_nodes_over
return 1;
}
+static inline int cpuset_get_last_tif_memdie(struct task_struct *task)
+{
+ return jiffies;
+}
+static inline void cpuset_set_last_tif_memdie(struct task_struct *task,
+ unsigned long last_tif_memdie) {}
+
+static inline int cpuset_set_oom(struct task_struct *task)
+{
+ return 0;
+}
+static inline void cpuset_clear_oom(struct task_struct *task) {}
+
static inline void cpuset_memory_pressure_bump(void) {}
static inline char *cpuset_task_status_allowed(struct task_struct *task,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -98,6 +98,12 @@ struct cpuset {
int mems_generation;
struct fmeter fmeter; /* memory_pressure filter */
+
+ /*
+ * The jiffies at the last time TIF_MEMDIE was set for a task
+ * associated with this cpuset.
+ */
+ unsigned long last_tif_memdie_jiffies;
};
/* bits in struct cpuset flags field */
@@ -109,6 +115,7 @@ typedef enum {
CS_NOTIFY_ON_RELEASE,
CS_SPREAD_PAGE,
CS_SPREAD_SLAB,
+ CS_OOM,
} cpuset_flagbits_t;
/* convenient tests for these bits */
@@ -145,6 +152,11 @@ static inline int is_spread_slab(const s
static inline int is_spread_slab(const struct cpuset *cs)
{
return test_bit(CS_SPREAD_SLAB, &cs->flags);
+}
+
+static inline int is_oom(const struct cpuset *cs)
+{
+ return test_bit(CS_OOM, &cs->flags);
}
/*
@@ -1251,10 +1263,16 @@ static int attach_task(struct cpuset *cs
* then fail this attach_task(), to avoid breaking top_cpuset.count.
*/
if (tsk->flags & PF_EXITING) {
- task_unlock(tsk);
- mutex_unlock(&callback_mutex);
- put_task_struct(tsk);
- return -ESRCH;
+ retval = -ESRCH;
+ goto error;
+ }
+ /*
+ * If the task's cpuset is currently in the OOM killer, we cannot
+ * move it or we'll clear the CS_OOM flag in the new cpuset.
+ */
+ if (unlikely(is_oom(oldcs))) {
+ retval = -EBUSY;
+ goto error;
}
atomic_inc(&cs->count);
rcu_assign_pointer(tsk->cpuset, cs);
@@ -1281,6 +1299,12 @@ static int attach_task(struct cpuset *cs
if (atomic_dec_and_test(&oldcs->count))
check_for_release(oldcs, ppathbuf);
return 0;
+
+error:
+ task_unlock(tsk);
+ mutex_unlock(&callback_mutex);
+ put_task_struct(tsk);
+ return retval;
}
/* The various types of files and directories in a cpuset file system */
@@ -2603,6 +2627,39 @@ done:
return overlap;
}
+int cpuset_get_last_tif_memdie(struct task_struct *task)
+{
+ unsigned long ret;
+ task_lock(task);
+ ret = task->cpuset->last_tif_memdie_jiffies;
+ task_unlock(task);
+ return ret;
+}
+
+void cpuset_set_last_tif_memdie(struct task_struct *task,
+ unsigned long last_tif_memdie)
+{
+ task_lock(task);
+ task->cpuset->last_tif_memdie_jiffies = last_tif_memdie;
+ task_unlock(task);
+}
+
+int cpuset_set_oom(struct task_struct *task)
+{
+ int ret;
+ task_lock(task);
+ ret = test_and_set_bit(CS_OOM, &task->cpuset->flags);
+ task_unlock(task);
+ return ret;
+}
+
+void cpuset_clear_oom(struct task_struct *task)
+{
+ task_lock(task);
+ clear_bit(CS_OOM, &task->cpuset->flags);
+ task_unlock(task);
+}
+
/*
* Collection of memory_pressure is suppressed unless
* this flag is enabled by writing "1" to the special
diff --git a/kernel/exit.c b/kernel/exit.c
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -857,6 +857,7 @@ static void exit_notify(struct task_stru
if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE))) {
extern unsigned long VM_is_OOM;
clear_bit(0, &VM_is_OOM);
+ cpuset_clear_oom(tsk);
}
write_unlock_irq(&tasklist_lock);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -428,6 +428,7 @@ void out_of_memory(struct zonelist *zone
void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
{
unsigned long freed = 0;
+ unsigned long last_tif_memdie;
int constraint;
static DECLARE_MUTEX(OOM_lock);
@@ -455,6 +456,22 @@ void out_of_memory(struct zonelist *zone
break;
case CONSTRAINT_CPUSET:
+ read_lock(&tasklist_lock);
+ last_tif_memdie = cpuset_get_last_tif_memdie(current);
+ /*
+ * If current's cpuset is already in the OOM killer or its killed
+ * task has not yet exited and a deadlock hasn't been detected, then
+ * do nothing.
+ */
+ if (unlikely(cpuset_set_oom(current)) &&
+ !oom_is_deadlocked(&last_tif_memdie))
+ goto out_cpuset;
+ cpuset_set_last_tif_memdie(current, last_tif_memdie);
+ select_and_kill_process(gfp_mask, order, constraint);
+
+ out_cpuset:
+ read_unlock(&tasklist_lock);
+ break;
case CONSTRAINT_NONE:
if (down_trylock(&OOM_lock))
break;
@@ -467,7 +484,7 @@ void out_of_memory(struct zonelist *zone
*/
if (unlikely(test_bit(0, &VM_is_OOM)) &&
!oom_is_deadlocked(&last_tif_memdie_jiffies))
- goto out;
+ goto out_none;
if (sysctl_panic_on_oom) {
read_unlock(&tasklist_lock);
@@ -477,7 +494,7 @@ void out_of_memory(struct zonelist *zone
select_and_kill_process(gfp_mask, order, constraint);
- out:
+ out_none:
read_unlock(&tasklist_lock);
up(&OOM_lock);
break;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2007-08-22 12:49 UTC|newest]
Thread overview: 113+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-08-22 12:48 [PATCH 00 of 24] OOM related fixes Andrea Arcangeli
2007-08-22 12:48 ` [PATCH 01 of 24] remove nr_scan_inactive/active Andrea Arcangeli
2007-09-12 11:44 ` Andrew Morton
2008-01-02 17:50 ` Andrea Arcangeli
2007-08-22 12:48 ` [PATCH 02 of 24] avoid oom deadlock in nfs_create_request Andrea Arcangeli
2007-09-12 23:54 ` Christoph Lameter
2007-08-22 12:48 ` [PATCH 03 of 24] prevent oom deadlocks during read/write operations Andrea Arcangeli
2007-09-12 11:56 ` Andrew Morton
2007-09-12 2:18 ` Nick Piggin
2008-01-03 0:53 ` Andrea Arcangeli
2007-08-22 12:48 ` [PATCH 04 of 24] serialize oom killer Andrea Arcangeli
2007-09-12 12:02 ` Andrew Morton
2007-09-12 12:04 ` Andrew Morton
2007-09-12 12:11 ` Andrea Arcangeli
2008-01-03 0:55 ` Andrea Arcangeli
2007-09-13 0:09 ` Christoph Lameter
2007-09-13 18:32 ` David Rientjes
2007-09-13 18:37 ` Christoph Lameter
2007-09-13 18:46 ` David Rientjes
2007-09-13 18:53 ` Christoph Lameter
2007-09-14 0:36 ` David Rientjes
2007-09-14 2:31 ` Christoph Lameter
2007-09-14 3:33 ` David Rientjes
2007-09-18 16:44 ` David Rientjes
2007-09-18 16:44 ` [patch 1/4] oom: move prototypes to appropriate header file David Rientjes
2007-09-18 16:44 ` [patch 2/4] oom: move constraints to enum David Rientjes
2007-09-18 16:44 ` [patch 3/4] oom: save zonelist pointer for oom killer calls David Rientjes
2007-09-18 16:44 ` [patch 4/4] oom: serialize out of memory calls David Rientjes
2007-09-18 19:54 ` Christoph Lameter
2007-09-18 19:56 ` David Rientjes
2007-09-18 20:01 ` Christoph Lameter
2007-09-18 20:06 ` David Rientjes
2007-09-18 20:23 ` [patch 5/4] oom: rename serialization helper functions David Rientjes
2007-09-18 20:26 ` Christoph Lameter
2007-09-18 20:39 ` [patch 5/4 v2] " David Rientjes
2007-09-18 20:59 ` Christoph Lameter
2007-09-18 19:57 ` [patch 3/4] oom: save zonelist pointer for oom killer calls Christoph Lameter
2007-09-18 20:13 ` David Rientjes
2007-09-18 20:16 ` Christoph Lameter
2007-09-18 20:47 ` [patch 6/4] oom: pass null to kfree if zonelist is not cleared David Rientjes
2007-09-18 21:01 ` Christoph Lameter
2007-09-18 21:13 ` David Rientjes
2007-09-18 21:25 ` Christoph Lameter
2007-09-18 22:16 ` David Rientjes
2007-09-19 17:09 ` Paul Jackson
2007-09-19 18:21 ` David Rientjes
2007-09-18 19:55 ` [patch 2/4] oom: move constraints to enum Christoph Lameter
2007-08-22 12:48 ` [PATCH 05 of 24] avoid selecting already killed tasks Andrea Arcangeli
2007-09-13 0:13 ` Christoph Lameter
2007-08-22 12:48 ` [PATCH 06 of 24] reduce the probability of an OOM livelock Andrea Arcangeli
2007-09-12 12:17 ` Andrew Morton
2008-01-03 1:03 ` Andrea Arcangeli
2007-08-22 12:48 ` [PATCH 07 of 24] balance_pgdat doesn't return the number of pages freed Andrea Arcangeli
2007-09-12 12:18 ` Andrew Morton
2007-09-13 0:26 ` Christoph Lameter
2007-08-22 12:48 ` [PATCH 08 of 24] don't depend on PF_EXITING tasks to go away Andrea Arcangeli
2007-09-12 12:20 ` Andrew Morton
2008-01-03 0:56 ` Andrea Arcangeli
2007-08-22 12:48 ` [PATCH 09 of 24] fallback killing more tasks if tif-memdie doesn't " Andrea Arcangeli
2007-09-12 12:30 ` Andrew Morton
2007-09-12 12:34 ` Andrew Morton
2008-01-03 1:06 ` Andrea Arcangeli
2007-08-22 12:48 ` [PATCH 10 of 24] stop useless vm trashing while we wait the TIF_MEMDIE task to exit Andrea Arcangeli
2007-09-12 12:42 ` Andrew Morton
2007-09-13 0:36 ` Christoph Lameter
2007-09-21 19:10 ` David Rientjes
2008-01-03 1:08 ` Andrea Arcangeli
2007-08-22 12:48 ` [PATCH 11 of 24] the oom schedule timeout isn't needed with the VM_is_OOM logic Andrea Arcangeli
2007-09-12 12:44 ` Andrew Morton
2007-08-22 12:48 ` [PATCH 12 of 24] show mem information only when a task is actually being killed Andrea Arcangeli
2007-09-12 12:49 ` Andrew Morton
2007-08-22 12:49 ` [PATCH 13 of 24] simplify oom heuristics Andrea Arcangeli
2007-09-12 12:52 ` Andrew Morton
2007-09-12 13:40 ` Andrea Arcangeli
2007-09-12 20:52 ` Andrew Morton
2007-08-22 12:49 ` [PATCH 14 of 24] oom select should only take rss into account Andrea Arcangeli
2007-09-13 0:43 ` Christoph Lameter
2007-08-22 12:49 ` [PATCH 15 of 24] limit reclaim if enough pages have been freed Andrea Arcangeli
2007-09-12 12:57 ` Andrew Morton
2008-01-03 1:12 ` Andrea Arcangeli
2007-09-12 12:58 ` Andrew Morton
2007-09-12 13:38 ` Andrea Arcangeli
2007-08-22 12:49 ` [PATCH 16 of 24] avoid some lock operation in vm fast path Andrea Arcangeli
2007-09-12 12:59 ` Andrew Morton
2007-09-13 0:49 ` Christoph Lameter
2007-09-13 1:16 ` Andrew Morton
2007-09-13 1:33 ` Christoph Lameter
2007-09-13 1:41 ` KAMEZAWA Hiroyuki
2007-09-13 1:44 ` Andrew Morton
2007-08-22 12:49 ` [PATCH 17 of 24] apply the anti deadlock features only to global oom Andrea Arcangeli
2007-09-12 13:02 ` Andrew Morton
2007-09-13 0:53 ` Christoph Lameter
2007-09-13 0:52 ` Christoph Lameter
2007-08-22 12:49 ` [PATCH 18 of 24] run panic the same way in both places Andrea Arcangeli
2007-09-13 0:54 ` Christoph Lameter
2007-08-22 12:49 ` [PATCH 19 of 24] cacheline align VM_is_OOM to prevent false sharing Andrea Arcangeli
2007-09-12 13:02 ` Andrew Morton
2007-09-12 13:36 ` Andrea Arcangeli
2007-09-13 0:55 ` Christoph Lameter
2007-08-22 12:49 ` [PATCH 20 of 24] extract deadlock helper function Andrea Arcangeli
2007-08-22 12:49 ` [PATCH 21 of 24] select process to kill for cpusets Andrea Arcangeli
2007-09-12 13:05 ` Andrew Morton
2007-09-13 0:59 ` Christoph Lameter
2007-09-13 5:13 ` David Rientjes
2007-09-13 17:55 ` Christoph Lameter
2007-08-22 12:49 ` [PATCH 22 of 24] extract select helper function Andrea Arcangeli
2007-08-22 12:49 ` Andrea Arcangeli [this message]
2007-09-12 13:10 ` [PATCH 23 of 24] serialize for cpusets Andrew Morton
2007-09-12 13:34 ` Andrea Arcangeli
2007-09-12 19:08 ` David Rientjes
2007-09-13 1:02 ` Christoph Lameter
2007-08-22 12:49 ` [PATCH 24 of 24] add oom_kill_asking_task flag Andrea Arcangeli
2007-09-12 13:11 ` Andrew Morton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a3d679df54ebb1f977b9.1187786950@v2.random \
--to=andrea@suse.de \
--cc=linux-mm@kvack.org \
--cc=rientjes@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.