From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@suse.cz>,
David Rientjes <rientjes@google.com>,
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
azurIt <azurit@pobox.sk>,
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
Andrew Morton <akpm@linux-foundation.org>,
Linus Torvalds <torvalds@linux-foundation.org>,
Cong Wang <xiyou.wangcong@gmail.com>
Subject: [PATCH 3.10 67/70] mm: memcg: enable memcg OOM killer only for user faults
Date: Wed, 19 Nov 2014 12:52:58 -0800 [thread overview]
Message-ID: <20141119205213.126536266@linuxfoundation.org> (raw)
In-Reply-To: <20141119205210.913169042@linuxfoundation.org>
3.10-stable review patch. If anyone has any objections, please let me know.
------------------
From: Johannes Weiner <hannes@cmpxchg.org>
commit 519e52473ebe9db5cdef44670d5a97f1fd53d721 upstream.
System calls and kernel faults (uaccess, gup) can handle an out of memory
situation gracefully and just return -ENOMEM.
Enable the memcg OOM killer only for user faults, where it's really the
only option available.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/linux/memcontrol.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
include/linux/sched.h | 3 +++
mm/filemap.c | 11 ++++++++++-
mm/memcontrol.c | 2 +-
mm/memory.c | 40 ++++++++++++++++++++++++++++++----------
5 files changed, 88 insertions(+), 12 deletions(-)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -124,6 +124,37 @@ extern void mem_cgroup_print_oom_info(st
extern void mem_cgroup_replace_page_cache(struct page *oldpage,
struct page *newpage);
+/**
+ * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task
+ * @new: true to enable, false to disable
+ *
+ * Toggle whether a failed memcg charge should invoke the OOM killer
+ * or just return -ENOMEM. Returns the previous toggle state.
+ */
+static inline bool mem_cgroup_toggle_oom(bool new)
+{
+ bool old;
+
+ old = current->memcg_oom.may_oom;
+ current->memcg_oom.may_oom = new;
+
+ return old;
+}
+
+static inline void mem_cgroup_enable_oom(void)
+{
+ bool old = mem_cgroup_toggle_oom(true);
+
+ WARN_ON(old == true);
+}
+
+static inline void mem_cgroup_disable_oom(void)
+{
+ bool old = mem_cgroup_toggle_oom(false);
+
+ WARN_ON(old == false);
+}
+
#ifdef CONFIG_MEMCG_SWAP
extern int do_swap_account;
#endif
@@ -347,6 +378,19 @@ static inline void mem_cgroup_end_update
{
}
+static inline bool mem_cgroup_toggle_oom(bool new)
+{
+ return false;
+}
+
+static inline void mem_cgroup_enable_oom(void)
+{
+}
+
+static inline void mem_cgroup_disable_oom(void)
+{
+}
+
static inline void mem_cgroup_inc_page_stat(struct page *page,
enum mem_cgroup_page_stat_item idx)
{
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1411,6 +1411,9 @@ struct task_struct {
unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
} memcg_batch;
unsigned int memcg_kmem_skip_account;
+ struct memcg_oom_info {
+ unsigned int may_oom:1;
+ } memcg_oom;
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
atomic_t ptrace_bp_refcnt;
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1614,6 +1614,7 @@ int filemap_fault(struct vm_area_struct
struct inode *inode = mapping->host;
pgoff_t offset = vmf->pgoff;
struct page *page;
+ bool memcg_oom;
pgoff_t size;
int ret = 0;
@@ -1622,7 +1623,11 @@ int filemap_fault(struct vm_area_struct
return VM_FAULT_SIGBUS;
/*
- * Do we have something in the page cache already?
+ * Do we have something in the page cache already? Either
+ * way, try readahead, but disable the memcg OOM killer for it
+ * as readahead is optional and no errors are propagated up
+ * the fault stack. The OOM killer is enabled while trying to
+ * instantiate the faulting page individually below.
*/
page = find_get_page(mapping, offset);
if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
@@ -1630,10 +1635,14 @@ int filemap_fault(struct vm_area_struct
* We found the page, so try async readahead before
* waiting for the lock.
*/
+ memcg_oom = mem_cgroup_toggle_oom(false);
do_async_mmap_readahead(vma, ra, file, page, offset);
+ mem_cgroup_toggle_oom(memcg_oom);
} else if (!page) {
/* No page in the page cache at all */
+ memcg_oom = mem_cgroup_toggle_oom(false);
do_sync_mmap_readahead(vma, ra, file, offset);
+ mem_cgroup_toggle_oom(memcg_oom);
count_vm_event(PGMAJFAULT);
mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
ret = VM_FAULT_MAJOR;
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2613,7 +2613,7 @@ static int mem_cgroup_do_charge(struct m
return CHARGE_RETRY;
/* If we don't need to call oom-killer at el, return immediately */
- if (!oom_check)
+ if (!oom_check || !current->memcg_oom.may_oom)
return CHARGE_NOMEM;
/* check OOM */
if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize)))
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3754,22 +3754,14 @@ unlock:
/*
* By the time we get here, we already hold the mm semaphore
*/
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, unsigned int flags)
+static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- __set_current_state(TASK_RUNNING);
-
- count_vm_event(PGFAULT);
- mem_cgroup_count_vm_event(mm, PGFAULT);
-
- /* do counter updates before entering really critical section. */
- check_sync_rss_stat(current);
-
if (unlikely(is_vm_hugetlb_page(vma)))
return hugetlb_fault(mm, vma, address, flags);
@@ -3850,6 +3842,34 @@ retry:
return handle_pte_fault(mm, vma, address, pte, pmd, flags);
}
+int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags)
+{
+ int ret;
+
+ __set_current_state(TASK_RUNNING);
+
+ count_vm_event(PGFAULT);
+ mem_cgroup_count_vm_event(mm, PGFAULT);
+
+ /* do counter updates before entering really critical section. */
+ check_sync_rss_stat(current);
+
+ /*
+ * Enable the memcg OOM handling for faults triggered in user
+ * space. Kernel faults are handled more gracefully.
+ */
+ if (flags & FAULT_FLAG_USER)
+ mem_cgroup_enable_oom();
+
+ ret = __handle_mm_fault(mm, vma, address, flags);
+
+ if (flags & FAULT_FLAG_USER)
+ mem_cgroup_disable_oom();
+
+ return ret;
+}
+
#ifndef __PAGETABLE_PUD_FOLDED
/*
* Allocate page upper directory.
next prev parent reply other threads:[~2014-11-19 20:52 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-11-19 20:51 [PATCH 3.10 00/70] 3.10.61-stable review Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 01/70] ip6_tunnel: Use ip6_tnl_dev_init as the ndo_init function Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 02/70] gre6: Move the setting of dev->iflink into the ndo_init functions Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 03/70] net: sctp: fix NULL pointer dereference in af->from_addr_param on malformed packet Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 04/70] net: sctp: fix memory leak in auth key management Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 05/70] sunvdc: add cdrom and v1.1 protocol support Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 06/70] sunvdc: compute vdisk geometry from capacity Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 07/70] sunvdc: limit each sg segment to a page Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 08/70] vio: fix reuse of vio_dring slot Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 09/70] sunvdc: dont call VD_OP_GET_VTOC Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 10/70] sparc64: Fix crashes in schizo_pcierr_intr_other() Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 11/70] sparc64: Do irq_{enter,exit}() around generic_smp_call_function*() Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 12/70] sparc32: Implement xchg and atomic_xchg using ATOMIC_HASH locks Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 13/70] x86, x32, audit: Fix x32s AUDIT_ARCH wrt audit Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 14/70] audit: keep inode pinned Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 15/70] ahci: Add Device IDs for Intel Sunrise Point PCH Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 16/70] ahci: disable MSI instead of NCQ on Samsung pci-e SSDs on macbooks Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 17/70] ALSA: usb-audio: Fix memory leak in FTU quirk Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 18/70] xtensa: re-wire umount syscall to sys_oldumount Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 19/70] libceph: do not crash on large auth tickets Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 20/70] iwlwifi: configure the LTR Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 21/70] macvtap: Fix csum_start when VLAN tags are present Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 22/70] mac80211: fix use-after-free in defragmentation Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 23/70] drm/radeon: add missing crtc unlock when setting up the MC Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 26/70] firewire: cdev: prevent kernel stack leaking into ioctl arguments Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 27/70] nfs: fix pnfs direct write memory leak Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 28/70] scsi: only re-lock door after EH on devices that were reset Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 29/70] parisc: Use compat layer for msgctl, shmat, shmctl and semtimedop syscalls Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 30/70] block: Fix computation of merged request priority Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 31/70] dm btree: fix a recursion depth bug in btree walking code Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 32/70] dm raid: ensure superblocks size matches devices logical block size Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 35/70] NFSv4: Ensure that we remove NFSv4.0 delegations when state has expired Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 36/70] NFS: Dont try to reclaim delegation open state if recovery failed Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 37/70] nfs: Fix use of uninitialized variable in nfs_getattr() Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 38/70] NFSv4: Fix races between nfs_remove_bad_delegation() and delegation return Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 39/70] media: ttusb-dec: buffer overflow in ioctl Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 40/70] kgdb: Remove "weak" from kgdb_arch_pc() declaration Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 41/70] clocksource: Remove "weak" from clocksource_default_clock() declaration Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 42/70] ipc: always handle a new value of auto_msgmni Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 43/70] netfilter: nf_log: account for size of NLMSG_DONE attribute Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 44/70] netfilter: nfnetlink_log: fix maximum packet length logged to userspace Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 45/70] netfilter: nf_log: release skbuff on nlmsg put failure Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 46/70] netfilter: xt_bpf: add mising opaque struct sk_filter definition Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 47/70] netfilter: nf_nat: fix oops on netns removal Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 48/70] br: fix use of ->rx_handler_data in code executed on non-rx_handler path Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 49/70] ARM: probes: fix instruction fetch order with <asm/opcodes.h> Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 51/70] MIPS: Fix forgotten preempt_enable() when CPU has inclusive pcaches Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 52/70] perf: Handle compat ioctl Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 53/70] mei: bus: fix possible boundaries violation Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 54/70] perf/x86/intel: Use proper dTLB-load-misses event on IvyBridge Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 55/70] ARM: Correct BUG() assembly to ensure it is endian-agnostic Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 56/70] net/mlx4_en: Fix BlueFlame race Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 57/70] SCSI: hpsa: fix a race in cmd_free/scsi_done Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 58/70] KVM: x86: Dont report guest userspace emulation error to userspace Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 59/70] net: sctp: fix remote memory pressure from excessive queueing Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 60/70] net: sctp: fix panic on duplicate ASCONF chunks Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 61/70] net: sctp: fix skb_over_panic when receiving malformed " Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 62/70] mm: invoke oom-killer from remaining unconverted page fault handlers Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 63/70] arch: mm: remove obsolete init OOM protection Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 64/70] arch: mm: do not invoke OOM killer on kernel fault OOM Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 65/70] arch: mm: pass userspace fault flag to generic fault handler Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 66/70] x86: finish user fault error path with fatal signal Greg Kroah-Hartman
2014-11-19 20:52 ` Greg Kroah-Hartman [this message]
2014-11-19 20:52 ` [PATCH 3.10 68/70] mm: memcg: rework and document OOM waiting and wakeup Greg Kroah-Hartman
2014-11-19 20:53 ` [PATCH 3.10 69/70] mm: memcg: do not trap chargers with full callstack on OOM Greg Kroah-Hartman
2014-11-19 20:53 ` [PATCH 3.10 70/70] mm: memcg: handle non-error OOM situations more gracefully Greg Kroah-Hartman
2014-11-20 5:30 ` [PATCH 3.10 00/70] 3.10.61-stable review Guenter Roeck
2014-11-21 1:38 ` Shuah Khan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141119205213.126536266@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=akpm@linux-foundation.org \
--cc=azurit@pobox.sk \
--cc=hannes@cmpxchg.org \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=kosaki.motohiro@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mhocko@suse.cz \
--cc=rientjes@google.com \
--cc=stable@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=xiyou.wangcong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).