From: Waiman Long <longman@redhat.com>
To: Andrew Morton <akpm@linux-foundation.org>,
Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@kernel.org>,
Vladimir Davydov <vdavydov.dev@gmail.com>,
Jonathan Corbet <corbet@lwn.net>,
Alexey Dobriyan <adobriyan@gmail.com>,
Ingo Molnar <mingo@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Juri Lelli <juri.lelli@redhat.com>,
Vincent Guittot <vincent.guittot@linaro.org>
Cc: linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
linux-fsdevel@vger.kernel.org, cgroups@vger.kernel.org,
linux-mm@kvack.org, Waiman Long <longman@redhat.com>
Subject: [RFC PATCH 2/8] memcg, mm: Return ENOMEM or delay if memcg_over_limit
Date: Mon, 17 Aug 2020 10:08:25 -0400 [thread overview]
Message-ID: <20200817140831.30260-3-longman@redhat.com> (raw)
In-Reply-To: <20200817140831.30260-1-longman@redhat.com>
The brk(), mmap(), mlock(), mlockall() and mprotect() syscalls are
modified to check the memcg_over_limit flag and return ENOMEM when it
is set and the memory control action is PR_MEMACT_ENOMEM.
If the action is PR_MEMACT_SLOWDOWN instead, an artificial delay of 20ms
is added to these syscalls while the memcg_over_limit flag is set.
Signed-off-by: Waiman Long <longman@redhat.com>
---
include/linux/sched.h | 16 ++++++++++++++++
kernel/fork.c | 1 +
mm/memcontrol.c | 25 +++++++++++++++++++++++--
mm/mlock.c | 6 ++++++
mm/mmap.c | 12 ++++++++++++
mm/mprotect.c | 3 +++
6 files changed, 61 insertions(+), 2 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c79d606d27ab..9ec1bd072334 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1477,6 +1477,22 @@ static inline char task_state_to_char(struct task_struct *tsk)
return task_index_to_char(task_state_index(tsk));
}
+#ifdef CONFIG_MEMCG
+extern bool mem_cgroup_check_over_limit(void);
+
+static inline bool mem_over_memcg_limit(void)
+{
+ if (READ_ONCE(current->memcg_over_limit))
+ return mem_cgroup_check_over_limit();
+ return false;
+}
+#else
+static inline bool mem_over_memcg_limit(void)
+{
+ return false;
+}
+#endif
+
/**
* is_global_init - check if a task structure is init. Since init
* is free to have sub-threads we need to check tgid.
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d32190861bd..61f9a9e5f857 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -940,6 +940,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#ifdef CONFIG_MEMCG
tsk->active_memcg = NULL;
+ tsk->memcg_over_limit = false;
#endif
return tsk;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1106dac024ac..5cad7bb26d13 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2646,7 +2646,9 @@ static bool __mem_cgroup_over_high_action(struct mem_cgroup *memcg, u8 action)
if (!mm)
return true; /* No more check is needed */
- current->memcg_over_limit = false;
+ if (READ_ONCE(current->memcg_over_limit))
+ WRITE_ONCE(current->memcg_over_limit, false);
+
if ((action == PR_MEMACT_SIGNAL) && !signal)
goto out;
@@ -2660,7 +2662,11 @@ static bool __mem_cgroup_over_high_action(struct mem_cgroup *memcg, u8 action)
WRITE_ONCE(current->memcg_over_limit, true);
break;
case PR_MEMACT_SLOWDOWN:
- /* Slow down by yielding the cpu */
+ /*
+ * Slow down by yielding the cpu & adding delay to
+ * memory allocation syscalls.
+ */
+ WRITE_ONCE(current->memcg_over_limit, true);
set_tsk_need_resched(current);
set_preempt_need_resched();
break;
@@ -2694,6 +2700,21 @@ static inline bool mem_cgroup_over_high_action(struct mem_cgroup *memcg)
return __mem_cgroup_over_high_action(memcg, action);
}
+/*
+ * Called from memory allocation syscalls.
+ * Return true if ENOMEM should be returned, false otherwise.
+ */
+bool mem_cgroup_check_over_limit(void)
+{
+ u8 action = READ_ONCE(current->memcg_over_high_action);
+
+ if (action == PR_MEMACT_ENOMEM)
+ return true;
+ if (action == PR_MEMACT_SLOWDOWN)
+ msleep(20); /* Artificial delay of 20ms */
+ return false;
+}
+
static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
unsigned int nr_pages)
{
diff --git a/mm/mlock.c b/mm/mlock.c
index 93ca2bf30b4f..130d4b3fa0f5 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -678,6 +678,9 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
if (!can_do_mlock())
return -EPERM;
+ if (mem_over_memcg_limit())
+ return -ENOMEM;
+
len = PAGE_ALIGN(len + (offset_in_page(start)));
start &= PAGE_MASK;
@@ -807,6 +810,9 @@ SYSCALL_DEFINE1(mlockall, int, flags)
if (!can_do_mlock())
return -EPERM;
+ if (mem_over_memcg_limit())
+ return -ENOMEM;
+
lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
diff --git a/mm/mmap.c b/mm/mmap.c
index 40248d84ad5f..873ccf2560a6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -198,6 +198,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
bool downgraded = false;
LIST_HEAD(uf);
+ /* Too much memory used? */
+ if (mem_over_memcg_limit())
+ return -ENOMEM;
+
if (mmap_write_lock_killable(mm))
return -EINTR;
@@ -1407,6 +1411,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;
+ /* Too much memory used? */
+ if (mem_over_memcg_limit())
+ return -ENOMEM;
+
/* Obtain the address to map to. we verify (or select) it and ensure
* that it represents a valid section of the address space.
*/
@@ -1557,6 +1565,10 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
struct file *file = NULL;
unsigned long retval;
+ /* Too much memory used? */
+ if (mem_over_memcg_limit())
+ return -ENOMEM;
+
if (!(flags & MAP_ANONYMOUS)) {
audit_mmap_fd(fd, flags);
file = fget(fd);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ce8b8a5eacbb..b2c0f50bb0a0 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -519,6 +519,9 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
(prot & PROT_READ);
+ if (mem_over_memcg_limit())
+ return -ENOMEM;
+
start = untagged_addr(start);
prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
--
2.18.1
next prev parent reply other threads:[~2020-08-17 14:10 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-08-17 14:08 [RFC PATCH 0/8] memcg: Enable fine-grained per process memory control Waiman Long
2020-08-17 14:08 ` [RFC PATCH 1/8] memcg: Enable fine-grained control of over memory.high action Waiman Long
2020-08-17 14:30 ` Chris Down
2020-08-17 15:38 ` Waiman Long
2020-08-17 16:11 ` Chris Down
2020-08-17 16:44 ` Shakeel Butt
2020-08-17 16:56 ` Chris Down
2020-08-18 19:12 ` Waiman Long
2020-08-18 19:14 ` Waiman Long
2020-08-17 14:08 ` Waiman Long [this message]
2020-08-17 14:08 ` [RFC PATCH 3/8] memcg: Allow the use of task RSS memory as over-high action trigger Waiman Long
2020-08-17 14:08 ` [RFC PATCH 4/8] fs/proc: Support a new procfs memctl file Waiman Long
2020-08-17 14:08 ` [RFC PATCH 5/8] memcg: Allow direct per-task memory limit checking Waiman Long
2020-08-17 14:08 ` [RFC PATCH 6/8] memcg: Introduce additional memory control slowdown if needed Waiman Long
2020-08-17 14:08 ` [RFC PATCH 7/8] memcg: Enable logging of memory control mitigation action Waiman Long
2020-08-17 14:08 ` [RFC PATCH 8/8] memcg: Add over-high action prctl() documentation Waiman Long
2020-08-17 15:26 ` [RFC PATCH 0/8] memcg: Enable fine-grained per process memory control Michal Hocko
2020-08-17 15:55 ` Waiman Long
2020-08-17 19:26 ` Michal Hocko
2020-08-18 19:20 ` Waiman Long
2020-08-18 9:14 ` peterz
2020-08-18 9:26 ` Michal Hocko
2020-08-18 9:59 ` peterz
2020-08-18 10:05 ` Michal Hocko
2020-08-18 10:18 ` peterz
2020-08-18 10:30 ` Michal Hocko
2020-08-18 10:36 ` peterz
2020-08-18 13:49 ` Johannes Weiner
2020-08-21 19:37 ` Peter Zijlstra
2020-08-24 16:58 ` Johannes Weiner
2020-09-07 11:47 ` Chris Down
2020-09-09 11:53 ` Michal Hocko
2020-08-18 10:17 ` Chris Down
2020-08-18 10:26 ` peterz
2020-08-18 10:35 ` Chris Down
2020-08-23 2:49 ` Waiman Long
2020-08-18 9:27 ` Chris Down
2020-08-18 10:04 ` peterz
2020-08-18 12:55 ` Matthew Wilcox
2020-08-20 6:11 ` Dave Chinner
2020-08-18 19:30 ` Waiman Long
2020-08-18 19:27 ` Waiman Long
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200817140831.30260-3-longman@redhat.com \
--to=longman@redhat.com \
--cc=adobriyan@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=cgroups@vger.kernel.org \
--cc=corbet@lwn.net \
--cc=hannes@cmpxchg.org \
--cc=juri.lelli@redhat.com \
--cc=linux-doc@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=mingo@kernel.org \
--cc=peterz@infradead.org \
--cc=vdavydov.dev@gmail.com \
--cc=vincent.guittot@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).