From: Huan Yang <link@vivo.com>
To: Tejun Heo <tj@kernel.org>, Zefan Li <lizefan.x@bytedance.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Michal Hocko <mhocko@kernel.org>,
Roman Gushchin <roman.gushchin@linux.dev>,
Shakeel Butt <shakeelb@google.com>,
Muchun Song <muchun.song@linux.dev>,
Andrew Morton <akpm@linux-foundation.org>,
David Hildenbrand <david@redhat.com>,
Matthew Wilcox <willy@infradead.org>,
Huang Ying <ying.huang@intel.com>,
Kefeng Wang <wangkefeng.wang@huawei.com>,
"Vishal Moola (Oracle)" <vishal.moola@gmail.com>,
Yosry Ahmed <yosryahmed@google.com>,
Liu Shixin <liushixin2@huawei.com>, Peter Xu <peterx@redhat.com>,
Hugh Dickins <hughd@google.com>,
cgroups@vger.kernel.org, linux-doc@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: opensource.kernel@vivo.com, Huan Yang <link@vivo.com>
Subject: [PATCH 4/4] mm: memcg: apply proactive reclaim into cgroupv1
Date: Wed, 8 Nov 2023 14:58:15 +0800 [thread overview]
Message-ID: <20231108065818.19932-5-link@vivo.com> (raw)
In-Reply-To: <20231108065818.19932-1-link@vivo.com>
For Android use cases, apply the proactive reclaim interface
(memory.reclaim) to cgroup v1.
Signed-off-by: Huan Yang <link@vivo.com>
---
.../admin-guide/cgroup-v1/memory.rst | 38 +++-
mm/memcontrol.c | 170 +++++++++---------
2 files changed, 124 insertions(+), 84 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
index ca7d9402f6be..600bf26a470a 100644
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -90,6 +90,7 @@ Brief summary of control files.
This knob is deprecated and shouldn't be
used.
memory.oom_control set/show oom controls.
+ memory.reclaim trigger proactive memory reclaim.
memory.numa_stat show the number of memory usage per numa
node
memory.kmem.limit_in_bytes Deprecated knob to set and read the kernel
@@ -972,7 +973,42 @@ Test:
(Expect a bunch of notifications, and eventually, the oom-killer will
trigger.)
-12. TODO
+12. Proactive Reclaim
+=====================
+memory.reclaim A write-only nested-keyed file which exists for all cgroups.
+
+This is a simple interface to trigger memory reclaim in the
+target cgroup.
+
+This file accepts a single key, the number of bytes to reclaim.
+Few nested keys are currently supported.
+
+Example::
+
+ echo "1G" > memory.reclaim
+
+The interface is extended with a nested key to configure the
+reclaim behavior. For example, specify the swappiness of
+the memory to reclaim from (anon, file, ..).
+
+Example::
+
+ echo "1G" 200 > memory.reclaim (only reclaim anon)
+ echo "1G" 0 > memory.reclaim (only reclaim file)
+ echo "1G" 1 > memory.reclaim (only reclaim file)
+
+Please note that the kernel can over- or under-reclaim from
+the target cgroup. If fewer bytes are reclaimed than the
+specified amount, -EAGAIN is returned.
+
+Please note that the proactive reclaim (triggered by this
+interface) is not meant to indicate memory pressure on the
+memory cgroup. Therefore socket memory balancing triggered by
+the memory reclaim normally is not exercised in this case.
+This means that the networking layer will not adapt based on
+reclaim induced by memory.reclaim.
+
+13. TODO
========
1. Make per-cgroup scanner reclaim not-shared pages first
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a0e460abd41c..03de3387d714 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5209,6 +5209,89 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p)
static int memory_stat_show(struct seq_file *m, void *v);
+static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ unsigned int nr_retries = MAX_RECLAIM_RETRIES;
+ unsigned long nr_to_reclaim, nr_reclaimed = 0;
+ unsigned int reclaim_options;
+ int swappiness = -1, org_swappiness, n;
+ char *tmpbuf;
+ int err;
+
+ tmpbuf = kvzalloc(nbytes, GFP_KERNEL);
+ if (unlikely(!tmpbuf))
+ return -ENOMEM;
+
+ buf = skip_spaces(buf);
+ n = sscanf(buf, "%s %d", tmpbuf, &swappiness);
+ if (n < 1) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ if (n == 2 && (swappiness > 200 || swappiness < 0)) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ err = page_counter_memparse(tmpbuf, "", &nr_to_reclaim);
+ if (err)
+ goto out_free;
+
+ reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
+ if (swappiness != -1) {
+ org_swappiness = memcg->swappiness;
+ memcg->swappiness = swappiness;
+ if (swappiness == 200)
+ reclaim_options |= MEMCG_RECLAIM_ANON;
+ else if (swappiness == 0 || swappiness == 1)
+ reclaim_options |= MEMCG_RECLAIM_FILE;
+ }
+
+ while (nr_reclaimed < nr_to_reclaim) {
+ unsigned long reclaimed;
+
+ if (signal_pending(current)) {
+ err = -EINTR;
+ goto out;
+ }
+
+ /*
+ * This is the final attempt, drain percpu lru caches in the
+ * hope of introducing more evictable pages for
+ * try_to_free_mem_cgroup_pages().
+ */
+ if (!nr_retries)
+ lru_add_drain_all();
+
+ reclaimed = try_to_free_mem_cgroup_pages(memcg,
+ min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
+ GFP_KERNEL, reclaim_options);
+
+ if (!reclaimed && !nr_retries--) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ nr_reclaimed += reclaimed;
+ }
+
+ if (swappiness != -1)
+ memcg->swappiness = org_swappiness;
+
+ return nbytes;
+
+out:
+ if (swappiness != -1)
+ memcg->swappiness = org_swappiness;
+
+out_free:
+ kvfree(tmpbuf);
+ return err;
+}
+
static struct cftype mem_cgroup_legacy_files[] = {
{
.name = "usage_in_bytes",
@@ -5272,6 +5355,10 @@ static struct cftype mem_cgroup_legacy_files[] = {
.seq_show = mem_cgroup_oom_control_read,
.write_u64 = mem_cgroup_oom_control_write,
},
+ {
+ .name = "reclaim",
+ .write = memory_reclaim,
+ },
{
.name = "pressure_level",
.seq_show = mem_cgroup_dummy_seq_show,
@@ -6946,89 +7033,6 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
return nbytes;
}
-static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
- size_t nbytes, loff_t off)
-{
- struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
- unsigned int nr_retries = MAX_RECLAIM_RETRIES;
- unsigned long nr_to_reclaim, nr_reclaimed = 0;
- unsigned int reclaim_options;
- int swappiness = -1, org_swappiness, n;
- char *tmpbuf;
- int err;
-
- tmpbuf = kvzalloc(nbytes, GFP_KERNEL);
- if (unlikely(!tmpbuf))
- return -ENOMEM;
-
- buf = skip_spaces(buf);
- n = sscanf(buf, "%s %d", tmpbuf, &swappiness);
- if (n < 1) {
- err = -EINVAL;
- goto out_free;
- }
-
- if (n == 2 && (swappiness > 200 || swappiness < 0)) {
- err = -EINVAL;
- goto out_free;
- }
-
- err = page_counter_memparse(tmpbuf, "", &nr_to_reclaim);
- if (err)
- goto out_free;
-
- reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
- if (swappiness != -1) {
- org_swappiness = memcg->swappiness;
- memcg->swappiness = swappiness;
- if (swappiness == 200)
- reclaim_options |= MEMCG_RECLAIM_ANON;
- else if (swappiness == 0 || swappiness == 1)
- reclaim_options |= MEMCG_RECLAIM_FILE;
- }
-
- while (nr_reclaimed < nr_to_reclaim) {
- unsigned long reclaimed;
-
- if (signal_pending(current)) {
- err = -EINTR;
- goto out;
- }
-
- /*
- * This is the final attempt, drain percpu lru caches in the
- * hope of introducing more evictable pages for
- * try_to_free_mem_cgroup_pages().
- */
- if (!nr_retries)
- lru_add_drain_all();
-
- reclaimed = try_to_free_mem_cgroup_pages(memcg,
- min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
- GFP_KERNEL, reclaim_options);
-
- if (!reclaimed && !nr_retries--) {
- err = -EAGAIN;
- goto out;
- }
-
- nr_reclaimed += reclaimed;
- }
-
- if (swappiness != -1)
- memcg->swappiness = org_swappiness;
-
- return nbytes;
-
-out:
- if (swappiness != -1)
- memcg->swappiness = org_swappiness;
-
-out_free:
- kvfree(tmpbuf);
- return err;
-}
-
static struct cftype memory_files[] = {
{
.name = "current",
--
2.34.1
next prev parent reply other threads:[~2023-11-08 6:59 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-08 6:58 [RFC 0/4] Introduce unbalance proactive reclaim Huan Yang
2023-11-08 6:58 ` [PATCH 1/4] mm: vmscan: LRU unbalance cgroup reclaim Huan Yang
2023-11-08 6:58 ` [PATCH 2/4] mm: multi-gen LRU: MGLRU unbalance reclaim Huan Yang
2023-11-08 12:34 ` kernel test robot
2023-11-09 11:08 ` kernel test robot
2023-12-04 6:53 ` Dan Carpenter
2023-11-08 6:58 ` [PATCH 3/4] mm: memcg: implement unbalance proactive reclaim Huan Yang
2023-11-08 6:58 ` Huan Yang [this message]
2023-11-08 21:06 ` [PATCH 4/4] mm: memcg: apply proactive reclaim into cgroupv1 kernel test robot
2023-11-08 7:35 ` [RFC 0/4] Introduce unbalance proactive reclaim Huang, Ying
2023-11-08 7:53 ` Huan Yang
2023-11-08 8:09 ` Huang, Ying
2023-11-08 8:14 ` Yosry Ahmed
2023-11-08 8:21 ` Huan Yang
2023-11-08 9:00 ` Yosry Ahmed
2023-11-08 9:05 ` Huan Yang
2023-11-08 8:00 ` Yosry Ahmed
2023-11-08 8:26 ` Huan Yang
2023-11-08 8:59 ` Yosry Ahmed
2023-11-08 9:12 ` Huan Yang
2023-11-08 14:06 ` Michal Hocko
2023-11-09 1:56 ` Huan Yang
2023-11-09 3:15 ` Huang, Ying
2023-11-09 3:38 ` Huan Yang
2023-11-09 9:57 ` Michal Hocko
2023-11-09 10:29 ` Huan Yang
2023-11-09 10:39 ` Michal Hocko
2023-11-09 10:50 ` Huan Yang
2023-11-09 12:40 ` Michal Hocko
2023-11-09 13:07 ` Huan Yang
2023-11-09 13:46 ` Michal Hocko
2023-11-10 3:48 ` Huan Yang
2023-11-10 12:24 ` Michal Hocko
2023-11-13 2:17 ` Huan Yang
2023-11-13 6:10 ` Huang, Ying
2023-11-13 6:28 ` Huan Yang
2023-11-13 8:05 ` Huang, Ying
2023-11-13 8:26 ` Huan Yang
2023-11-14 9:54 ` Michal Hocko
2023-11-14 9:56 ` Michal Hocko
2023-11-15 6:52 ` Huang, Ying
2023-11-14 9:50 ` Michal Hocko
2023-11-10 1:19 ` Huang, Ying
2023-11-10 2:44 ` Huan Yang
2023-11-10 4:00 ` Huang, Ying
2023-11-10 6:21 ` Huan Yang
2023-11-10 12:32 ` Michal Hocko
2023-11-13 1:54 ` Huan Yang
2023-11-14 10:04 ` Michal Hocko
2023-11-14 12:37 ` Huan Yang
2023-11-14 13:03 ` Michal Hocko
2023-11-15 2:11 ` Huan Yang
2023-11-09 9:53 ` Michal Hocko
2023-11-09 10:55 ` Huan Yang
2023-11-09 12:45 ` Michal Hocko
2023-11-09 13:10 ` Huan Yang
2023-11-08 16:14 ` Andrew Morton
2023-11-09 1:58 ` Huan Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231108065818.19932-5-link@vivo.com \
--to=link@vivo.com \
--cc=akpm@linux-foundation.org \
--cc=cgroups@vger.kernel.org \
--cc=corbet@lwn.net \
--cc=david@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=liushixin2@huawei.com \
--cc=lizefan.x@bytedance.com \
--cc=mhocko@kernel.org \
--cc=muchun.song@linux.dev \
--cc=opensource.kernel@vivo.com \
--cc=peterx@redhat.com \
--cc=roman.gushchin@linux.dev \
--cc=shakeelb@google.com \
--cc=tj@kernel.org \
--cc=vishal.moola@gmail.com \
--cc=wangkefeng.wang@huawei.com \
--cc=willy@infradead.org \
--cc=ying.huang@intel.com \
--cc=yosryahmed@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox