From: Ning Zhang <ningzhang@linux.alibaba.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@kernel.org>,
Vladimir Davydov <vdavydov.dev@gmail.com>,
Yu Zhao <yuzhao@google.com>
Subject: [RFC 3/6] mm, thp: introduce zero subpages reclaim threshold
Date: Thu, 28 Oct 2021 19:56:52 +0800 [thread overview]
Message-ID: <1635422215-99394-4-git-send-email-ningzhang@linux.alibaba.com> (raw)
In-Reply-To: <1635422215-99394-1-git-send-email-ningzhang@linux.alibaba.com>
In this patch, we add memory.thp_reclaim_ctrl for each memory
cgroup to control thp reclaim.
The first controller "threshold" is to set the reclaim threshold.
The default value is 16, which means that if a huge page contains over
16 zero subpages (estimated), the huge page can be split and the
zero subpages can be reclaimed when zero subpages reclaim is
enabled.
You can change this value by:
echo "threshold $v" > /sys/fs/cgroup/memory/{memcg}/thp_reclaim_ctrl
Signed-off-by: Ning Zhang <ningzhang@linux.alibaba.com>
---
include/linux/huge_mm.h | 3 ++-
include/linux/memcontrol.h | 3 +++
mm/huge_memory.c | 9 ++++---
mm/memcontrol.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++
mm/vmscan.c | 4 ++-
5 files changed, 75 insertions(+), 6 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 04607b1..304e3df 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -187,7 +187,8 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
#ifdef CONFIG_MEMCG
extern int global_thp_reclaim;
-int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page);
+int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page,
+ int threshold);
unsigned long zsr_reclaim_hpage(struct lruvec *lruvec, struct page *page);
static inline struct list_head *hpage_reclaim_list(struct page *page)
{
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f99f13f..4815c56 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -237,6 +237,8 @@ enum thp_reclaim_state {
THP_RECLAIM_ENABLE,
THP_RECLAIM_MEMCG, /* For global configure*/
};
+
+#define THP_RECLAIM_THRESHOLD_DEFAULT 16
#endif
/*
* The memory controller data structure. The memory controller controls both
@@ -356,6 +358,7 @@ struct mem_cgroup {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct deferred_split deferred_split_queue;
int thp_reclaim;
+ int thp_reclaim_threshold;
#endif
struct mem_cgroup_per_node *nodeinfo[];
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 84fd738..40a9879 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3270,7 +3270,7 @@ static inline bool is_zero_page(struct page *page)
* We'll split the huge page iff it contains at least 1/32 zeros,
* estimate it by checking some discrete unsigned long values.
*/
-static bool hpage_estimate_zero(struct page *page)
+static bool hpage_estimate_zero(struct page *page, int threshold)
{
unsigned int i, maybe_zero_pages = 0, offset = 0;
void *addr;
@@ -3281,7 +3281,7 @@ static bool hpage_estimate_zero(struct page *page)
if (unlikely((offset + 1) * BYTES_PER_LONG > PAGE_SIZE))
offset = 0;
if (*(const unsigned long *)(addr + offset) == 0UL) {
- if (++maybe_zero_pages == HPAGE_PMD_NR >> 5) {
+ if (++maybe_zero_pages == threshold) {
kunmap(page);
return true;
}
@@ -3456,7 +3456,8 @@ static unsigned long reclaim_zero_subpages(struct list_head *list,
* be stored in reclaim_page; otherwise, just delete the page from the
* queue.
*/
-int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page)
+int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page,
+ int threshold)
{
struct page *page = NULL;
unsigned long flags;
@@ -3482,7 +3483,7 @@ int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page)
spin_unlock_irqrestore(&hr_queue->reclaim_queue_lock, flags);
- if (hpage_can_reclaim(page) && hpage_estimate_zero(page) &&
+ if (hpage_can_reclaim(page) && hpage_estimate_zero(page, threshold) &&
!isolate_lru_page(page)) {
__mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON,
HPAGE_PMD_NR);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ae96781..7ba3c69 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4498,6 +4498,61 @@ static int mem_cgroup_thp_reclaim_write(struct cgroup_subsys_state *css,
return 0;
}
+
+static inline char *strsep_s(char **s, const char *ct)
+{
+ char *p;
+
+ while ((p = strsep(s, ct))) {
+ if (*p)
+ return p;
+ }
+
+ return NULL;
+}
+
+static int memcg_thp_reclaim_ctrl_show(struct seq_file *m, void *v)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+ int thp_reclaim_threshold = READ_ONCE(memcg->thp_reclaim_threshold);
+
+ seq_printf(m, "threshold\t%d\n", thp_reclaim_threshold);
+
+ return 0;
+}
+
+static ssize_t memcg_thp_reclaim_ctrl_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes,
+ loff_t off)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ char *key, *value;
+ int ret;
+
+ key = strsep_s(&buf, " \t\n");
+ if (!key)
+ return -EINVAL;
+
+ if (!strcmp(key, "threshold")) {
+ int threshold;
+
+ value = strsep_s(&buf, " \t\n");
+ if (!value)
+ return -EINVAL;
+
+ ret = kstrtouint(value, 0, &threshold);
+ if (ret)
+ return ret;
+
+ if (threshold > HPAGE_PMD_NR || threshold < 1)
+ return -EINVAL;
+
+ xchg(&memcg->thp_reclaim_threshold, threshold);
+ } else
+ return -EINVAL;
+
+ return nbytes;
+}
#endif
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -5068,6 +5123,11 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
.read_u64 = mem_cgroup_thp_reclaim_read,
.write_u64 = mem_cgroup_thp_reclaim_write,
},
+ {
+ .name = "thp_reclaim_ctrl",
+ .seq_show = memcg_thp_reclaim_ctrl_show,
+ .write = memcg_thp_reclaim_ctrl_write,
+ },
#endif
{ }, /* terminate */
};
@@ -5265,6 +5325,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
memcg->deferred_split_queue.split_queue_len = 0;
memcg->thp_reclaim = THP_RECLAIM_DISABLE;
+ memcg->thp_reclaim_threshold = THP_RECLAIM_THRESHOLD_DEFAULT;
#endif
idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
return memcg;
@@ -5300,6 +5361,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
page_counter_init(&memcg->tcpmem, &parent->tcpmem);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
memcg->thp_reclaim = parent->thp_reclaim;
+ memcg->thp_reclaim_threshold = parent->thp_reclaim_threshold;
#endif
} else {
page_counter_init(&memcg->memory, NULL);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f4ff14d..fcc80a6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2794,6 +2794,7 @@ static unsigned long reclaim_hpage_zero_subpages(struct lruvec *lruvec,
struct mem_cgroup *memcg;
struct hpage_reclaim *hr_queue;
int nid = lruvec->pgdat->node_id;
+ int threshold;
unsigned long nr_reclaimed = 0, nr_scanned = 0, nr_to_scan;
memcg = lruvec_memcg(lruvec);
@@ -2806,11 +2807,12 @@ static unsigned long reclaim_hpage_zero_subpages(struct lruvec *lruvec,
/* The last scan loop will scan all the huge pages.*/
nr_to_scan = priority == 0 ? 0 : MAX_SCAN_HPAGE;
+ threshold = READ_ONCE(memcg->thp_reclaim_threshold);
do {
struct page *page = NULL;
- if (zsr_get_hpage(hr_queue, &page))
+ if (zsr_get_hpage(hr_queue, &page, threshold))
break;
if (!page)
--
1.8.3.1
next prev parent reply other threads:[~2021-10-28 11:57 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-10-28 11:56 [RFC 0/6] Reclaim zero subpages of thp to avoid memory bloat Ning Zhang
2021-10-28 11:56 ` [RFC 1/6] mm, thp: introduce thp zero subpages reclaim Ning Zhang
2021-10-28 12:53 ` Matthew Wilcox
2021-10-29 12:16 ` ning zhang
2021-10-28 11:56 ` [RFC 2/6] mm, thp: add a global interface for zero subapges reclaim Ning Zhang
2021-10-28 11:56 ` Ning Zhang [this message]
2021-10-28 11:56 ` [RFC 4/6] mm, thp: introduce a controller to trigger zero subpages reclaim Ning Zhang
2021-10-28 11:56 ` [RFC 5/6] mm, thp: add some statistics for " Ning Zhang
2021-10-28 11:56 ` [RFC 6/6] mm, thp: add document " Ning Zhang
2021-10-28 14:13 ` [RFC 0/6] Reclaim zero subpages of thp to avoid memory bloat Kirill A. Shutemov
2021-10-29 12:07 ` ning zhang
2021-10-29 16:56 ` Yang Shi
2021-11-01 2:50 ` ning zhang
2021-10-29 13:38 ` Michal Hocko
2021-10-29 16:12 ` ning zhang
2021-11-01 9:20 ` Michal Hocko
2021-11-08 3:24 ` ning zhang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1635422215-99394-4-git-send-email-ningzhang@linux.alibaba.com \
--to=ningzhang@linux.alibaba.com \
--cc=akpm@linux-foundation.org \
--cc=hannes@cmpxchg.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=vdavydov.dev@gmail.com \
--cc=yuzhao@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox