From: Johannes Weiner <jweiner@redhat.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>,
Balbir Singh <bsingharora@gmail.com>,
Ying Han <yinghan@google.com>, Michal Hocko <mhocko@suse.cz>,
Greg Thelen <gthelen@google.com>,
Michel Lespinasse <walken@google.com>,
Rik van Riel <riel@redhat.com>,
Minchan Kim <minchan.kim@gmail.com>,
Christoph Hellwig <hch@infradead.org>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: [patch 05/11] mm: move memcg hierarchy reclaim to generic reclaim code
Date: Mon, 12 Sep 2011 12:57:22 +0200 [thread overview]
Message-ID: <1315825048-3437-6-git-send-email-jweiner@redhat.com> (raw)
In-Reply-To: <1315825048-3437-1-git-send-email-jweiner@redhat.com>
Memory cgroup limit reclaim and traditional global pressure reclaim
will soon share the same code to reclaim from a hierarchical tree of
memory cgroups.
In preparation for this, move the two right next to each other in
shrink_zone().
Signed-off-by: Johannes Weiner <jweiner@redhat.com>
---
include/linux/memcontrol.h | 25 ++++++-
mm/memcontrol.c | 167 ++++++++++++++++++++++----------------------
mm/vmscan.c | 43 ++++++++++-
3 files changed, 147 insertions(+), 88 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b87068a..6575931 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -40,6 +40,12 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
struct mem_cgroup *mem_cont,
int active, int file);
+struct mem_cgroup_iter {
+ struct zone *zone;
+ int priority;
+ unsigned int generation;
+};
+
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
/*
* All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -103,6 +109,11 @@ mem_cgroup_prepare_migration(struct page *page,
extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
struct page *oldpage, struct page *newpage, bool migration_ok);
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+ struct mem_cgroup *,
+ struct mem_cgroup_iter *);
+void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
+
/*
* For memory reclaim.
*/
@@ -276,7 +287,19 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
{
}
-static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg)
+static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+ struct mem_cgroup *prev,
+ struct mem_cgroup_iter *iter)
+{
+ return NULL;
+}
+
+static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
+ struct mem_cgroup *prev)
+{
+}
+
+static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
{
return 0;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f4b404e..413e1f8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -362,8 +362,6 @@ enum charge_type {
#define MEM_CGROUP_RECLAIM_NOSWAP (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
#define MEM_CGROUP_RECLAIM_SHRINK_BIT 0x1
#define MEM_CGROUP_RECLAIM_SHRINK (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
-#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2
-#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
static void mem_cgroup_get(struct mem_cgroup *memcg);
static void mem_cgroup_put(struct mem_cgroup *memcg);
@@ -783,19 +781,33 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
return memcg;
}
-struct mem_cgroup_iter {
- struct zone *zone;
- int priority;
- unsigned int generation;
-};
-
-static struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
- struct mem_cgroup *prev,
- struct mem_cgroup_iter *iter)
+/**
+ * mem_cgroup_iter - iterate over memory cgroup hierarchy
+ * @root: hierarchy root
+ * @prev: previously returned memcg, NULL on first invocation
+ * @iter: token for partial walks, NULL for full walks
+ *
+ * Returns references to children of the hierarchy starting at @root,
+ * or @root itself, or %NULL after a full round-trip.
+ *
+ * Caller must pass the return value in @prev on subsequent
+ * invocations for reference counting, or use mem_cgroup_iter_break()
+ * to cancel a hierarchy walk before the round-trip is complete.
+ *
+ * Reclaimers can specify a zone and a priority level in @iter to
+ * divide up the memcgs in the hierarchy among all concurrent
+ * reclaimers operating on the same zone and priority.
+ */
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+ struct mem_cgroup *prev,
+ struct mem_cgroup_iter *iter)
{
struct mem_cgroup *mem = NULL;
int id = 0;
+ if (mem_cgroup_disabled())
+ return NULL;
+
if (!root)
root = root_mem_cgroup;
@@ -850,8 +862,13 @@ static struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
return mem;
}
-static void mem_cgroup_iter_break(struct mem_cgroup *root,
- struct mem_cgroup *prev)
+/**
+ * mem_cgroup_iter_break - abort a hierarchy walk prematurely
+ * @root: hierarchy root
+ * @prev: last visited hierarchy member as returned by mem_cgroup_iter()
+ */
+void mem_cgroup_iter_break(struct mem_cgroup *root,
+ struct mem_cgroup *prev)
{
if (!root)
root = root_mem_cgroup;
@@ -1479,6 +1496,41 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
return min(limit, memsw);
}
+static unsigned long mem_cgroup_reclaim(struct mem_cgroup *mem,
+ gfp_t gfp_mask,
+ unsigned long flags)
+{
+ unsigned long total = 0;
+ bool noswap = false;
+ int loop;
+
+ if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
+ noswap = true;
+ else if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && mem->memsw_is_minimum)
+ noswap = true;
+
+ for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
+ if (loop)
+ drain_all_stock_async(mem);
+ total += try_to_free_mem_cgroup_pages(mem, gfp_mask, noswap);
+ /*
+ * Avoid freeing too much when shrinking to resize the
+ * limit. XXX: Shouldn't the margin check be enough?
+ */
+ if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
+ break;
+ if (mem_cgroup_margin(mem))
+ break;
+ /*
+ * If nothing was reclaimed after two attempts, there
+ * may be no reclaimable pages in this hierarchy.
+ */
+ if (loop && !total)
+ break;
+ }
+ return total;
+}
+
/**
* test_mem_cgroup_node_reclaimable
* @mem: the target memcg
@@ -1616,30 +1668,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
}
#endif
-/*
- * Scan the hierarchy if needed to reclaim memory. We remember the last child
- * we reclaimed from, so that we don't end up penalizing one child extensively
- * based on its position in the children list.
- *
- * root_memcg is the original ancestor that we've been reclaim from.
- *
- * We give up and return to the caller when we visit root_memcg twice.
- * (other groups can be removed while we're walking....)
- *
- * If shrink==true, for avoiding to free too much, this returns immedieately.
- */
-static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
- struct zone *zone,
- gfp_t gfp_mask,
- unsigned long reclaim_options,
- unsigned long *total_scanned)
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+ struct zone *zone,
+ gfp_t gfp_mask,
+ unsigned long *total_scanned)
{
struct mem_cgroup *victim = NULL;
- int ret, total = 0;
+ int total = 0;
int loop = 0;
- bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
- bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
- bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
unsigned long excess;
unsigned long nr_scanned;
struct mem_cgroup_iter iter = {
@@ -1649,29 +1685,17 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
- /* If memsw_is_minimum==1, swap-out is of-no-use. */
- if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
- noswap = true;
-
while (1) {
victim = mem_cgroup_iter(root_memcg, victim, &iter);
if (!victim) {
loop++;
- /*
- * We are not draining per cpu cached charges during
- * soft limit reclaim because global reclaim doesn't
- * care about charges. It tries to free some memory and
- * charges will not give any.
- */
- if (!check_soft && loop >= 1)
- drain_all_stock_async(root_memcg);
if (loop >= 2) {
/*
* If we have not been able to reclaim
* anything, it might because there are
* no reclaimable pages under this hierarchy
*/
- if (!check_soft || !total)
+ if (!total)
break;
/*
* We want to do more targeted reclaim.
@@ -1685,30 +1709,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
}
continue;
}
- if (!mem_cgroup_reclaimable(victim, noswap)) {
- /* this cgroup's local usage == 0 */
+ if (!mem_cgroup_reclaimable(victim, false))
continue;
- }
- /* we use swappiness of local cgroup */
- if (check_soft) {
- ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
- noswap, zone, &nr_scanned);
- *total_scanned += nr_scanned;
- } else
- ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
- noswap);
- /*
- * At shrinking usage, we can't check we should stop here or
- * reclaim more. It's depends on callers. last_scanned_child
- * will work enough for keeping fairness under tree.
- */
- if (shrink)
- break;
- total += ret;
- if (check_soft) {
- if (!res_counter_soft_limit_excess(&root_memcg->res))
- break;
- } else if (mem_cgroup_margin(root_memcg))
+ total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+ zone, &nr_scanned);
+ *total_scanned += nr_scanned;
+ if (!res_counter_soft_limit_excess(&root_memcg->res))
break;
}
mem_cgroup_iter_break(root_memcg, victim);
@@ -2205,8 +2211,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
if (!(gfp_mask & __GFP_WAIT))
return CHARGE_WOULDBLOCK;
- ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
- gfp_mask, flags, NULL);
+ ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
return CHARGE_RETRY;
/*
@@ -3437,9 +3442,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
if (!ret)
break;
- mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
- MEM_CGROUP_RECLAIM_SHRINK,
- NULL);
+ mem_cgroup_reclaim(memcg, GFP_KERNEL,
+ MEM_CGROUP_RECLAIM_SHRINK);
curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
@@ -3497,10 +3501,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
if (!ret)
break;
- mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
- MEM_CGROUP_RECLAIM_NOSWAP |
- MEM_CGROUP_RECLAIM_SHRINK,
- NULL);
+ mem_cgroup_reclaim(memcg, GFP_KERNEL,
+ MEM_CGROUP_RECLAIM_NOSWAP |
+ MEM_CGROUP_RECLAIM_SHRINK);
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
@@ -3543,10 +3546,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
break;
nr_scanned = 0;
- reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
- gfp_mask,
- MEM_CGROUP_RECLAIM_SOFT,
- &nr_scanned);
+ reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
+ gfp_mask, &nr_scanned);
nr_reclaimed += reclaimed;
*total_scanned += nr_scanned;
spin_lock(&mctz->lock);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 92f4e22..8419e8f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2104,12 +2104,43 @@ restart:
static void shrink_zone(int priority, struct zone *zone,
struct scan_control *sc)
{
- struct mem_cgroup_zone mz = {
- .mem_cgroup = sc->target_mem_cgroup,
+ struct mem_cgroup *root = sc->target_mem_cgroup;
+ struct mem_cgroup_iter iter = {
.zone = zone,
+ .priority = priority,
};
+ struct mem_cgroup *mem;
+
+ if (global_reclaim(sc)) {
+ struct mem_cgroup_zone mz = {
+ .mem_cgroup = NULL,
+ .zone = zone,
+ };
+
+ shrink_mem_cgroup_zone(priority, &mz, sc);
+ return;
+ }
+
+ mem = mem_cgroup_iter(root, NULL, &iter);
+ do {
+ struct mem_cgroup_zone mz = {
+ .mem_cgroup = mem,
+ .zone = zone,
+ };
- shrink_mem_cgroup_zone(priority, &mz, sc);
+ shrink_mem_cgroup_zone(priority, &mz, sc);
+ /*
+ * Limit reclaim has historically picked one memcg and
+ * scanned it with decreasing priority levels until
+ * nr_to_reclaim had been reclaimed. This priority
+ * cycle is thus over after a single memcg.
+ */
+ if (!global_reclaim(sc)) {
+ mem_cgroup_iter_break(root, mem);
+ break;
+ }
+ mem = mem_cgroup_iter(root, mem, &iter);
+ } while (mem);
}
/*
@@ -2347,6 +2378,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
.order = 0,
.target_mem_cgroup = mem,
};
+ struct mem_cgroup_zone mz = {
+ .mem_cgroup = mem,
+ .zone = zone,
+ };
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2362,7 +2397,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
* will pick up pages from other mem cgroup's as well. We hack
* the priority and make it zero.
*/
- shrink_zone(0, zone, &sc);
+ shrink_mem_cgroup_zone(0, &mz, &sc);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
--
1.7.6
next prev parent reply other threads:[~2011-09-12 10:58 UTC|newest]
Thread overview: 65+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-09-12 10:57 [patch 0/11] mm: memcg naturalization -rc3 Johannes Weiner
2011-09-12 10:57 ` [patch 01/11] mm: memcg: consolidate hierarchy iteration primitives Johannes Weiner
2011-09-12 22:37 ` Kirill A. Shutemov
2011-09-13 5:40 ` Johannes Weiner
2011-09-19 13:06 ` Michal Hocko
2011-09-13 10:06 ` KAMEZAWA Hiroyuki
2011-09-19 12:53 ` Michal Hocko
2011-09-20 8:45 ` Johannes Weiner
2011-09-20 8:53 ` Michal Hocko
2011-09-12 10:57 ` [patch 02/11] mm: vmscan: distinguish global reclaim from global LRU scanning Johannes Weiner
2011-09-12 23:02 ` Kirill A. Shutemov
2011-09-13 5:48 ` Johannes Weiner
2011-09-13 10:07 ` KAMEZAWA Hiroyuki
2011-09-19 13:23 ` Michal Hocko
2011-09-19 13:46 ` Michal Hocko
2011-09-20 8:52 ` Johannes Weiner
2011-09-12 10:57 ` [patch 03/11] mm: vmscan: distinguish between memcg triggering reclaim and memcg being scanned Johannes Weiner
2011-09-13 10:23 ` KAMEZAWA Hiroyuki
2011-09-19 14:29 ` Michal Hocko
2011-09-20 8:58 ` Johannes Weiner
2011-09-20 9:17 ` Michal Hocko
2011-09-29 7:55 ` Johannes Weiner
2011-09-12 10:57 ` [patch 04/11] mm: memcg: per-priority per-zone hierarchy scan generations Johannes Weiner
2011-09-13 10:27 ` KAMEZAWA Hiroyuki
2011-09-13 11:03 ` Johannes Weiner
2011-09-14 0:55 ` KAMEZAWA Hiroyuki
2011-09-14 5:56 ` Johannes Weiner
2011-09-14 7:40 ` KAMEZAWA Hiroyuki
2011-09-20 8:15 ` Michal Hocko
2011-09-20 8:45 ` Michal Hocko
2011-09-20 9:10 ` Johannes Weiner
2011-09-20 12:37 ` Michal Hocko
2011-09-12 10:57 ` Johannes Weiner [this message]
2011-09-13 10:31 ` [patch 05/11] mm: move memcg hierarchy reclaim to generic reclaim code KAMEZAWA Hiroyuki
2011-09-20 13:09 ` Michal Hocko
2011-09-20 13:29 ` Johannes Weiner
2011-09-20 14:08 ` Michal Hocko
2011-09-12 10:57 ` [patch 06/11] mm: memcg: remove optimization of keeping the root_mem_cgroup LRU lists empty Johannes Weiner
2011-09-13 10:34 ` KAMEZAWA Hiroyuki
2011-09-20 15:02 ` Michal Hocko
2011-09-29 9:20 ` Johannes Weiner
2011-09-29 9:49 ` Michal Hocko
2011-09-12 10:57 ` [patch 07/11] mm: vmscan: convert unevictable page rescue scanner to per-memcg LRU lists Johannes Weiner
2011-09-13 10:37 ` KAMEZAWA Hiroyuki
2011-09-21 12:33 ` Michal Hocko
2011-09-21 13:47 ` Johannes Weiner
2011-09-21 14:08 ` Michal Hocko
2011-09-12 10:57 ` [patch 08/11] mm: vmscan: convert global reclaim " Johannes Weiner
2011-09-13 10:41 ` KAMEZAWA Hiroyuki
2011-09-21 13:10 ` Michal Hocko
2011-09-21 13:51 ` Johannes Weiner
2011-09-21 13:57 ` Michal Hocko
2011-09-12 10:57 ` [patch 09/11] mm: collect LRU list heads into struct lruvec Johannes Weiner
2011-09-13 10:43 ` KAMEZAWA Hiroyuki
2011-09-21 13:43 ` Michal Hocko
2011-09-21 15:15 ` Michal Hocko
2011-09-12 10:57 ` [patch 10/11] mm: make per-memcg LRU lists exclusive Johannes Weiner
2011-09-13 10:47 ` KAMEZAWA Hiroyuki
2011-09-21 15:24 ` Michal Hocko
2011-09-21 15:47 ` Johannes Weiner
2011-09-21 16:05 ` Michal Hocko
2011-09-12 10:57 ` [patch 11/11] mm: memcg: remove unused node/section info from pc->flags Johannes Weiner
2011-09-13 10:50 ` KAMEZAWA Hiroyuki
2011-09-21 15:32 ` Michal Hocko
2011-09-13 20:35 ` [patch 0/11] mm: memcg naturalization -rc3 Kirill A. Shutemov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1315825048-3437-6-git-send-email-jweiner@redhat.com \
--to=jweiner@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=bsingharora@gmail.com \
--cc=gthelen@google.com \
--cc=hch@infradead.org \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.cz \
--cc=minchan.kim@gmail.com \
--cc=nishimura@mxp.nes.nec.co.jp \
--cc=riel@redhat.com \
--cc=walken@google.com \
--cc=yinghan@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).