From: Glauber Costa <glommer@openvz.org>
To: <linux-mm@kvack.org>
Cc: <cgroups@vger.kernel.org>,
Andrew Morton <akpm@linux-foundation.org>,
Greg Thelen <gthelen@google.com>,
<kamezawa.hiroyu@jp.fujitsu.com>, Michal Hocko <mhocko@suse.cz>,
Johannes Weiner <hannes@cmpxchg.org>,
<linux-fsdevel@vger.kernel.org>,
Dave Chinner <david@fromorbit.com>,
hughd@google.com, Glauber Costa <glommer@openvz.org>,
Dave Chinner <dchinner@redhat.com>, Mel Gorman <mgorman@suse.de>,
Rik van Riel <riel@redhat.com>
Subject: [PATCH v7 29/34] memcg: scan cache objects hierarchically
Date: Mon, 20 May 2013 00:07:22 +0400 [thread overview]
Message-ID: <1368994047-5997-30-git-send-email-glommer@openvz.org> (raw)
In-Reply-To: <1368994047-5997-1-git-send-email-glommer@openvz.org>
When reaching shrink_slab, we should descent in children memcg searching
for objects that could be shrunk. This is true even if the memcg does
not have kmem limits on, since the kmem res_counter will also be billed
against the user res_counter of the parent.
It is possible that we will free objects and not free any pages, that
will just harm the child groups without helping the parent group at all.
But at this point, we basically are prepared to pay the price.
Signed-off-by: Glauber Costa <glommer@openvz.org>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/memcontrol.h | 6 ++++
mm/memcontrol.c | 13 +++++++++
mm/vmscan.c | 70 +++++++++++++++++++++++++++++++++++-----------
3 files changed, 72 insertions(+), 17 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3eeece8..c8b1412 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -441,6 +441,7 @@ static inline bool memcg_kmem_enabled(void)
return static_key_false(&memcg_kmem_enabled_key);
}
+bool memcg_kmem_should_reclaim(struct mem_cgroup *memcg);
bool memcg_kmem_is_active(struct mem_cgroup *memcg);
/*
@@ -585,6 +586,11 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
}
#else
+static inline bool memcg_kmem_should_reclaim(struct mem_cgroup *memcg)
+{
+ return false;
+}
+
static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
{
return false;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c563550..b8980d1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3043,6 +3043,19 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
}
#ifdef CONFIG_MEMCG_KMEM
+bool memcg_kmem_should_reclaim(struct mem_cgroup *memcg)
+{
+ struct mem_cgroup *iter;
+
+ for_each_mem_cgroup_tree(iter, memcg) {
+ if (memcg_kmem_is_active(iter)) {
+ mem_cgroup_iter_break(memcg, iter);
+ return true;
+ }
+ }
+ return false;
+}
+
static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
{
return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eae8304..3c67f36 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -148,7 +148,7 @@ static bool global_reclaim(struct scan_control *sc)
static bool has_kmem_reclaim(struct scan_control *sc)
{
return !sc->target_mem_cgroup ||
- memcg_kmem_is_active(sc->target_mem_cgroup);
+ memcg_kmem_should_reclaim(sc->target_mem_cgroup);
}
static unsigned long
@@ -346,12 +346,39 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
*
* Returns the number of slab objects which we shrunk.
*/
+static unsigned long
+shrink_slab_one(struct shrink_control *shrinkctl, struct shrinker *shrinker,
+ unsigned long nr_pages_scanned, unsigned long lru_pages)
+{
+ unsigned long freed = 0;
+
+ if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) {
+ shrinkctl->nid = 0;
+
+ return shrink_slab_node(shrinkctl, shrinker,
+ nr_pages_scanned, lru_pages,
+ &shrinker->nr_deferred);
+ }
+
+ for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
+ if (!node_online(shrinkctl->nid))
+ continue;
+
+ freed += shrink_slab_node(shrinkctl, shrinker,
+ nr_pages_scanned, lru_pages,
+ &shrinker->nr_deferred_node[shrinkctl->nid]);
+ }
+
+ return freed;
+}
+
unsigned long shrink_slab(struct shrink_control *shrinkctl,
unsigned long nr_pages_scanned,
unsigned long lru_pages)
{
struct shrinker *shrinker;
unsigned long freed = 0;
+ struct mem_cgroup *root = shrinkctl->target_mem_cgroup;
if (nr_pages_scanned == 0)
nr_pages_scanned = SWAP_CLUSTER_MAX;
@@ -363,6 +390,9 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl,
}
list_for_each_entry(shrinker, &shrinker_list, list) {
+ struct mem_cgroup *memcg;
+
+
/*
* If we don't have a target mem cgroup, we scan them all.
* Otherwise we will limit our scan to shrinkers marked as
@@ -372,23 +402,29 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl,
!(shrinker->flags & SHRINKER_MEMCG_AWARE))
continue;
- if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) {
- shrinkctl->nid = 0;
-
- freed += shrink_slab_node(shrinkctl, shrinker,
- nr_pages_scanned, lru_pages,
- &shrinker->nr_deferred);
- continue;
- }
-
- for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
- if (!node_online(shrinkctl->nid))
- continue;
+ /*
+ * In a hierarchical chain, it might be that not all
+ * memcgs are kmem active. kmemcg design mandates that
+ * when one memcg is active, its children will be
+ * active as well. But it is perfectly possible that
+ * its parent is not.
+ *
+ * We also need to make sure we scan at least once, for
+ * the global case. So if we don't have a target memcg
+ * (saved in root), we proceed normally and expect to
+ * break in the next round.
+ */
+ memcg = mem_cgroup_iter(root, NULL, NULL);
+ do {
+ if (!root || memcg_kmem_is_active(memcg))
+ freed += shrink_slab_one(shrinkctl, shrinker,
+ nr_pages_scanned, lru_pages);
+ memcg = mem_cgroup_iter(root, memcg, NULL);
+ shrinkctl->target_mem_cgroup = memcg;
+ } while (memcg);
- freed += shrink_slab_node(shrinkctl, shrinker,
- nr_pages_scanned, lru_pages,
- &shrinker->nr_deferred_node[shrinkctl->nid]);
- }
+ /* restore original state */
+ shrinkctl->target_mem_cgroup = root;
}
up_read(&shrinker_rwsem);
out:
--
1.8.1.4
next prev parent reply other threads:[~2013-05-19 20:08 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-05-19 20:06 [PATCH v7 00/34] kmemcg shrinkers Glauber Costa
2013-05-19 20:06 ` [PATCH v7 01/34] fs: bump inode and dentry counters to long Glauber Costa
2013-05-19 20:06 ` [PATCH v7 02/34] super: fix calculation of shrinkable objects for small numbers Glauber Costa
2013-05-19 20:06 ` [PATCH v7 03/34] dcache: convert dentry_stat.nr_unused to per-cpu counters Glauber Costa
2013-05-19 20:06 ` [PATCH v7 04/34] dentry: move to per-sb LRU locks Glauber Costa
2013-05-19 20:06 ` [PATCH v7 05/34] dcache: remove dentries from LRU before putting on dispose list Glauber Costa
2013-05-19 20:06 ` [PATCH v7 06/34] mm: new shrinker API Glauber Costa
2013-05-19 20:07 ` [PATCH v7 07/34] shrinker: convert superblock shrinkers to new API Glauber Costa
2013-05-20 16:39 ` Glauber Costa
2013-05-20 23:40 ` Dave Chinner
2013-05-19 20:07 ` [PATCH v7 08/34] list: add a new LRU list type Glauber Costa
2013-05-19 20:07 ` [PATCH v7 09/34] inode: convert inode lru list to generic lru list code Glauber Costa
2013-05-19 20:07 ` [PATCH v7 10/34] dcache: convert to use new lru list infrastructure Glauber Costa
2013-05-19 20:07 ` [PATCH v7 11/34] list_lru: per-node " Glauber Costa
2013-05-19 20:07 ` [PATCH v7 12/34] shrinker: add node awareness Glauber Costa
2013-05-19 20:07 ` [PATCH v7 13/34] vmscan: per-node deferred work Glauber Costa
2013-05-19 20:07 ` [PATCH v7 14/34] list_lru: per-node API Glauber Costa
2013-05-19 20:07 ` [PATCH v7 15/34] fs: convert inode and dentry shrinking to be node aware Glauber Costa
2013-05-19 20:07 ` [PATCH v7 16/34] xfs: convert buftarg LRU to generic code Glauber Costa
2013-05-19 20:07 ` [PATCH v7 17/34] xfs: convert dquot cache lru to list_lru Glauber Costa
2013-05-19 20:07 ` [PATCH v7 18/34] fs: convert fs shrinkers to new scan/count API Glauber Costa
2013-05-20 8:25 ` Steven Whitehouse
2013-05-20 13:46 ` Glauber Costa
2013-05-20 15:25 ` Glauber Costa
2013-05-20 23:38 ` Dave Chinner
2013-05-20 23:42 ` Glauber Costa
2013-05-19 20:07 ` [PATCH v7 19/34] drivers: convert shrinkers to new count/scan API Glauber Costa
2013-06-03 20:03 ` Kent Overstreet
2013-06-04 9:06 ` Glauber Costa
2013-06-04 9:10 ` Glauber Costa
2013-05-19 20:07 ` [PATCH v7 20/34] i915: bail out earlier when shrinker cannot acquire mutex Glauber Costa
2013-05-19 20:07 ` [PATCH v7 21/34] shrinker: convert remaining shrinkers to count/scan API Glauber Costa
2013-05-19 20:07 ` [PATCH v7 22/34] hugepage: convert huge zero page shrinker to new shrinker API Glauber Costa
2013-05-19 20:07 ` [PATCH v7 23/34] shrinker: Kill old ->shrink API Glauber Costa
2013-05-19 20:07 ` [PATCH v7 24/34] vmscan: also shrink slab in memcg pressure Glauber Costa
2013-05-19 20:07 ` [PATCH v7 25/34] memcg,list_lru: duplicate LRUs upon kmemcg creation Glauber Costa
2013-05-19 20:07 ` [PATCH v7 26/34] lru: add an element to a memcg list Glauber Costa
2013-05-19 20:07 ` [PATCH v7 27/34] list_lru: per-memcg walks Glauber Costa
2013-05-19 20:07 ` [PATCH v7 28/34] memcg: per-memcg kmem shrinking Glauber Costa
2013-05-19 20:07 ` Glauber Costa [this message]
2013-05-19 20:07 ` [PATCH v7 30/34] vmscan: take at least one pass with shrinkers Glauber Costa
2013-05-19 20:07 ` [PATCH v7 31/34] super: targeted memcg reclaim Glauber Costa
2013-05-19 20:07 ` [PATCH v7 32/34] memcg: move initialization to memcg creation Glauber Costa
2013-05-19 20:07 ` [PATCH v7 33/34] vmpressure: in-kernel notifications Glauber Costa
2013-05-19 20:07 ` [PATCH v7 34/34] memcg: reap dead memcgs upon global memory pressure Glauber Costa
2013-05-21 7:03 ` [PATCH v7 00/34] kmemcg shrinkers Glauber Costa
2013-05-21 7:18 ` Dave Chinner
2013-05-21 7:27 ` Glauber Costa
2013-05-22 6:26 ` Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1368994047-5997-30-git-send-email-glommer@openvz.org \
--to=glommer@openvz.org \
--cc=akpm@linux-foundation.org \
--cc=cgroups@vger.kernel.org \
--cc=david@fromorbit.com \
--cc=dchinner@redhat.com \
--cc=gthelen@google.com \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@suse.de \
--cc=mhocko@suse.cz \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).