From: Glauber Costa <glommer@parallels.com>
To: <linux-mm@kvack.org>
Cc: <cgroups@vger.kernel.org>,
Andrew Morton <akpm@linux-foundation.org>,
Michal Hocko <mhocko@suse.cz>,
Johannes Weiner <hannes@cmpxchg.org>,
<kamezawa.hiroyu@jp.fujitsu.com>,
Dave Chinner <david@fromorbit.com>,
<linux-fsdevel@vger.kernel.org>,
Glauber Costa <glommer@parallels.com>,
Dave Chinner <dchinner@redhat.com>, Mel Gorman <mgorman@suse.de>,
Rik van Riel <riel@redhat.com>, Hugh Dickins <hughd@google.com>
Subject: [PATCH 6/7] super: targeted memcg reclaim
Date: Fri, 8 Feb 2013 17:07:36 +0400 [thread overview]
Message-ID: <1360328857-28070-7-git-send-email-glommer@parallels.com> (raw)
In-Reply-To: <1360328857-28070-1-git-send-email-glommer@parallels.com>
We now have all our dentries and inodes placed in memcg-specific LRU
lists. All we have to do is restrict reclaim to those lists in case of
memcg pressure.
Marking the superblock shrinker and its LRUs as memcg-aware will
guarantee that the shrinkers get invoked during targeted reclaim.
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
fs/dcache.c | 7 ++++---
fs/inode.c | 6 +++---
fs/internal.h | 5 +++--
fs/super.c | 37 +++++++++++++++++++++++++++----------
4 files changed, 37 insertions(+), 18 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 7f107fb..6f74887 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -907,14 +907,15 @@ static int dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock,
* This function may fail to free any resources if all the dentries are in
* use.
*/
+
long prune_dcache_sb(struct super_block *sb, long nr_to_scan,
- nodemask_t *nodes_to_walk)
+ nodemask_t *nodes_to_walk, struct mem_cgroup *memcg)
{
LIST_HEAD(dispose);
long freed;
- freed = list_lru_walk_nodemask(&sb->s_dentry_lru, dentry_lru_isolate,
- &dispose, nr_to_scan, nodes_to_walk);
+ freed = list_lru_walk_nodemask_memcg(&sb->s_dentry_lru,
+ dentry_lru_isolate, &dispose, nr_to_scan, nodes_to_walk, memcg);
shrink_dentry_list(&dispose);
return freed;
}
diff --git a/fs/inode.c b/fs/inode.c
index 5bb1e21..61673be 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -746,13 +746,13 @@ static int inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock,
* then are freed outside inode_lock by dispose_list().
*/
long prune_icache_sb(struct super_block *sb, long nr_to_scan,
- nodemask_t *nodes_to_walk)
+ nodemask_t *nodes_to_walk, struct mem_cgroup *memcg)
{
LIST_HEAD(freeable);
long freed;
- freed = list_lru_walk_nodemask(&sb->s_inode_lru, inode_lru_isolate,
- &freeable, nr_to_scan, nodes_to_walk);
+ freed = list_lru_walk_nodemask_memcg(&sb->s_inode_lru,
+ inode_lru_isolate, &freeable, nr_to_scan, nodes_to_walk, memcg);
dispose_list(&freeable);
return freed;
}
diff --git a/fs/internal.h b/fs/internal.h
index 0f37896..5e2211f 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -16,6 +16,7 @@ struct file_system_type;
struct linux_binprm;
struct path;
struct mount;
+struct mem_cgroup;
/*
* block_dev.c
@@ -111,7 +112,7 @@ extern int open_check_o_direct(struct file *f);
*/
extern spinlock_t inode_sb_list_lock;
extern long prune_icache_sb(struct super_block *sb, long nr_to_scan,
- nodemask_t *nodes_to_scan);
+ nodemask_t *nodes_to_scan, struct mem_cgroup *memcg);
extern void inode_add_lru(struct inode *inode);
/*
@@ -128,4 +129,4 @@ extern int invalidate_inodes(struct super_block *, bool);
*/
extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
extern long prune_dcache_sb(struct super_block *sb, long nr_to_scan,
- nodemask_t *nodes_to_scan);
+ nodemask_t *nodes_to_scan, struct mem_cgroup *memcg);
diff --git a/fs/super.c b/fs/super.c
index fe3aa4c..f687cc2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -34,6 +34,7 @@
#include <linux/cleancache.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
+#include <linux/memcontrol.h>
#include "internal.h"
@@ -56,6 +57,7 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
static long super_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
struct super_block *sb;
+ struct mem_cgroup *memcg = sc->target_mem_cgroup;
long fs_objects = 0;
long total_objects;
long freed = 0;
@@ -74,11 +76,13 @@ static long super_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
if (!grab_super_passive(sb))
return -1;
- if (sb->s_op && sb->s_op->nr_cached_objects)
+ if (sb->s_op && sb->s_op->nr_cached_objects && !memcg)
fs_objects = sb->s_op->nr_cached_objects(sb, &sc->nodes_to_scan);
- inodes = list_lru_count_nodemask(&sb->s_inode_lru, &sc->nodes_to_scan);
- dentries = list_lru_count_nodemask(&sb->s_dentry_lru, &sc->nodes_to_scan);
+ inodes = list_lru_count_nodemask_memcg(&sb->s_inode_lru,
+ &sc->nodes_to_scan, memcg);
+ dentries = list_lru_count_nodemask_memcg(&sb->s_dentry_lru,
+ &sc->nodes_to_scan, memcg);
total_objects = dentries + inodes + fs_objects + 1;
/* proportion the scan between the caches */
@@ -89,8 +93,8 @@ static long super_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
* prune the dcache first as the icache is pinned by it, then
* prune the icache, followed by the filesystem specific caches
*/
- freed = prune_dcache_sb(sb, dentries, &sc->nodes_to_scan);
- freed += prune_icache_sb(sb, inodes, &sc->nodes_to_scan);
+ freed = prune_dcache_sb(sb, dentries, &sc->nodes_to_scan, memcg);
+ freed += prune_icache_sb(sb, inodes, &sc->nodes_to_scan, memcg);
if (fs_objects) {
fs_objects = (sc->nr_to_scan * fs_objects) / total_objects;
@@ -106,20 +110,26 @@ static long super_cache_count(struct shrinker *shrink, struct shrink_control *sc
{
struct super_block *sb;
long total_objects = 0;
+ struct mem_cgroup *memcg = sc->target_mem_cgroup;
sb = container_of(shrink, struct super_block, s_shrink);
if (!grab_super_passive(sb))
return 0;
- if (sb->s_op && sb->s_op->nr_cached_objects)
+ /*
+ * Ideally we would pass memcg to nr_cached_objects, and
+ * let the underlying filesystem decide. Most likely the
+ * path will be if (!memcg) return;, but even then.
+ */
+ if (sb->s_op && sb->s_op->nr_cached_objects && !memcg)
total_objects = sb->s_op->nr_cached_objects(sb,
&sc->nodes_to_scan);
- total_objects += list_lru_count_nodemask(&sb->s_dentry_lru,
- &sc->nodes_to_scan);
- total_objects += list_lru_count_nodemask(&sb->s_inode_lru,
- &sc->nodes_to_scan);
+ total_objects += list_lru_count_nodemask_memcg(&sb->s_dentry_lru,
+ &sc->nodes_to_scan, memcg);
+ total_objects += list_lru_count_nodemask_memcg(&sb->s_inode_lru,
+ &sc->nodes_to_scan, memcg);
total_objects = vfs_pressure_ratio(total_objects);
drop_super(sb);
@@ -198,8 +208,12 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
INIT_HLIST_NODE(&s->s_instances);
INIT_HLIST_BL_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
+
+ lru_memcg_enable(&s->s_dentry_lru);
list_lru_init(&s->s_dentry_lru);
+ lru_memcg_enable(&s->s_inode_lru);
list_lru_init(&s->s_inode_lru);
+
INIT_LIST_HEAD(&s->s_mounts);
init_rwsem(&s->s_umount);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -235,6 +249,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
s->s_shrink.scan_objects = super_cache_scan;
s->s_shrink.count_objects = super_cache_count;
s->s_shrink.batch = 1024;
+ s->s_shrink.memcg_shrinker = true;
}
out:
return s;
@@ -317,6 +332,8 @@ void deactivate_locked_super(struct super_block *s)
/* caches are now gone, we can safely kill the shrinker now */
unregister_shrinker(&s->s_shrink);
+ list_lru_destroy(&s->s_dentry_lru);
+ list_lru_destroy(&s->s_inode_lru);
put_filesystem(fs);
put_super(s);
} else {
--
1.8.1
next prev parent reply other threads:[~2013-02-08 13:07 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-02-08 13:07 [PATCH 0/7] memcg targeted shrinking Glauber Costa
2013-02-08 13:07 ` [PATCH 1/7] vmscan: also shrink slab in memcg pressure Glauber Costa
2013-02-15 1:27 ` Greg Thelen
2013-02-15 10:46 ` Glauber Costa
2013-02-15 8:37 ` Kamezawa Hiroyuki
[not found] ` <511DF3CB.7020206-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2013-02-15 10:30 ` Glauber Costa
2013-02-08 13:07 ` [PATCH 2/7] memcg,list_lru: duplicate LRUs upon kmemcg creation Glauber Costa
2013-02-15 1:31 ` Greg Thelen
[not found] ` <xr934nhenz18.fsf-aSPv4SP+Du0KgorLzL7FmE7CuiCeIGUxQQ4Iyu8u01E@public.gmane.org>
2013-02-15 10:54 ` Glauber Costa
2013-02-20 7:46 ` Greg Thelen
[not found] ` <1360328857-28070-3-git-send-email-glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2013-02-15 9:21 ` Kamezawa Hiroyuki
2013-02-15 10:36 ` Glauber Costa
2013-02-08 13:07 ` [PATCH 3/7] lru: add an element to a memcg list Glauber Costa
2013-02-15 1:32 ` Greg Thelen
[not found] ` <xr93txpemkeo.fsf-aSPv4SP+Du0KgorLzL7FmE7CuiCeIGUxQQ4Iyu8u01E@public.gmane.org>
2013-02-15 10:57 ` Glauber Costa
2013-02-08 13:07 ` [PATCH 4/7] list_lru: also include memcg lists in counts and scans Glauber Costa
2013-02-08 13:07 ` [PATCH 5/7] list_lru: per-memcg walks Glauber Costa
2013-02-08 13:07 ` Glauber Costa [this message]
2013-02-08 13:07 ` [PATCH 7/7] memcg: per-memcg kmem shrinking Glauber Costa
[not found] ` <1360328857-28070-1-git-send-email-glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2013-02-15 1:28 ` [PATCH 0/7] memcg targeted shrinking Greg Thelen
[not found] ` <xr93ip5unz52.fsf-aSPv4SP+Du0KgorLzL7FmE7CuiCeIGUxQQ4Iyu8u01E@public.gmane.org>
2013-02-15 10:42 ` Glauber Costa
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1360328857-28070-7-git-send-email-glommer@parallels.com \
--to=glommer@parallels.com \
--cc=akpm@linux-foundation.org \
--cc=cgroups@vger.kernel.org \
--cc=david@fromorbit.com \
--cc=dchinner@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@suse.de \
--cc=mhocko@suse.cz \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).