From: Ying Han <yinghan@google.com>
To: Michal Hocko <mhocko@suse.cz>,
Balbir Singh <bsingharora@gmail.com>,
Rik van Riel <riel@redhat.com>, Hugh Dickins <hughd@google.com>,
Johannes Weiner <hannes@cmpxchg.org>, Mel Gorman <mel@csn.ul.ie>,
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
Pavel Emelyanov <xemul@openvz.org>
Cc: linux-mm@kvack.org
Subject: [PATCH 3/3] memcg: track reclaim stats in memory.vmscan_stat
Date: Tue, 6 Dec 2011 15:59:59 -0800
Message-ID: <1323215999-29164-4-git-send-email-yinghan@google.com>
In-Reply-To: <1323215999-29164-1-git-send-email-yinghan@google.com>
Not asking for inclusion; this is for testing purposes only.
The API tracks the number of pages scanned and freed during page reclaim,
as well as the time spent in shrink_mem_cgroup_zone(). Counts are broken
down by reclaim context (system vs. limit, target vs. under hierarchy)
and by page type (anon vs. file), using the suffixes below; a test reader
sketch follows the list.
"_by_limit": per-memcg (limit) reclaim, and this memcg is the target
"_by_system": global reclaim, and this memcg is the target
"_by_limit_under_hierarchy": per-memcg reclaim, and this memcg sits below
the target in the hierarchy
"_by_system_under_hierarchy": global reclaim, and this memcg sits below
the target in the hierarchy
For example, if cgroup A hits its limit and reclaim also scans its child
B, A's "_by_limit" counters and B's "_by_limit_under_hierarchy" counters
are bumped.
Sample output:
$ cat /.../memory.vmscan_stat
...
scanned_pages_by_limit 3954818
scanned_anon_pages_by_limit 0
scanned_file_pages_by_limit 3954818
freed_pages_by_limit 3929770
freed_anon_pages_by_limit 0
freed_file_pages_by_limit 3929770
elapsed_ns_by_limit 3386358102
...
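As a quick sanity check on the numbers above: reclaim efficiency under
limit pressure is freed_pages_by_limit / scanned_pages_by_limit =
3929770 / 3954818 ~= 99.4%, and the scan cost is elapsed_ns_by_limit /
scanned_pages_by_limit = 3386358102 / 3954818 ~= 856 ns per scanned page.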
Signed-off-by: Ying Han <yinghan@google.com>
---
include/linux/memcontrol.h | 18 +++++
mm/memcontrol.c | 153 +++++++++++++++++++++++++++++++++++++++++++-
mm/vmscan.c | 35 ++++++++++-
3 files changed, 203 insertions(+), 3 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 25c4170..4afc144 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -38,6 +38,12 @@ struct mem_cgroup_reclaim_cookie {
unsigned int generation;
};
+struct memcg_scan_record {
+ unsigned long nr_scanned[2]; /* scanned pages: [0] anon, [1] file */
+ unsigned long nr_freed[2]; /* freed pages: [0] anon, [1] file */
+ unsigned long elapsed; /* nsec spent in shrink_mem_cgroup_zone() */
+};
+
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
/*
* All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -126,6 +132,10 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page);
extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
struct task_struct *p);
+void mem_cgroup_record_scanstat(struct mem_cgroup *mem,
+ struct memcg_scan_record *rec,
+ bool global, bool hierarchy);
+
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
extern int do_swap_account;
#endif
@@ -378,6 +388,14 @@ static inline
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
{
}
+
+static inline void
+mem_cgroup_record_scanstat(struct mem_cgroup *mem,
+ struct memcg_scan_record *rec,
+ bool global, bool hierarchy)
+{
+}
+
#endif /* CONFIG_CGROUP_MEM_CONT */
#if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 35bf664..894e0d2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -112,10 +112,30 @@ enum mem_cgroup_events_target {
#define THRESHOLDS_EVENTS_TARGET (128)
#define NUMAINFO_EVENTS_TARGET (1024)
+enum mem_cgroup_scan_context {
+ SCAN_BY_SYSTEM,
+ SCAN_BY_SYSTEM_UNDER_HIERARCHY,
+ SCAN_BY_LIMIT,
+ SCAN_BY_LIMIT_UNDER_HIERARCHY,
+ NR_SCAN_CONTEXT,
+};
+
+enum mem_cgroup_scan_stat {
+ SCANNED,
+ SCANNED_ANON,
+ SCANNED_FILE,
+ FREED,
+ FREED_ANON,
+ FREED_FILE,
+ ELAPSED,
+ NR_SCAN_STAT,
+};
+
struct mem_cgroup_stat_cpu {
long count[MEM_CGROUP_STAT_NSTATS];
unsigned long events[MEM_CGROUP_EVENTS_NSTATS];
unsigned long targets[MEM_CGROUP_NTARGETS];
+ unsigned long scanstats[NR_SCAN_CONTEXT][NR_SCAN_STAT];
};
struct mem_cgroup_reclaim_iter {
@@ -542,6 +562,58 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
preempt_enable();
}
+void mem_cgroup_record_scanstat(struct mem_cgroup *mem,
+ struct memcg_scan_record *rec,
+ bool global, bool hierarchy)
+{
+ int context;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ if (global)
+ context = SCAN_BY_SYSTEM;
+ else
+ context = SCAN_BY_LIMIT;
+ if (hierarchy)
+ context++;
+
+ this_cpu_add(mem->stat->scanstats[context][SCANNED],
+ rec->nr_scanned[0] + rec->nr_scanned[1]);
+ this_cpu_add(mem->stat->scanstats[context][SCANNED_ANON],
+ rec->nr_scanned[0]);
+ this_cpu_add(mem->stat->scanstats[context][SCANNED_FILE],
+ rec->nr_scanned[1]);
+
+ this_cpu_add(mem->stat->scanstats[context][FREED],
+ rec->nr_freed[0] + rec->nr_freed[1]);
+ this_cpu_add(mem->stat->scanstats[context][FREED_ANON],
+ rec->nr_freed[0]);
+ this_cpu_add(mem->stat->scanstats[context][FREED_FILE],
+ rec->nr_freed[1]);
+
+ this_cpu_add(mem->stat->scanstats[context][ELAPSED],
+ rec->elapsed);
+}
+
+static long mem_cgroup_read_scan_stat(struct mem_cgroup *mem,
+ int context, int stat)
+{
+ long val = 0;
+ int cpu;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu)
+ val += per_cpu(mem->stat->scanstats[context][stat], cpu);
+#ifdef CONFIG_HOTPLUG_CPU
+ spin_lock(&mem->pcp_counter_lock);
+ val += mem->nocpu_base.scanstats[context][stat];
+ spin_unlock(&mem->pcp_counter_lock);
+#endif
+ put_online_cpus();
+ return val;
+}
+
static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
{
return container_of(cgroup_subsys_state(cont,
@@ -3672,10 +3744,12 @@ struct mcs_total_stat {
s64 stat[NR_MCS_STAT];
};
-struct {
+struct mem_cgroup_stat_name {
char *local_name;
char *total_name;
-} memcg_stat_strings[NR_MCS_STAT] = {
+};
+
+struct mem_cgroup_stat_name memcg_stat_strings[NR_MCS_STAT] = {
{"cache", "total_cache"},
{"rss", "total_rss"},
{"mapped_file", "total_mapped_file"},
@@ -4234,6 +4308,77 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
}
#endif /* CONFIG_NUMA */
+struct scan_stat {
+ unsigned long stats[NR_SCAN_CONTEXT][NR_SCAN_STAT];
+};
+
+struct mem_cgroup_stat_name scan_stat_strings[NR_SCAN_STAT] = {
+ {"scanned_pages", "total_scanned_pages"},
+ {"scanned_anon_pages", "total_scanned_anon_pages"},
+ {"scanned_file_pages", "total_scanned_file_pages"},
+ {"freed_pages", "total_freed_pages"},
+ {"freed_anon_pages", "total_freed_anon_pages"},
+ {"freed_file_pages", "total_freed_file_pages"},
+ {"elapsed_ns", "total_elapsed_ns"},
+};
+
+static const char *scan_context_strings[NR_SCAN_CONTEXT] = {
+ "_by_system",
+ "_by_system_under_hierarchy",
+ "_by_limit",
+ "_by_limit_under_hierarchy",
+};
+
+static void mem_cgroup_get_scan_stat(struct mem_cgroup *mem,
+ struct scan_stat *s)
+{
+ int i, j;
+
+ for (i = 0; i < NR_SCAN_CONTEXT; i++)
+ for (j = 0; j < NR_SCAN_STAT; j++)
+ s->stats[i][j] += mem_cgroup_read_scan_stat(mem, i, j);
+}
+
+static void mem_cgroup_get_total_scan_stat(struct mem_cgroup *mem,
+ struct scan_stat *s)
+{
+ struct mem_cgroup *iter;
+
+ for_each_mem_cgroup_tree(iter, mem)
+ mem_cgroup_get_scan_stat(iter, s);
+}
+
+static int mem_cgroup_scan_stat_show(struct cgroup *cont, struct cftype *cft,
+ struct cgroup_map_cb *cb)
+{
+ struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+ struct scan_stat s;
+ char string[64];
+ int i, j;
+
+ memset(&s, 0, sizeof(s));
+ mem_cgroup_get_scan_stat(mem, &s);
+ for (i = 0; i < NR_SCAN_CONTEXT; i++) {
+ for (j = 0; j < NR_SCAN_STAT; j++) {
+ strcpy(string, scan_stat_strings[j].local_name);
+ strcat(string, scan_context_strings[i]);
+ cb->fill(cb, string, s.stats[i][j]);
+ }
+ }
+
+ memset(&s, 0, sizeof(s));
+ mem_cgroup_get_total_scan_stat(mem, &s);
+ for (i = 0; i < NR_SCAN_CONTEXT; i++) {
+ for (j = 0; j < NR_SCAN_STAT; j++) {
+ strcpy(string, scan_stat_strings[j].total_name);
+ strcat(string, scan_context_strings[i]);
+ cb->fill(cb, string, s.stats[i][j]);
+ }
+ }
+
+ return 0;
+}
+
static struct cftype mem_cgroup_files[] = {
{
.name = "usage_in_bytes",
@@ -4304,6 +4449,10 @@ static struct cftype mem_cgroup_files[] = {
.mode = S_IRUGO,
},
#endif
+ {
+ .name = "vmscan_stat",
+ .read_map = mem_cgroup_scan_stat_show,
+ },
};
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b5e81b7..669d8c4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -110,6 +110,11 @@ struct scan_control {
struct mem_cgroup *target_mem_cgroup;
/*
+ * Stats tracked during page reclaim.
+ */
+ struct memcg_scan_record *memcg_record;
+
+ /*
* Nodemask of nodes allowed by the caller. If NULL, all nodes
* are scanned.
*/
@@ -1522,6 +1527,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
nr_taken = isolate_pages(nr_to_scan, mz, &page_list,
&nr_scanned, sc->order,
reclaim_mode, 0, file);
+
+ sc->memcg_record->nr_scanned[file] += nr_scanned;
+
if (global_reclaim(sc)) {
zone->pages_scanned += nr_scanned;
if (current_is_kswapd())
@@ -1551,6 +1559,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
priority, &nr_dirty, &nr_writeback);
}
+ sc->memcg_record->nr_freed[file] += nr_reclaimed;
+
local_irq_disable();
if (current_is_kswapd())
__count_vm_events(KSWAPD_STEAL, nr_reclaimed);
@@ -1675,6 +1685,9 @@ static void shrink_active_list(unsigned long nr_pages,
&pgscanned, sc->order,
reclaim_mode, 1, file);
+ if (sc->memcg_record)
+ sc->memcg_record->nr_scanned[file] += pgscanned;
+
if (global_reclaim(sc))
zone->pages_scanned += pgscanned;
@@ -2111,6 +2124,7 @@ static void shrink_zone(int priority, struct zone *zone,
.priority = priority,
};
struct mem_cgroup *memcg;
+ struct memcg_scan_record rec;
memcg = mem_cgroup_iter(root, NULL, &reclaim);
do {
@@ -2119,9 +2133,23 @@ static void shrink_zone(int priority, struct zone *zone,
.zone = zone,
};
- if (should_reclaim_mem_cgroup(sc, memcg, priority))
+ if (should_reclaim_mem_cgroup(sc, memcg, priority)) {
+ unsigned long start, end;
+
+ memset(&rec, 0, sizeof(rec));
+ sc->memcg_record = &rec;
+ start = sched_clock();
+
shrink_mem_cgroup_zone(priority, &mz, sc);
+ end = sched_clock();
+ rec.elapsed = end - start;
+ sc->memcg_record = NULL;
+ mem_cgroup_record_scanstat(memcg, &rec,
+ global_reclaim(sc),
+ root != memcg);
+ }
+
/*
* Limit reclaim has historically picked one memcg and
* scanned it with decreasing priority levels until
@@ -2355,6 +2383,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
.order = order,
.target_mem_cgroup = NULL,
.nodemask = nodemask,
+ .memcg_record = NULL,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
@@ -2390,6 +2419,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
.nodemask = NULL, /* we don't care the placement */
.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+ .memcg_record = NULL,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
@@ -2558,6 +2588,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
.nr_to_reclaim = ULONG_MAX,
.order = order,
.target_mem_cgroup = NULL,
+ .memcg_record = NULL,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
@@ -3029,6 +3060,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
.nr_to_reclaim = nr_to_reclaim,
.hibernation_mode = 1,
.order = 0,
+ .memcg_record = NULL,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
@@ -3215,6 +3247,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
SWAP_CLUSTER_MAX),
.gfp_mask = gfp_mask,
.order = order,
+ .memcg_record = NULL,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
--
1.7.3.1