* memcg: add mlock statistic in memory.stat
@ 2012-01-11 22:41 Ying Han
2012-01-11 23:17 ` Hugh Dickins
2012-01-12 12:54 ` Michal Hocko
0 siblings, 2 replies; 12+ messages in thread
From: Ying Han @ 2012-01-11 22:41 UTC (permalink / raw)
To: Michal Hocko, Balbir Singh, Rik van Riel, Hugh Dickins,
Johannes Weiner, Mel Gorman, KAMEZAWA Hiroyuki, Pavel Emelyanov
Cc: linux-mm
We have the nr_mlock stat both in meminfo as well as vmstat system wide; this
patch adds the mlock field into the per-memcg memory stat. The stat itself enhances
the metrics exported by memcg, especially when used together with the "unevictable"
lru stat.
Tested:
$ cat /dev/cgroup/memory/memory.use_hierarchy
1
$ mkdir /dev/cgroup/memory/A
$ mkdir /dev/cgroup/memory/A/B
$ echo 1g >/dev/cgroup/memory/A/memory.limit_in_bytes
$ echo 1g >/dev/cgroup/memory/A/B/memory.limit_in_bytes
1. Run memtoy in B and mlock 512m file pages:
$ memtoy>file /export/hda3/file_512m
$ memtoy>map file_512m 0 512m shared
$ memtoy>lock file_512m
//meantime add some memory pressure.
$ cat /dev/cgroup/memory/A/B/memory.stat
...
mlock 536870912
unevictable 536870912
...
total_mlock 536870912
total_unevictable 536870912
$ cat /dev/cgroup/memory/A/memory.stat
...
mlock 0
unevictable 0
...
total_mlock 536870912
total_unevictable 536870912
2. unlock the file pages
$ memtoy>unlock file_512m
$ cat /dev/cgroup/memory/A/B/memory.stat
...
mlock 0
unevictable 0
...
total_mlock 0
total_unevictable 0
3. after step 1, move memtoy to A and force_empty B
$ cat /dev/cgroup/memory/A/B/memory.stat
...
mlock 0
unevictable 0
...
total_mlock 0
total_unevictable 0
$ cat /dev/cgroup/memory/A/memory.stat
...
mlock 536870912
unevictable 536870912
...
total_mlock 536870912
total_unevictable 536870912
Signed-off-by: Ying Han <yinghan@google.com>
---
Documentation/cgroups/memory.txt | 2 ++
include/linux/memcontrol.h | 1 +
include/linux/page_cgroup.h | 11 +++++++++++
mm/internal.h | 4 ++++
mm/memcontrol.c | 27 ++++++++++++++++++++++++++-
mm/mlock.c | 3 +++
mm/page_alloc.c | 1 +
7 files changed, 48 insertions(+), 1 deletions(-)
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 09a9472..070c016 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -386,6 +386,7 @@ memory.stat file includes following statistics
cache - # of bytes of page cache memory.
rss - # of bytes of anonymous and swap cache memory.
mapped_file - # of bytes of mapped file (includes tmpfs/shmem)
+mlock - # of bytes of mlocked memory.
pgpgin - # of charging events to the memory cgroup. The charging
event happens each time a page is accounted as either mapped
anon page(RSS) or cache page(Page Cache) to the cgroup.
@@ -410,6 +411,7 @@ hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to
total_cache - sum of all children's "cache"
total_rss - sum of all children's "rss"
total_mapped_file - sum of all children's "mapped_file"
+total_mlock - sum of all children's "mlock"
total_pgpgin - sum of all children's "pgpgin"
total_pgpgout - sum of all children's "pgpgout"
total_swap - sum of all children's "swap"
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4afc144..18f675b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -30,6 +30,7 @@ struct mm_struct;
/* Stats that can be updated by kernel. */
enum mem_cgroup_page_stat_item {
MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */
+ MEMCG_NR_MLOCK, /* # of pages charged as mlock */
};
struct mem_cgroup_reclaim_cookie {
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index aaa60da..ec8e7c0 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -10,6 +10,7 @@ enum {
/* flags for mem_cgroup and file and I/O status */
PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
PCG_FILE_MAPPED, /* page is accounted as "mapped" */
+ PCG_MLOCK, /* page is accounted as "mlock" */
/* No lock in page_cgroup */
PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
__NR_PCG_FLAGS,
@@ -62,6 +63,10 @@ static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
{ clear_bit(PCG_##lname, &pc->flags); }
+#define TESTSETPCGFLAG(uname, lname) \
+static inline int TestSetPageCgroup##uname(struct page_cgroup *pc) \
+ { return test_and_set_bit(PCG_##lname, &pc->flags); }
+
#define TESTCLEARPCGFLAG(uname, lname) \
static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \
{ return test_and_clear_bit(PCG_##lname, &pc->flags); }
@@ -85,6 +90,12 @@ SETPCGFLAG(FileMapped, FILE_MAPPED)
CLEARPCGFLAG(FileMapped, FILE_MAPPED)
TESTPCGFLAG(FileMapped, FILE_MAPPED)
+SETPCGFLAG(Mlock, MLOCK)
+CLEARPCGFLAG(Mlock, MLOCK)
+TESTPCGFLAG(Mlock, MLOCK)
+TESTSETPCGFLAG(Mlock, MLOCK)
+TESTCLEARPCGFLAG(Mlock, MLOCK)
+
SETPCGFLAG(Migration, MIGRATION)
CLEARPCGFLAG(Migration, MIGRATION)
TESTPCGFLAG(Migration, MIGRATION)
diff --git a/mm/internal.h b/mm/internal.h
index 2189af4..1366a21 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -12,6 +12,7 @@
#define __MM_INTERNAL_H
#include <linux/mm.h>
+#include <linux/memcontrol.h>
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long floor, unsigned long ceiling);
@@ -139,6 +140,7 @@ static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page)
return 0;
if (!TestSetPageMlocked(page)) {
+ mem_cgroup_inc_page_stat(page, MEMCG_NR_MLOCK);
inc_zone_page_state(page, NR_MLOCK);
count_vm_event(UNEVICTABLE_PGMLOCKED);
}
@@ -177,8 +179,10 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
unsigned long flags;
local_irq_save(flags);
+ mem_cgroup_dec_page_stat(page, MEMCG_NR_MLOCK);
__dec_zone_page_state(page, NR_MLOCK);
SetPageMlocked(newpage);
+ mem_cgroup_inc_page_stat(newpage, MEMCG_NR_MLOCK);
__inc_zone_page_state(newpage, NR_MLOCK);
local_irq_restore(flags);
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 33f083a..4f540a7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -84,6 +84,7 @@ enum mem_cgroup_stat_index {
MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */
MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */
MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */
+ MEM_CGROUP_STAT_MLOCK, /* # of pages charged as mlock()ed */
MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */
MEM_CGROUP_ON_MOVE, /* someone is moving account between groups */
@@ -1758,11 +1759,22 @@ void mem_cgroup_update_page_stat(struct page *page,
ClearPageCgroupFileMapped(pc);
idx = MEM_CGROUP_STAT_FILE_MAPPED;
break;
+ case MEMCG_NR_MLOCK:
+ if (val > 0) {
+ if (TestSetPageCgroupMlock(pc))
+ val = 0;
+ } else {
+ if (!TestClearPageCgroupMlock(pc))
+ val = 0;
+ }
+ idx = MEM_CGROUP_STAT_MLOCK;
+ break;
default:
BUG();
}
- this_cpu_add(memcg->stat->count[idx], val);
+ if (val)
+ this_cpu_add(memcg->stat->count[idx], val);
out:
if (unlikely(need_unlock))
@@ -2402,6 +2414,15 @@ static int mem_cgroup_move_account(struct page *page,
__this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
preempt_enable();
}
+
+ if (PageCgroupMlock(pc)) {
+ /* Update mlocked data for mem_cgroup */
+ preempt_disable();
+ __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_MLOCK]);
+ __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_MLOCK]);
+ preempt_enable();
+ }
+
mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
if (uncharge)
/* This is not "cancel", but cancel_charge does all we need. */
@@ -3728,6 +3749,7 @@ enum {
MCS_CACHE,
MCS_RSS,
MCS_FILE_MAPPED,
+ MCS_MLOCK,
MCS_PGPGIN,
MCS_PGPGOUT,
MCS_SWAP,
@@ -3754,6 +3776,7 @@ struct mem_cgroup_stat_name memcg_stat_strings[NR_MCS_STAT] = {
{"cache", "total_cache"},
{"rss", "total_rss"},
{"mapped_file", "total_mapped_file"},
+ {"mlock", "total_mlock"},
{"pgpgin", "total_pgpgin"},
{"pgpgout", "total_pgpgout"},
{"swap", "total_swap"},
@@ -3779,6 +3802,8 @@ mem_cgroup_get_local_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
s->stat[MCS_RSS] += val * PAGE_SIZE;
val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE;
+ val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_MLOCK);
+ s->stat[MCS_MLOCK] += val * PAGE_SIZE;
val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGIN);
s->stat[MCS_PGPGIN] += val;
val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGOUT);
diff --git a/mm/mlock.c b/mm/mlock.c
index 4f4f53b..ad165ca 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -59,6 +59,7 @@ void __clear_page_mlock(struct page *page)
return;
}
+ mem_cgroup_dec_page_stat(page, MEMCG_NR_MLOCK);
dec_zone_page_state(page, NR_MLOCK);
count_vm_event(UNEVICTABLE_PGCLEARED);
if (!isolate_lru_page(page)) {
@@ -81,6 +82,7 @@ void mlock_vma_page(struct page *page)
BUG_ON(!PageLocked(page));
if (!TestSetPageMlocked(page)) {
+ mem_cgroup_inc_page_stat(page, MEMCG_NR_MLOCK);
inc_zone_page_state(page, NR_MLOCK);
count_vm_event(UNEVICTABLE_PGMLOCKED);
if (!isolate_lru_page(page))
@@ -108,6 +110,7 @@ void munlock_vma_page(struct page *page)
BUG_ON(!PageLocked(page));
if (TestClearPageMlocked(page)) {
+ mem_cgroup_dec_page_stat(page, MEMCG_NR_MLOCK);
dec_zone_page_state(page, NR_MLOCK);
if (!isolate_lru_page(page)) {
int ret = SWAP_AGAIN;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5c4922e..849426e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -634,6 +634,7 @@ out:
*/
static inline void free_page_mlock(struct page *page)
{
+ mem_cgroup_dec_page_stat(page, MEMCG_NR_MLOCK);
__dec_zone_page_state(page, NR_MLOCK);
__count_vm_event(UNEVICTABLE_MLOCKFREED);
}
--
1.7.3.1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: memcg: add mlock statistic in memory.stat
2012-01-11 22:41 memcg: add mlock statistic in memory.stat Ying Han
@ 2012-01-11 23:17 ` Hugh Dickins
2012-01-11 23:59 ` KAMEZAWA Hiroyuki
2012-01-12 12:54 ` Michal Hocko
1 sibling, 1 reply; 12+ messages in thread
From: Hugh Dickins @ 2012-01-11 23:17 UTC (permalink / raw)
To: Ying Han
Cc: Michal Hocko, Balbir Singh, Rik van Riel, Hugh Dickins,
Johannes Weiner, Mel Gorman, KAMEZAWA Hiroyuki, Pavel Emelyanov,
linux-mm
On Wed, 11 Jan 2012, Ying Han wrote:
> We have the nr_mlock stat both in meminfo as well as vmstat system wide, this
> patch adds the mlock field into per-memcg memory stat. The stat itself enhances
> the metrics exported by memcg, especially is used together with "uneivctable"
> lru stat.
>
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -10,6 +10,7 @@ enum {
> /* flags for mem_cgroup and file and I/O status */
> PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
> PCG_FILE_MAPPED, /* page is accounted as "mapped" */
> + PCG_MLOCK, /* page is accounted as "mlock" */
> /* No lock in page_cgroup */
> PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
> __NR_PCG_FLAGS,
Is this really necessary? KAMEZAWA-san is engaged in trying to reduce
the number of PageCgroup flags, and I expect that in due course we shall
want to merge them in with Page flags, so adding more is unwelcome.
I'd have thought that with memcg_ hooks in the right places,
a separate flag would not be necessary?
Hugh
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: memcg: add mlock statistic in memory.stat
2012-01-11 23:17 ` Hugh Dickins
@ 2012-01-11 23:59 ` KAMEZAWA Hiroyuki
2012-01-12 0:50 ` Ying Han
0 siblings, 1 reply; 12+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-01-11 23:59 UTC (permalink / raw)
To: Hugh Dickins
Cc: Ying Han, Michal Hocko, Balbir Singh, Rik van Riel,
Johannes Weiner, Mel Gorman, Pavel Emelyanov, linux-mm
On Wed, 11 Jan 2012 15:17:42 -0800 (PST)
Hugh Dickins <hughd@google.com> wrote:
> On Wed, 11 Jan 2012, Ying Han wrote:
>
> > We have the nr_mlock stat both in meminfo as well as vmstat system wide, this
> > patch adds the mlock field into per-memcg memory stat. The stat itself enhances
> > the metrics exported by memcg, especially is used together with "uneivctable"
> > lru stat.
> >
> > --- a/include/linux/page_cgroup.h
> > +++ b/include/linux/page_cgroup.h
> > @@ -10,6 +10,7 @@ enum {
> > /* flags for mem_cgroup and file and I/O status */
> > PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
> > PCG_FILE_MAPPED, /* page is accounted as "mapped" */
> > + PCG_MLOCK, /* page is accounted as "mlock" */
> > /* No lock in page_cgroup */
> > PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
> > __NR_PCG_FLAGS,
>
> Is this really necessary? KAMEZAWA-san is engaged in trying to reduce
> the number of PageCgroup flags, and I expect that in due course we shall
> want to merge them in with Page flags, so adding more is unwelcome.
> I'd have thought that with memcg_ hooks in the right places,
> a separate flag would not be necessary?
>
Please don't ;)
NR_UNEVICTABLE_LRU is not enough ?
Following is the patch I posted before to remove PCG_FILE_MAPPED.
Then, I think you can use similar logic and make use of UNEVICTABLE flags.
==
better (lockless) idea is welcomed.
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: memcg: add mlock statistic in memory.stat
2012-01-11 23:59 ` KAMEZAWA Hiroyuki
@ 2012-01-12 0:50 ` Ying Han
2012-01-12 3:21 ` KAMEZAWA Hiroyuki
0 siblings, 1 reply; 12+ messages in thread
From: Ying Han @ 2012-01-12 0:50 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: Hugh Dickins, Michal Hocko, Balbir Singh, Rik van Riel,
Johannes Weiner, Mel Gorman, Pavel Emelyanov, linux-mm
On Wed, Jan 11, 2012 at 3:59 PM, KAMEZAWA Hiroyuki
<kamezawa.hiroyu@jp.fujitsu.com> wrote:
> On Wed, 11 Jan 2012 15:17:42 -0800 (PST)
> Hugh Dickins <hughd@google.com> wrote:
>
>> On Wed, 11 Jan 2012, Ying Han wrote:
>>
>> > We have the nr_mlock stat both in meminfo as well as vmstat system wide, this
>> > patch adds the mlock field into per-memcg memory stat. The stat itself enhances
>> > the metrics exported by memcg, especially is used together with "uneivctable"
>> > lru stat.
>> >
>> > --- a/include/linux/page_cgroup.h
>> > +++ b/include/linux/page_cgroup.h
>> > @@ -10,6 +10,7 @@ enum {
>> > /* flags for mem_cgroup and file and I/O status */
>> > PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
>> > PCG_FILE_MAPPED, /* page is accounted as "mapped" */
>> > + PCG_MLOCK, /* page is accounted as "mlock" */
>> > /* No lock in page_cgroup */
>> > PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
>> > __NR_PCG_FLAGS,
>>
>> Is this really necessary? KAMEZAWA-san is engaged in trying to reduce
>> the number of PageCgroup flags, and I expect that in due course we shall
>> want to merge them in with Page flags, so adding more is unwelcome.
>> I'd have thought that with memcg_ hooks in the right places,
>> a separate flag would not be necessary?
>>
>
> Please don't ;)
>
> NR_UNEIVCTABLE_LRU is not enough ?
Seems not.
The unevictable lru includes more than mlock()'d pages (SHM_LOCK'd
etc). There are use cases where we would like to know the mlock-ed size
per-cgroup. We used to achieve that in the fake-numa based container by
reading the value from per-node meminfo, however we miss that
information in memcg. What do you think?
Thank you Hugh and Kame for the reference. Apparently I missed that
patch and I will take a look at it. (still catching up emails after
vacation).
--Ying
>
> Following is the patch I posted before to remove PCG_FILE_MAPPED.
> Then, I think you can use similar logic and make use of UNEVICTABLE flags.
>
> ==
> better (lockless) idea is welcomed.
>
> From fd2b5822838eebbacc41f343f9eb8c6f0ad8e1cc Mon Sep 17 00:00:00 2001
> From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Date: Thu, 15 Dec 2011 11:42:49 +0900
> Subject: [PATCH 2/5] memcg: safer page stat updating
>
> Now, page stat accounting is done like this.
>
> if (....set flag or some)
> update vmstat
> update memcg'stat
>
> Unlike vmstat, memcg must take care of changes in pc->mem_cgroup.
> This is done by page_cgroup_move_lock and other flags per stats.
>
> I think FileMapped works well. But, considering update of other
> statistics, current logic doesn't works well. Assume following case,
>
> set flag
> ..(delay by some preemption)..
> clear flag
> pc's flag is unset and
> don't update anything.
> memcg = pc->mem_cgroup
> set flag to pc->mem_cgroup
> update memcg stat
>
> In this case, the stat will be leaked out. I think memcg's account
> routine should see no flags. To avoid using memcg's original flags,
> we need to prevent overwriting pc->mem_cgroup while we updating
> the memcg.
>
> This patch adds
> - mem_cgroup_begin_update_page_stats(),
> - mem_cgroup_end_update_page_stats()
>
> And guarantees pc->mem_cgroup is not overwritten while updating.
> The caller should do
>
> mem_cgroup_begin_update_page_stats()
> if (.... set flag or some)
> update vmstat
> update memcg's stat
> mem_cgroup_end_update_page_stats().
>
> This begin...end will check a counter (which is 0 in most cases) under
> rcu_read_lock/rcu_read_unlock. And take a spinlock if required.
>
> Following patch in this series will remove PCG_FILE_MAPPED flag.
> ---
> include/linux/memcontrol.h | 49 +++++++++++++++++++++++++++++++++++++++++-
> mm/memcontrol.c | 50 +++++++++++++++++++++++++++++--------------
> mm/rmap.c | 5 ++++
> 3 files changed, 86 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 598b3c9..4a61c4b 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -141,9 +141,52 @@ static inline bool mem_cgroup_disabled(void)
> return false;
> }
>
> -void mem_cgroup_update_page_stat(struct page *page,
> +/*
> + * Unlike vmstat, page's mem_cgroup can be overwritten and for which memcg
> + * the page stats should be accounted to is determined dynamically.
> + * Unfortunately, there are many races. To avoid races, the caller should do
> + *
> + * locked = mem_cgroup_begin_update_page_stat(page)
> + * if (set page flags etc)
> + * mem_cgroup_update_page_stat(page);
> + * mem_cgroup_end_update_page_stat(page, locked);
> + *
> + * Between [begin, end) calls, page's mem_cgroup will never be changed.
> + */
> +void __mem_cgroup_update_page_stat(struct page *page,
> + enum mem_cgroup_page_stat_item idx,
> + int val);
> +
> +static inline void mem_cgroup_update_page_stat(struct page *page,
> enum mem_cgroup_page_stat_item idx,
> - int val);
> + int val)
> +{
> + if (mem_cgroup_disabled())
> + return;
> + __mem_cgroup_update_page_stat(page, idx, val);
> +}
> +
> +bool __mem_cgroup_begin_update_page_stats(struct page *page,
> + unsigned long *flags);
> +static inline bool
> +mem_cgroup_begin_update_page_stats(struct page *page, unsigned long *flags)
> +{
> + if (mem_cgroup_disabled())
> + return false;
> + return __mem_cgroup_begin_update_page_stats(page, flags);
> +}
> +
> +void __mem_cgroup_end_update_page_stats(struct page *page, bool locked,
> + unsigned long *flags);
> +
> +static inline void
> +mem_cgroup_end_update_page_stats(struct page *page,
> + bool locked, unsigned long *flags)
> +{
> + if (mem_cgroup_disabled())
> + return;
> + __mem_cgroup_end_update_page_stats(page, locked, flags);
> +}
>
> static inline void mem_cgroup_inc_page_stat(struct page *page,
> enum mem_cgroup_page_stat_item idx)
> @@ -171,6 +214,8 @@ void mem_cgroup_split_huge_fixup(struct page *head);
> bool mem_cgroup_bad_page_check(struct page *page);
> void mem_cgroup_print_bad_page(struct page *page);
> #endif
> +
> +
> #else /* CONFIG_CGROUP_MEM_RES_CTLR */
> struct mem_cgroup;
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index d183e1b..f4e6d5c 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1831,27 +1831,50 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask)
> * possibility of race condition. If there is, we take a lock.
> */
>
> -void mem_cgroup_update_page_stat(struct page *page,
> - enum mem_cgroup_page_stat_item idx, int val)
> +/*
> + * This function calls rcu_read_lock(). This lock is unlocked by
> + * __mem_cgroup_end_update_page_stat().
> + */
> +bool __mem_cgroup_begin_update_page_stats(struct page *page, unsigned long *flags)
> {
> struct mem_cgroup *memcg;
> struct page_cgroup *pc = lookup_page_cgroup(page);
> bool need_unlock = false;
> - unsigned long uninitialized_var(flags);
>
> rcu_read_lock();
> memcg = pc->mem_cgroup;
> - if (unlikely(!memcg || !PageCgroupUsed(pc)))
> + if (!memcg || !PageCgroupUsed(pc))
> goto out;
> - /* pc->mem_cgroup is unstable ? */
> if (unlikely(mem_cgroup_stealed(memcg)) || PageTransHuge(page)) {
> - /* take a lock against to access pc->mem_cgroup */
> - move_lock_page_cgroup(pc, &flags);
> + move_lock_page_cgroup(pc, flags);
> need_unlock = true;
> - memcg = pc->mem_cgroup;
> - if (!memcg || !PageCgroupUsed(pc))
> - goto out;
> }
> +out:
> + return need_unlock;
> +}
> +EXPORT_SYMBOL(__mem_cgroup_begin_update_page_stats);
> +
> +void __mem_cgroup_end_update_page_stats(struct page *page, bool locked,
> + unsigned long *flags)
> +{
> + struct page_cgroup *pc;
> +
> + if (unlikely(locked)) {
> + pc = lookup_page_cgroup(page);
> + move_unlock_page_cgroup(pc, flags);
> + }
> + rcu_read_unlock();
> +}
> +EXPORT_SYMBOL(__mem_cgroup_end_update_page_stats);
> +
> +void __mem_cgroup_update_page_stat(struct page *page,
> + enum mem_cgroup_page_stat_item idx, int val)
> +{
> + struct page_cgroup *pc = lookup_page_cgroup(page);
> + struct mem_cgroup *memcg = pc->mem_cgroup;
> +
> + if (!memcg || !PageCgroupUsed(pc))
> + return;
>
> switch (idx) {
> case MEMCG_NR_FILE_MAPPED:
> @@ -1866,14 +1889,9 @@ void mem_cgroup_update_page_stat(struct page *page,
> }
>
> this_cpu_add(memcg->stat->count[idx], val);
> -
> -out:
> - if (unlikely(need_unlock))
> - move_unlock_page_cgroup(pc, &flags);
> - rcu_read_unlock();
> return;
> }
> -EXPORT_SYMBOL(mem_cgroup_update_page_stat);
> +EXPORT_SYMBOL(__mem_cgroup_update_page_stat);
>
> /*
> * size of first charge trial. "32" comes from vmscan.c's magic value.
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 54d140a..3648c88 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1105,10 +1105,15 @@ void page_add_new_anon_rmap(struct page *page,
> */
> void page_add_file_rmap(struct page *page)
> {
> + unsigned long flags;
> + bool locked;
> +
> + locked = mem_cgroup_begin_update_page_stats(page, &flags);
> if (atomic_inc_and_test(&page->_mapcount)) {
> __inc_zone_page_state(page, NR_FILE_MAPPED);
> mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
> }
> + mem_cgroup_end_update_page_stats(page, locked, &flags);
> }
>
> /**
> --
> 1.7.4.1
>
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: memcg: add mlock statistic in memory.stat
2012-01-12 0:50 ` Ying Han
@ 2012-01-12 3:21 ` KAMEZAWA Hiroyuki
2012-01-12 19:13 ` Ying Han
0 siblings, 1 reply; 12+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-01-12 3:21 UTC (permalink / raw)
To: Ying Han
Cc: Hugh Dickins, Michal Hocko, Balbir Singh, Rik van Riel,
Johannes Weiner, Mel Gorman, Pavel Emelyanov, linux-mm
On Wed, 11 Jan 2012 16:50:09 -0800
Ying Han <yinghan@google.com> wrote:
> On Wed, Jan 11, 2012 at 3:59 PM, KAMEZAWA Hiroyuki
> <kamezawa.hiroyu@jp.fujitsu.com> wrote:
> > On Wed, 11 Jan 2012 15:17:42 -0800 (PST)
> > Hugh Dickins <hughd@google.com> wrote:
> >
> >> On Wed, 11 Jan 2012, Ying Han wrote:
> >>
> >> > We have the nr_mlock stat both in meminfo as well as vmstat system wide, this
> >> > patch adds the mlock field into per-memcg memory stat. The stat itself enhances
> >> > the metrics exported by memcg, especially is used together with "uneivctable"
> >> > lru stat.
> >> >
> >> > --- a/include/linux/page_cgroup.h
> >> > +++ b/include/linux/page_cgroup.h
> >> > @@ -10,6 +10,7 @@ enum {
> >> >    /* flags for mem_cgroup and file and I/O status */
> >> >    PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
> >> >    PCG_FILE_MAPPED, /* page is accounted as "mapped" */
> >> > +  PCG_MLOCK, /* page is accounted as "mlock" */
> >> >    /* No lock in page_cgroup */
> >> >    PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
> >> >    __NR_PCG_FLAGS,
> >>
> >> Is this really necessary?  KAMEZAWA-san is engaged in trying to reduce
> >> the number of PageCgroup flags, and I expect that in due course we shall
> >> want to merge them in with Page flags, so adding more is unwelcome.
> >> I'd have thought that with memcg_ hooks in the right places,
> >> a separate flag would not be necessary?
> >>
> >
> > Please don't ;)
> >
> > NR_UNEIVCTABLE_LRU is not enough ?
>
> Seems not.
>
> The unevictable lru includes more than mlock()'d pages ( SHM_LOCK'd
> etc). There are use cases where we like to know the mlock-ed size
> per-cgroup. We used to archived that in fake-numa based container by
> reading the value from per-node meminfo, however we miss that
> information in memcg. What do you think?
>
Hm. The # of mlocked pages can be got sum of /proc/<pid>/? ?
BTW, Roughly..
(inactive_anon + active_anon) - rss = # of unlocked shm.
cache - (inactive_file + active_file) = total # of shm
Then,
(cache - (inactive_file + active_file)) - ((inactive_anon + active_anon) - rss)
= cache + rss - (sum of inactive/actige lru)
= locked shm.
Hm, but this works only when unmapped swapcache is small ;)
Thanks,
-Kame
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: memcg: add mlock statistic in memory.stat
2012-01-12 3:21 ` KAMEZAWA Hiroyuki
@ 2012-01-12 19:13 ` Ying Han
2012-01-13 0:10 ` KAMEZAWA Hiroyuki
0 siblings, 1 reply; 12+ messages in thread
From: Ying Han @ 2012-01-12 19:13 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: Hugh Dickins, Michal Hocko, Balbir Singh, Rik van Riel,
Johannes Weiner, Mel Gorman, Pavel Emelyanov, linux-mm
On Wed, Jan 11, 2012 at 7:21 PM, KAMEZAWA Hiroyuki
<kamezawa.hiroyu@jp.fujitsu.com> wrote:
> On Wed, 11 Jan 2012 16:50:09 -0800
> Ying Han <yinghan@google.com> wrote:
>
>> On Wed, Jan 11, 2012 at 3:59 PM, KAMEZAWA Hiroyuki
>> <kamezawa.hiroyu@jp.fujitsu.com> wrote:
>> > On Wed, 11 Jan 2012 15:17:42 -0800 (PST)
>> > Hugh Dickins <hughd@google.com> wrote:
>> >
>> >> On Wed, 11 Jan 2012, Ying Han wrote:
>> >>
>> >> > We have the nr_mlock stat both in meminfo as well as vmstat system wide, this
>> >> > patch adds the mlock field into per-memcg memory stat. The stat itself enhances
>> >> > the metrics exported by memcg, especially is used together with "uneivctable"
>> >> > lru stat.
>> >> >
>> >> > --- a/include/linux/page_cgroup.h
>> >> > +++ b/include/linux/page_cgroup.h
>> >> > @@ -10,6 +10,7 @@ enum {
>> >> > /* flags for mem_cgroup and file and I/O status */
>> >> > PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
>> >> > PCG_FILE_MAPPED, /* page is accounted as "mapped" */
>> >> > + PCG_MLOCK, /* page is accounted as "mlock" */
>> >> > /* No lock in page_cgroup */
>> >> > PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
>> >> > __NR_PCG_FLAGS,
>> >>
>> >> Is this really necessary? KAMEZAWA-san is engaged in trying to reduce
>> >> the number of PageCgroup flags, and I expect that in due course we shall
>> >> want to merge them in with Page flags, so adding more is unwelcome.
>> >> I'd have thought that with memcg_ hooks in the right places,
>> >> a separate flag would not be necessary?
>> >>
>> >
>> > Please don't ;)
>> >
>> > NR_UNEIVCTABLE_LRU is not enough ?
>>
>> Seems not.
>>
>> The unevictable lru includes more than mlock()'d pages ( SHM_LOCK'd
>> etc). There are use cases where we like to know the mlock-ed size
>> per-cgroup. We used to archived that in fake-numa based container by
>> reading the value from per-node meminfo, however we miss that
>> information in memcg. What do you think?
>>
>
> Hm. The # of mlocked pages can be got sum of /proc/<pid>/? ?
That is tough. Then we have to do the calculation by adding up all the
pids within a cgroup.
> BTW, Roughly..
>
> (inactive_anon + active_anon) - rss = # of unlocked shm.
>
> cache - (inactive_file + active_file) = total # of shm
>
> Then,
>
> (cache - (inactive_file + active_file)) - ((inactive_anon + active_anon) - rss)
> = cache + rss - (sum of inactive/actige lru)
> = locked shm.
>
> Hm, but this works only when unmapped swapcache is small ;)
We might be getting a rough number. But we have use cases relying on
more accurate output. Thoughts?
Thanks
--Ying
>
> Thanks,
> -Kame
>
>
>
>
>
>
>
>
>
>
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: memcg: add mlock statistic in memory.stat
2012-01-12 19:13 ` Ying Han
@ 2012-01-13 0:10 ` KAMEZAWA Hiroyuki
2012-01-13 22:27 ` Ying Han
0 siblings, 1 reply; 12+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-01-13 0:10 UTC (permalink / raw)
To: Ying Han
Cc: Hugh Dickins, Michal Hocko, Balbir Singh, Rik van Riel,
Johannes Weiner, Mel Gorman, Pavel Emelyanov, linux-mm
On Thu, 12 Jan 2012 11:13:00 -0800
Ying Han <yinghan@google.com> wrote:
> On Wed, Jan 11, 2012 at 7:21 PM, KAMEZAWA Hiroyuki
> <kamezawa.hiroyu@jp.fujitsu.com> wrote:
> > On Wed, 11 Jan 2012 16:50:09 -0800
> > Ying Han <yinghan@google.com> wrote:
> >
> >> On Wed, Jan 11, 2012 at 3:59 PM, KAMEZAWA Hiroyuki
> >> <kamezawa.hiroyu@jp.fujitsu.com> wrote:
> >> > On Wed, 11 Jan 2012 15:17:42 -0800 (PST)
> >> > Hugh Dickins <hughd@google.com> wrote:
> >> >
> >> >> On Wed, 11 Jan 2012, Ying Han wrote:
> >> >>
> >> >> > We have the nr_mlock stat both in meminfo as well as vmstat system wide, this
> >> >> > patch adds the mlock field into per-memcg memory stat. The stat itself enhances
> >> >> > the metrics exported by memcg, especially is used together with "unevictable"
> >> >> > lru stat.
> >> >> >
> >> >> > --- a/include/linux/page_cgroup.h
> >> >> > +++ b/include/linux/page_cgroup.h
> >> >> > @@ -10,6 +10,7 @@ enum {
> >> >> > A A /* flags for mem_cgroup and file and I/O status */
> >> >> > A A PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
> >> >> > A A PCG_FILE_MAPPED, /* page is accounted as "mapped" */
> >> >> > + A PCG_MLOCK, /* page is accounted as "mlock" */
> >> >> > A A /* No lock in page_cgroup */
> >> >> > A A PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
> >> >> > A A __NR_PCG_FLAGS,
> >> >>
> >> >> Is this really necessary? A KAMEZAWA-san is engaged in trying to reduce
> >> >> the number of PageCgroup flags, and I expect that in due course we shall
> >> >> want to merge them in with Page flags, so adding more is unwelcome.
> >> >> I'd A have thought that with memcg_ hooks in the right places,
> >> >> a separate flag would not be necessary?
> >> >>
> >> >
> >> > Please don't ;)
> >> >
> >> > NR_UNEIVCTABLE_LRU is not enough ?
> >>
> >> Seems not.
> >>
> >> The unevictable lru includes more than mlock()'d pages (SHM_LOCK'd
> >> etc). There are use cases where we'd like to know the mlocked size
> >> per-cgroup. We used to achieve that in a fake-NUMA based container by
> >> reading the value from per-node meminfo; however, we miss that
> >> information in memcg. What do you think?
> >>
> >
> > Hm. The # of mlocked pages can be got sum of /proc/<pid>/? ?
>
> That is tough. Then we have to do the calculation by adding up all the
> pids within a cgroup.
>
> > BTW, Roughly..
> >
> > (inactive_anon + active_anon) - rss = # of unlocked shm.
> >
> > cache - (inactive_file + active_file) = total # of shm
> >
> > Then,
> >
> > (cache - A (inactive_file + active_file)) - ((inactive_anon + active_anon) - rss)
> > = cache + rss - (sum of inactive/active lru)
> > = locked shm.
> >
> > Hm, but this works only when unmapped swapcache is A small ;)
>
> We might be getting a rough number. But we have use cases relying on
> more accurate output. Thoughts?
>
If we only need megabyte-order accuracy, the above will work well enough.
But ok, having stats seems useful because meminfo has it ;)
For your input, I'd like to post an updated RFC patch to do page state accounting
without additional bits to pc->flags, today. With that, you can rely on PG_mlocked.
By that patch, I know we can make use of page-flags by some logic but am still
looking for more efficient way...
Thanks,
-Kame
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: memcg: add mlock statistic in memory.stat
2012-01-13 0:10 ` KAMEZAWA Hiroyuki
@ 2012-01-13 22:27 ` Ying Han
0 siblings, 0 replies; 12+ messages in thread
From: Ying Han @ 2012-01-13 22:27 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: Hugh Dickins, Michal Hocko, Balbir Singh, Rik van Riel,
Johannes Weiner, Mel Gorman, Pavel Emelyanov, linux-mm
On Thu, Jan 12, 2012 at 4:10 PM, KAMEZAWA Hiroyuki
<kamezawa.hiroyu@jp.fujitsu.com> wrote:
> On Thu, 12 Jan 2012 11:13:00 -0800
> Ying Han <yinghan@google.com> wrote:
>
>> On Wed, Jan 11, 2012 at 7:21 PM, KAMEZAWA Hiroyuki
>> <kamezawa.hiroyu@jp.fujitsu.com> wrote:
>> > On Wed, 11 Jan 2012 16:50:09 -0800
>> > Ying Han <yinghan@google.com> wrote:
>> >
>> >> On Wed, Jan 11, 2012 at 3:59 PM, KAMEZAWA Hiroyuki
>> >> <kamezawa.hiroyu@jp.fujitsu.com> wrote:
>> >> > On Wed, 11 Jan 2012 15:17:42 -0800 (PST)
>> >> > Hugh Dickins <hughd@google.com> wrote:
>> >> >
>> >> >> On Wed, 11 Jan 2012, Ying Han wrote:
>> >> >>
>> >> >> > We have the nr_mlock stat both in meminfo as well as vmstat system wide, this
>> >> >> > patch adds the mlock field into per-memcg memory stat. The stat itself enhances
>> >> >> > the metrics exported by memcg, especially is used together with "unevictable"
>> >> >> > lru stat.
>> >> >> >
>> >> >> > --- a/include/linux/page_cgroup.h
>> >> >> > +++ b/include/linux/page_cgroup.h
>> >> >> > @@ -10,6 +10,7 @@ enum {
>> >> >> > /* flags for mem_cgroup and file and I/O status */
>> >> >> > PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
>> >> >> > PCG_FILE_MAPPED, /* page is accounted as "mapped" */
>> >> >> > + PCG_MLOCK, /* page is accounted as "mlock" */
>> >> >> > /* No lock in page_cgroup */
>> >> >> > PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
>> >> >> > __NR_PCG_FLAGS,
>> >> >>
>> >> >> Is this really necessary? KAMEZAWA-san is engaged in trying to reduce
>> >> >> the number of PageCgroup flags, and I expect that in due course we shall
>> >> >> want to merge them in with Page flags, so adding more is unwelcome.
>> >> >> I'd have thought that with memcg_ hooks in the right places,
>> >> >> a separate flag would not be necessary?
>> >> >>
>> >> >
>> >> > Please don't ;)
>> >> >
>> >> > NR_UNEIVCTABLE_LRU is not enough ?
>> >>
>> >> Seems not.
>> >>
>> >> The unevictable lru includes more than mlock()'d pages (SHM_LOCK'd
>> >> etc). There are use cases where we'd like to know the mlocked size
>> >> per-cgroup. We used to achieve that in a fake-NUMA based container by
>> >> reading the value from per-node meminfo; however, we miss that
>> >> information in memcg. What do you think?
>> >>
>> >
>> > Hm. The # of mlocked pages can be got sum of /proc/<pid>/? ?
>>
>> That is tough. Then we have to do the calculation by adding up all the
>> pids within a cgroup.
>>
>> > BTW, Roughly..
>> >
>> > (inactive_anon + active_anon) - rss = # of unlocked shm.
>> >
>> > cache - (inactive_file + active_file) = total # of shm
>> >
>> > Then,
>> >
>> > (cache - (inactive_file + active_file)) - ((inactive_anon + active_anon) - rss)
>> > = cache + rss - (sum of inactive/active lru)
>> > = locked shm.
>> >
>> > Hm, but this works only when unmapped swapcache is small ;)
>>
>> We might be getting a rough number. But we have use cases relying on
>> more accurate output. Thoughts?
>>
> If we only need megabyte-order accuracy, the above will work well enough.
> But ok, having stats seems useful because meminfo has it ;)
Thanks.
>
> For your input, I'd like to post an updated RFC patch to do page state accounting
> without additional bits to pc->flags, today. With that, you can rely on PG_mlocked.
>
> By that patch, I know we can make use of page-flags by some logic but am still
> looking for more efficient way...
Thank you for the heads up. It makes sense to me to look at your
patchset first, and then the mlock stat patch comes after that.
--Ying
>
> Thanks,
> -Kame
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: memcg: add mlock statistic in memory.stat
2012-01-11 22:41 memcg: add mlock statistic in memory.stat Ying Han
2012-01-11 23:17 ` Hugh Dickins
@ 2012-01-12 12:54 ` Michal Hocko
2012-01-12 19:09 ` Ying Han
1 sibling, 1 reply; 12+ messages in thread
From: Michal Hocko @ 2012-01-12 12:54 UTC (permalink / raw)
To: Ying Han
Cc: Balbir Singh, Rik van Riel, Hugh Dickins, Johannes Weiner,
Mel Gorman, KAMEZAWA Hiroyuki, Pavel Emelyanov, linux-mm
On Wed 11-01-12 14:41:08, Ying Han wrote:
> We have the nr_mlock stat both in meminfo as well as vmstat system wide, this
> patch adds the mlock field into per-memcg memory stat. The stat itself enhances
> the metrics exported by memcg, especially is used together with "unevictable"
> lru stat.
Could you describe when the unevictable has such a different meaning than
mlocked that it is unusable?
--
Michal Hocko
SUSE Labs
SUSE LINUX s.r.o.
Lihovarska 1060/12
190 00 Praha 9
Czech Republic
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: memcg: add mlock statistic in memory.stat
2012-01-12 12:54 ` Michal Hocko
@ 2012-01-12 19:09 ` Ying Han
2012-01-12 20:44 ` Michal Hocko
0 siblings, 1 reply; 12+ messages in thread
From: Ying Han @ 2012-01-12 19:09 UTC (permalink / raw)
To: Michal Hocko
Cc: Balbir Singh, Rik van Riel, Hugh Dickins, Johannes Weiner,
Mel Gorman, KAMEZAWA Hiroyuki, Pavel Emelyanov, linux-mm
On Thu, Jan 12, 2012 at 4:54 AM, Michal Hocko <mhocko@suse.cz> wrote:
> On Wed 11-01-12 14:41:08, Ying Han wrote:
>> We have the nr_mlock stat both in meminfo as well as vmstat system wide, this
>> patch adds the mlock field into per-memcg memory stat. The stat itself enhances
>> the metrics exported by memcg, especially is used together with "unevictable"
>> lru stat.
>
> Could you describe when the unevictable has such a different meaning than
> mlocked that it is unusable?
The unevictable lru includes more than mlock()'d pages ( SHM_LOCK'd
etc). Like the following:
$ memtoy>shmem shm_400m 400m
$ memtoy>map shm_400m 0 400m
$ memtoy>touch shm_400m
memtoy: touched 102400 pages in 0.360 secs
$ memtoy>slock shm_400m
//meantime add some memory pressure.
$ memtoy>file /export/hda3/file_512m
$ memtoy>map file_512m 0 512m shared
$ memtoy>lock file_512m
$ cat /dev/cgroup/memory/B/memory.stat
mapped_file 956301312
mlock 536870912
unevictable 956203008
Here, mapped_file - mlock = 400M shm_lock'ed pages are included in
unevictable stat.
Besides, not all mlock'ed pages get to the unevictable lru in the first
place, and the same holds the other way around.
Thanks
--Ying
>
> --
> Michal Hocko
> SUSE Labs
> SUSE LINUX s.r.o.
> Lihovarska 1060/12
> 190 00 Praha 9
> Czech Republic
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: memcg: add mlock statistic in memory.stat
2012-01-12 19:09 ` Ying Han
@ 2012-01-12 20:44 ` Michal Hocko
2012-01-13 22:24 ` Ying Han
0 siblings, 1 reply; 12+ messages in thread
From: Michal Hocko @ 2012-01-12 20:44 UTC (permalink / raw)
To: Ying Han
Cc: Balbir Singh, Rik van Riel, Hugh Dickins, Johannes Weiner,
Mel Gorman, KAMEZAWA Hiroyuki, Pavel Emelyanov, linux-mm
On Thu 12-01-12 11:09:58, Ying Han wrote:
> On Thu, Jan 12, 2012 at 4:54 AM, Michal Hocko <mhocko@suse.cz> wrote:
> > On Wed 11-01-12 14:41:08, Ying Han wrote:
> >> We have the nr_mlock stat both in meminfo as well as vmstat system wide, this
> >> patch adds the mlock field into per-memcg memory stat. The stat itself enhances
> >> the metrics exported by memcg, especially is used together with "unevictable"
> >> lru stat.
> >
> > Could you describe when the unevictable has such a different meaning than
> > mlocked that it is unusable?
>
> The unevictable lru includes more than mlock()'d pages ( SHM_LOCK'd
> etc). Like the following:
Yes, I am aware of that. Maybe I wasn't clear enough in my question. I
was rather interested _when_ it actually matters for your decisions about
the setup. Those pages are not evictable anyway.
> $ memtoy>shmem shm_400m 400m
> $ memtoy>map shm_400m 0 400m
> $ memtoy>touch shm_400m
> memtoy: touched 102400 pages in 0.360 secs
> $ memtoy>slock shm_400m
> //meantime add some memory pressure.
>
> $ memtoy>file /export/hda3/file_512m
> $ memtoy>map file_512m 0 512m shared
> $ memtoy>lock file_512m
>
> $ cat /dev/cgroup/memory/B/memory.stat
> mapped_file 956301312
> mlock 536870912
> unevictable 956203008
>
> Here, mapped_file - mlock = 400M shm_lock'ed pages are included in
> unevictable stat.
>
> Besides, not all mlock'ed pages get to the unevictable lru in the first
> place, and the same holds the other way around.
>
> Thanks
>
> --Ying
--
Michal Hocko
SUSE Labs
SUSE LINUX s.r.o.
Lihovarska 1060/12
190 00 Praha 9
Czech Republic
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: memcg: add mlock statistic in memory.stat
2012-01-12 20:44 ` Michal Hocko
@ 2012-01-13 22:24 ` Ying Han
0 siblings, 0 replies; 12+ messages in thread
From: Ying Han @ 2012-01-13 22:24 UTC (permalink / raw)
To: Michal Hocko
Cc: Balbir Singh, Rik van Riel, Hugh Dickins, Johannes Weiner,
Mel Gorman, KAMEZAWA Hiroyuki, Pavel Emelyanov, linux-mm
On Thu, Jan 12, 2012 at 12:44 PM, Michal Hocko <mhocko@suse.cz> wrote:
> On Thu 12-01-12 11:09:58, Ying Han wrote:
>> On Thu, Jan 12, 2012 at 4:54 AM, Michal Hocko <mhocko@suse.cz> wrote:
>> > On Wed 11-01-12 14:41:08, Ying Han wrote:
>> >> We have the nr_mlock stat both in meminfo as well as vmstat system wide, this
>> >> patch adds the mlock field into per-memcg memory stat. The stat itself enhances
>> >> the metrics exported by memcg, especially is used together with "unevictable"
>> >> lru stat.
>> >
>> > Could you describe when the unevictable has such a different meaning than
>> > mlocked that it is unusable?
>>
>> The unevictable lru includes more than mlock()'d pages ( SHM_LOCK'd
>> etc). Like the following:
>
> Yes, I am aware of that. Maybe I wasn't clear enough in my question. I
> was rather interested _when_ it actually matters for your decisions about
> the setup. Those pages are not evictable anyway.
It is true that we (as the kernel) cannot do much with those pages as long
as they are unevictable. The mlock stat I am proposing is more useful
for system admins, and sometimes for kernel developers as well. Many
times in the past we have needed to read the mlock stat from the
per-container meminfo for different reasons. Sorry I cannot give you
a very concrete example, but I do remember it happened a lot.
On the other hand, we do have the ability to read the mlock stat from
meminfo, and we should add the same visibility to memcg as well.
--Ying
>
>> $ memtoy>shmem shm_400m 400m
>> $ memtoy>map shm_400m 0 400m
>> $ memtoy>touch shm_400m
>> memtoy: touched 102400 pages in 0.360 secs
>> $ memtoy>slock shm_400m
>> //meantime add some memory pressure.
>>
>> $ memtoy>file /export/hda3/file_512m
>> $ memtoy>map file_512m 0 512m shared
>> $ memtoy>lock file_512m
>>
>> $ cat /dev/cgroup/memory/B/memory.stat
>> mapped_file 956301312
>> mlock 536870912
>> unevictable 956203008
>>
>> Here, mapped_file - mlock = 400M shm_lock'ed pages are included in
>> unevictable stat.
>>
>> Besides, not all mlock'ed pages get to the unevictable lru in the first
>> place, and the same holds the other way around.
>>
>> Thanks
>>
>> --Ying
>
> --
> Michal Hocko
> SUSE Labs
> SUSE LINUX s.r.o.
> Lihovarska 1060/12
> 190 00 Praha 9
> Czech Republic
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2012-01-13 22:27 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-01-11 22:41 memcg: add mlock statistic in memory.stat Ying Han
2012-01-11 23:17 ` Hugh Dickins
2012-01-11 23:59 ` KAMEZAWA Hiroyuki
2012-01-12 0:50 ` Ying Han
2012-01-12 3:21 ` KAMEZAWA Hiroyuki
2012-01-12 19:13 ` Ying Han
2012-01-13 0:10 ` KAMEZAWA Hiroyuki
2012-01-13 22:27 ` Ying Han
2012-01-12 12:54 ` Michal Hocko
2012-01-12 19:09 ` Ying Han
2012-01-12 20:44 ` Michal Hocko
2012-01-13 22:24 ` Ying Han
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).