* [RFC] [PATCH] memory controller statistics
@ 2007-09-07 3:39 YAMAMOTO Takashi
[not found] ` <20070907033942.4A6541BFA52-Pcsii4f/SVk@public.gmane.org>
0 siblings, 1 reply; 25+ messages in thread
From: YAMAMOTO Takashi @ 2007-09-07 3:39 UTC (permalink / raw)
To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
svaidy-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg
hi,
i implemented some statistics for your memory controller.
it's tested with 2.6.23-rc2-mm2 + memory controller v7.
i think it can be applied to 2.6.23-rc4-mm1 as well.
YAMOMOTO Takshi
todo: something like nr_active/inactive in /proc/vmstat.
--- ./mm/memcontrol.c.BACKUP 2007-08-29 17:13:09.000000000 +0900
+++ ./mm/memcontrol.c 2007-09-06 16:26:13.000000000 +0900
@@ -24,6 +24,7 @@
#include <linux/page-flags.h>
#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
#include <linux/swap.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
@@ -33,6 +34,54 @@
struct container_subsys mem_container_subsys;
static const int MEM_CONTAINER_RECLAIM_RETRIES = 5;
+enum mem_container_stat_index {
+ /*
+ * for MEM_CONTAINER_TYPE_ALL, usage == pagecache + rss
+ */
+ MEMCONT_STAT_PAGECACHE,
+ MEMCONT_STAT_RSS,
+
+ /*
+ * redundant; usage == charge - uncharge
+ */
+ MEMCONT_STAT_CHARGE,
+ MEMCONT_STAT_UNCHARGE,
+
+ /*
+ * mostly for debug
+ */
+ MEMCONT_STAT_ISOLATE,
+ MEMCONT_STAT_ISOLATE_FAIL,
+ MEMCONT_STAT_NSTATS,
+};
+
+static const char * const mem_container_stat_desc[] = {
+ [MEMCONT_STAT_PAGECACHE] = "page_cache",
+ [MEMCONT_STAT_RSS] = "rss",
+ [MEMCONT_STAT_CHARGE] = "charge",
+ [MEMCONT_STAT_UNCHARGE] = "uncharge",
+ [MEMCONT_STAT_ISOLATE] = "isolate",
+ [MEMCONT_STAT_ISOLATE_FAIL] = "isolate_fail",
+};
+
+struct mem_container_stat {
+ atomic_t count[MEMCONT_STAT_NSTATS];
+};
+
+static void mem_container_stat_inc(struct mem_container_stat * stat,
+ enum mem_container_stat_index idx)
+{
+
+ atomic_inc(&stat->count[idx]);
+}
+
+static void mem_container_stat_dec(struct mem_container_stat * stat,
+ enum mem_container_stat_index idx)
+{
+
+ atomic_dec(&stat->count[idx]);
+}
+
/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per container. We would eventually like to provide
@@ -62,6 +111,7 @@ struct mem_container {
*/
spinlock_t lru_lock;
unsigned long control_type; /* control RSS or RSS+Pagecache */
+ struct mem_container_stat stat;
};
/*
@@ -72,6 +122,12 @@ struct mem_container {
#define PAGE_CONTAINER_LOCK_BIT 0x0
#define PAGE_CONTAINER_LOCK (1 << PAGE_CONTAINER_LOCK_BIT)
+/* XXX hack; shouldn't be here. it really belongs to struct page_container. */
+#define PAGE_CONTAINER_CACHE_BIT 0x1
+#define PAGE_CONTAINER_CACHE (1 << PAGE_CONTAINER_CACHE_BIT)
+
+#define PAGE_CONTAINER_FLAGS (PAGE_CONTAINER_LOCK | PAGE_CONTAINER_CACHE)
+
/*
* A page_container page is associated with every page descriptor. The
* page_container helps us identify information about the container
@@ -134,9 +190,9 @@ static inline int page_container_locked(
&page->page_container);
}
-void page_assign_page_container(struct page *page, struct page_container *pc)
+static void page_assign_page_container_flags(struct page *page, int flags,
+ struct page_container *pc)
{
- int locked;
/*
* While resetting the page_container we might not hold the
@@ -145,14 +201,20 @@ void page_assign_page_container(struct p
*/
if (pc)
VM_BUG_ON(!page_container_locked(page));
- locked = (page->page_container & PAGE_CONTAINER_LOCK);
- page->page_container = ((unsigned long)pc | locked);
+ flags |= (page->page_container & PAGE_CONTAINER_LOCK);
+ page->page_container = ((unsigned long)pc | flags);
+}
+
+void page_assign_page_container(struct page *page, struct page_container *pc)
+{
+
+ page_assign_page_container_flags(page, 0, pc);
}
struct page_container *page_get_page_container(struct page *page)
{
return (struct page_container *)
- (page->page_container & ~PAGE_CONTAINER_LOCK);
+ (page->page_container & ~PAGE_CONTAINER_FLAGS);
}
void __always_inline lock_page_container(struct page *page)
@@ -203,6 +265,7 @@ unsigned long mem_container_isolate_page
LIST_HEAD(pc_list);
struct list_head *src;
struct page_container *pc;
+ struct mem_container_stat *stat = &mem_cont->stat;
if (active)
src = &mem_cont->active_list;
@@ -244,6 +307,9 @@ unsigned long mem_container_isolate_page
if (__isolate_lru_page(page, mode) == 0) {
list_move(&page->lru, dst);
nr_taken++;
+ mem_container_stat_inc(stat, MEMCONT_STAT_ISOLATE);
+ } else {
+ mem_container_stat_inc(stat, MEMCONT_STAT_ISOLATE_FAIL);
}
}
@@ -260,9 +326,11 @@ unsigned long mem_container_isolate_page
* 0 if the charge was successful
* < 0 if the container is over its limit
*/
-int mem_container_charge(struct page *page, struct mm_struct *mm)
+static int mem_container_charge_common(struct page *page, struct mm_struct *mm,
+ int is_cache)
{
struct mem_container *mem;
+ struct mem_container_stat *stat;
struct page_container *pc, *race_pc;
unsigned long flags;
unsigned long nr_retries = MEM_CONTAINER_RECLAIM_RETRIES;
@@ -360,7 +428,16 @@ int mem_container_charge(struct page *pa
atomic_set(&pc->ref_cnt, 1);
pc->mem_container = mem;
pc->page = page;
- page_assign_page_container(page, pc);
+ page_assign_page_container_flags(page,
+ is_cache ? PAGE_CONTAINER_CACHE : 0, pc);
+
+ stat = &mem->stat;
+ if (is_cache) {
+ mem_container_stat_inc(stat, MEMCONT_STAT_PAGECACHE);
+ } else {
+ mem_container_stat_inc(stat, MEMCONT_STAT_RSS);
+ }
+ mem_container_stat_inc(stat, MEMCONT_STAT_CHARGE);
spin_lock_irqsave(&mem->lru_lock, flags);
list_add(&pc->lru, &mem->active_list);
@@ -377,6 +454,12 @@ err:
return -ENOMEM;
}
+int mem_container_charge(struct page *page, struct mm_struct *mm)
+{
+
+ return mem_container_charge_common(page, mm, 0);
+}
+
/*
* See if the cached pages should be charged at all?
*/
@@ -388,7 +471,7 @@ int mem_container_cache_charge(struct pa
mem = rcu_dereference(mm->mem_container);
if (mem->control_type == MEM_CONTAINER_TYPE_ALL)
- return mem_container_charge(page, mm);
+ return mem_container_charge_common(page, mm, 1);
else
return 0;
}
@@ -411,15 +494,29 @@ void mem_container_uncharge(struct page_
return;
if (atomic_dec_and_test(&pc->ref_cnt)) {
+ struct mem_container_stat *stat;
+ int is_cache;
+
page = pc->page;
lock_page_container(page);
mem = pc->mem_container;
css_put(&mem->css);
+ /* XXX */
+ is_cache = (page->page_container & PAGE_CONTAINER_CACHE) != 0;
page_assign_page_container(page, NULL);
unlock_page_container(page);
res_counter_uncharge(&mem->res, 1);
+ stat = &mem->stat;
+ if (is_cache) {
+ mem_container_stat_dec(stat, MEMCONT_STAT_PAGECACHE);
+ } else {
+ mem_container_stat_dec(stat, MEMCONT_STAT_RSS);
+ }
+ mem_container_stat_inc(stat, MEMCONT_STAT_UNCHARGE);
+
spin_lock_irqsave(&mem->lru_lock, flags);
+ BUG_ON(list_empty(&pc->lru));
list_del_init(&pc->lru);
spin_unlock_irqrestore(&mem->lru_lock, flags);
kfree(pc);
@@ -496,6 +593,44 @@ static ssize_t mem_control_type_read(str
ppos, buf, s - buf);
}
+static void mem_container_stat_init(struct mem_container_stat *stat)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(stat->count); i++) {
+ atomic_set(&stat->count[i], 0);
+ }
+}
+
+static int mem_control_stat_show(struct seq_file *m, void *arg)
+{
+ struct container *cont = m->private;
+ struct mem_container *mem_cont = mem_container_from_cont(cont);
+ struct mem_container_stat *stat = &mem_cont->stat;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(stat->count); i++) {
+ seq_printf(m, "%s %u\n", mem_container_stat_desc[i],
+ (unsigned int)atomic_read(&stat->count[i]));
+ }
+ return 0;
+}
+
+static const struct file_operations mem_control_stat_file_operations = {
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int mem_control_stat_open(struct inode *unused, struct file *file)
+{
+ /* XXX __d_cont */
+ struct container *cont = file->f_dentry->d_parent->d_fsdata;
+
+ file->f_op = &mem_control_stat_file_operations;
+ return single_open(file, mem_control_stat_show, cont);
+}
+
static struct cftype mem_container_files[] = {
{
.name = "usage",
@@ -518,6 +653,10 @@ static struct cftype mem_container_files
.write = mem_control_type_write,
.read = mem_control_type_read,
},
+ {
+ .name = "stat",
+ .open = mem_control_stat_open,
+ },
};
static struct mem_container init_mem_container;
@@ -541,6 +680,7 @@ mem_container_create(struct container_su
INIT_LIST_HEAD(&mem->inactive_list);
spin_lock_init(&mem->lru_lock);
mem->control_type = MEM_CONTAINER_TYPE_ALL;
+ mem_container_stat_init(&mem->stat);
return &mem->css;
}
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
[not found] ` <20070907033942.4A6541BFA52-Pcsii4f/SVk@public.gmane.org>
@ 2007-09-07 5:02 ` KAMEZAWA Hiroyuki
[not found] ` <20070907140234.4fe1075d.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-09-07 9:55 ` Balbir Singh
2007-10-04 5:13 ` YAMAMOTO Takashi
2 siblings, 1 reply; 25+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-09-07 5:02 UTC (permalink / raw)
To: YAMAMOTO Takashi
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
Hi,
On Fri, 7 Sep 2007 12:39:42 +0900 (JST)
yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org (YAMAMOTO Takashi) wrote:
> +enum mem_container_stat_index {
> + /*
> + * for MEM_CONTAINER_TYPE_ALL, usage == pagecache + rss
> + */
> + MEMCONT_STAT_PAGECACHE,
> + MEMCONT_STAT_RSS,
> +
> + /*
> + * redundant; usage == charge - uncharge
> + */
> + MEMCONT_STAT_CHARGE,
> + MEMCONT_STAT_UNCHARGE,
> +
> + /*
> + * mostly for debug
> + */
> + MEMCONT_STAT_ISOLATE,
> + MEMCONT_STAT_ISOLATE_FAIL,
> + MEMCONT_STAT_NSTATS,
> +};
> +
please add comments on each statistics name. It's uneasy to catch the meaning of
ISOLATE/ISOLATE_FAIL without comments.
> +static const char * const mem_container_stat_desc[] = {
> + [MEMCONT_STAT_PAGECACHE] = "page_cache",
> + [MEMCONT_STAT_RSS] = "rss",
> + [MEMCONT_STAT_CHARGE] = "charge",
> + [MEMCONT_STAT_UNCHARGE] = "uncharge",
> + [MEMCONT_STAT_ISOLATE] = "isolate",
> + [MEMCONT_STAT_ISOLATE_FAIL] = "isolate_fail",
> +};
> +
> +struct mem_container_stat {
> + atomic_t count[MEMCONT_STAT_NSTATS];
> +};
> +
> +static void mem_container_stat_inc(struct mem_container_stat * stat,
> + enum mem_container_stat_index idx)
> +{
> +
> + atomic_inc(&stat->count[idx]);
> +}
> +
> +static void mem_container_stat_dec(struct mem_container_stat * stat,
> + enum mem_container_stat_index idx)
> +{
> +
> + atomic_dec(&stat->count[idx]);
> +}
> +
Can we do this accounting as mod_zone_page_state()(in mm/vmstat.c) ?
(use per-cpu data for accounting.)
> +/* XXX hack; shouldn't be here. it really belongs to struct page_container. */
> +#define PAGE_CONTAINER_CACHE_BIT 0x1
> +#define PAGE_CONTAINER_CACHE (1 << PAGE_CONTAINER_CACHE_BIT)
> +
Is this used for remebering whether a page is charged as page-cache or not ?
> + page_assign_page_container_flags(page,
> + is_cache ? PAGE_CONTAINER_CACHE : 0, pc);
> +
> + stat = &mem->stat;
> + if (is_cache) {
> + mem_container_stat_inc(stat, MEMCONT_STAT_PAGECACHE);
> + } else {
> + mem_container_stat_inc(stat, MEMCONT_STAT_RSS);
> + }
nitpick,in linux style, one-sentence block shouldn't have braces {}.
==
if (is_cache)
mem_cont...
else
mem_cont...
==
> + mem_container_stat_inc(stat, MEMCONT_STAT_CHARGE);
>
> spin_lock_irqsave(&mem->lru_lock, flags);
> list_add(&pc->lru, &mem->active_list);
> @@ -377,6 +454,12 @@ err:
> return -ENOMEM;
> }
>
> +int mem_container_charge(struct page *page, struct mm_struct *mm)
> +{
> +
> + return mem_container_charge_common(page, mm, 0);
> +}
> +
> /*
> * See if the cached pages should be charged at all?
> */
> @@ -388,7 +471,7 @@ int mem_container_cache_charge(struct pa
>
> mem = rcu_dereference(mm->mem_container);
> if (mem->control_type == MEM_CONTAINER_TYPE_ALL)
> - return mem_container_charge(page, mm);
> + return mem_container_charge_common(page, mm, 1);
> else
> return 0;
> }
> @@ -411,15 +494,29 @@ void mem_container_uncharge(struct page_
> return;
>
> if (atomic_dec_and_test(&pc->ref_cnt)) {
> + struct mem_container_stat *stat;
> + int is_cache;
> +
> page = pc->page;
> lock_page_container(page);
> mem = pc->mem_container;
> css_put(&mem->css);
> + /* XXX */
This kind of comment is bad.
> + is_cache = (page->page_container & PAGE_CONTAINER_CACHE) != 0;
> page_assign_page_container(page, NULL);
> unlock_page_container(page);
> res_counter_uncharge(&mem->res, 1);
>
> + stat = &mem->stat;
> + if (is_cache) {
> + mem_container_stat_dec(stat, MEMCONT_STAT_PAGECACHE);
> + } else {
> + mem_container_stat_dec(stat, MEMCONT_STAT_RSS);
> + }
> + mem_container_stat_inc(stat, MEMCONT_STAT_UNCHARGE);
> +
> spin_lock_irqsave(&mem->lru_lock, flags);
> + BUG_ON(list_empty(&pc->lru));
Why this BUG_ON() is added ?
Thanks
-Kame
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
[not found] ` <20070907140234.4fe1075d.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-09-07 5:38 ` YAMAMOTO Takashi
0 siblings, 0 replies; 25+ messages in thread
From: YAMAMOTO Takashi @ 2007-09-07 5:38 UTC (permalink / raw)
To: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
hi,
thanks for comments.
> Hi,
>
> On Fri, 7 Sep 2007 12:39:42 +0900 (JST)
> yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org (YAMAMOTO Takashi) wrote:
>
> > +enum mem_container_stat_index {
> > + /*
> > + * for MEM_CONTAINER_TYPE_ALL, usage == pagecache + rss
> > + */
> > + MEMCONT_STAT_PAGECACHE,
> > + MEMCONT_STAT_RSS,
> > +
> > + /*
> > + * redundant; usage == charge - uncharge
> > + */
> > + MEMCONT_STAT_CHARGE,
> > + MEMCONT_STAT_UNCHARGE,
> > +
> > + /*
> > + * mostly for debug
> > + */
> > + MEMCONT_STAT_ISOLATE,
> > + MEMCONT_STAT_ISOLATE_FAIL,
> > + MEMCONT_STAT_NSTATS,
> > +};
> > +
> please add comments on each statistics name.
sure.
> It's uneasy to catch the meaning of
> ISOLATE/ISOLATE_FAIL without comments.
they aren't useful for users who don't read the relevant code.
probably they should be just removed.
> > +static const char * const mem_container_stat_desc[] = {
> > + [MEMCONT_STAT_PAGECACHE] = "page_cache",
> > + [MEMCONT_STAT_RSS] = "rss",
> > + [MEMCONT_STAT_CHARGE] = "charge",
> > + [MEMCONT_STAT_UNCHARGE] = "uncharge",
> > + [MEMCONT_STAT_ISOLATE] = "isolate",
> > + [MEMCONT_STAT_ISOLATE_FAIL] = "isolate_fail",
> > +};
> > +
> > +struct mem_container_stat {
> > + atomic_t count[MEMCONT_STAT_NSTATS];
> > +};
> > +
> > +static void mem_container_stat_inc(struct mem_container_stat * stat,
> > + enum mem_container_stat_index idx)
> > +{
> > +
> > + atomic_inc(&stat->count[idx]);
> > +}
> > +
> > +static void mem_container_stat_dec(struct mem_container_stat * stat,
> > + enum mem_container_stat_index idx)
> > +{
> > +
> > + atomic_dec(&stat->count[idx]);
> > +}
> > +
>
> Can we do this accounting as mod_zone_page_state()(in mm/vmstat.c) ?
> (use per-cpu data for accounting.)
we can do so later.
> > +/* XXX hack; shouldn't be here. it really belongs to struct page_container. */
> > +#define PAGE_CONTAINER_CACHE_BIT 0x1
> > +#define PAGE_CONTAINER_CACHE (1 << PAGE_CONTAINER_CACHE_BIT)
> > +
>
> Is this used for remebering whether a page is charged as page-cache or not ?
yes.
> > + page_assign_page_container_flags(page,
> > + is_cache ? PAGE_CONTAINER_CACHE : 0, pc);
> > +
> > + stat = &mem->stat;
> > + if (is_cache) {
> > + mem_container_stat_inc(stat, MEMCONT_STAT_PAGECACHE);
> > + } else {
> > + mem_container_stat_inc(stat, MEMCONT_STAT_RSS);
> > + }
>
> nitpick,in linux style, one-sentence block shouldn't have braces {}.
>
> ==
> if (is_cache)
> mem_cont...
> else
> mem_cont...
> ==
sure.
> > + mem_container_stat_inc(stat, MEMCONT_STAT_CHARGE);
> >
> > spin_lock_irqsave(&mem->lru_lock, flags);
> > list_add(&pc->lru, &mem->active_list);
> > @@ -377,6 +454,12 @@ err:
> > return -ENOMEM;
> > }
> >
> > +int mem_container_charge(struct page *page, struct mm_struct *mm)
> > +{
> > +
> > + return mem_container_charge_common(page, mm, 0);
> > +}
> > +
> > /*
> > * See if the cached pages should be charged at all?
> > */
> > @@ -388,7 +471,7 @@ int mem_container_cache_charge(struct pa
> >
> > mem = rcu_dereference(mm->mem_container);
> > if (mem->control_type == MEM_CONTAINER_TYPE_ALL)
> > - return mem_container_charge(page, mm);
> > + return mem_container_charge_common(page, mm, 1);
> > else
> > return 0;
> > }
> > @@ -411,15 +494,29 @@ void mem_container_uncharge(struct page_
> > return;
> >
> > if (atomic_dec_and_test(&pc->ref_cnt)) {
> > + struct mem_container_stat *stat;
> > + int is_cache;
> > +
> > page = pc->page;
> > lock_page_container(page);
> > mem = pc->mem_container;
> > css_put(&mem->css);
> > + /* XXX */
> This kind of comment is bad.
sure.
> > + is_cache = (page->page_container & PAGE_CONTAINER_CACHE) != 0;
> > page_assign_page_container(page, NULL);
> > unlock_page_container(page);
> > res_counter_uncharge(&mem->res, 1);
> >
> > + stat = &mem->stat;
> > + if (is_cache) {
> > + mem_container_stat_dec(stat, MEMCONT_STAT_PAGECACHE);
> > + } else {
> > + mem_container_stat_dec(stat, MEMCONT_STAT_RSS);
> > + }
> > + mem_container_stat_inc(stat, MEMCONT_STAT_UNCHARGE);
> > +
> > spin_lock_irqsave(&mem->lru_lock, flags);
> > + BUG_ON(list_empty(&pc->lru));
>
> Why this BUG_ON() is added ?
>
> Thanks
> -Kame
to ensure that my understanding is correct.
YAMAMOTO Takashi
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
2007-09-07 3:39 [RFC] [PATCH] memory controller statistics YAMAMOTO Takashi
@ 2007-09-07 9:55 ` Balbir Singh
0 siblings, 0 replies; 25+ messages in thread
From: Balbir Singh @ 2007-09-07 9:55 UTC (permalink / raw)
To: YAMAMOTO Takashi
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
Paul Menage, Linux Memory Management List
YAMAMOTO Takashi wrote:
> hi,
>
> i implemented some statistics for your memory controller.
>
> it's tested with 2.6.23-rc2-mm2 + memory controller v7.
> i think it can be applied to 2.6.23-rc4-mm1 as well.
>
Thanks for doing this. We are building containerstats for
per container statistics. It would be really nice to provide
the statistics using that interface. I am not opposed to
memory.stat, but Paul Menage recommends that one file has
just one meaningful value.
The other thing is that could you please report all the
statistics in bytes, we are moving to that interface,
I've posted patches to do that. If we are going to push
a bunch of statistics in one file, please use a format
separator like
name: value
> YAMOMOTO Takshi
>
> todo: something like nr_active/inactive in /proc/vmstat.
>
This would be really nice to add.
> --- ./mm/memcontrol.c.BACKUP 2007-08-29 17:13:09.000000000 +0900
> +++ ./mm/memcontrol.c 2007-09-06 16:26:13.000000000 +0900
> @@ -24,6 +24,7 @@
> #include <linux/page-flags.h>
> #include <linux/bit_spinlock.h>
> #include <linux/rcupdate.h>
> +#include <linux/seq_file.h>
> #include <linux/swap.h>
> #include <linux/spinlock.h>
> #include <linux/fs.h>
> @@ -33,6 +34,54 @@
> struct container_subsys mem_container_subsys;
> static const int MEM_CONTAINER_RECLAIM_RETRIES = 5;
>
> +enum mem_container_stat_index {
> + /*
> + * for MEM_CONTAINER_TYPE_ALL, usage == pagecache + rss
> + */
> + MEMCONT_STAT_PAGECACHE,
> + MEMCONT_STAT_RSS,
> +
I would prefer
MEM_CONTAINER_STAT_CACHED,
MEM_CONTAINER_STAT_MAPPED
> + /*
> + * redundant; usage == charge - uncharge
> + */
> + MEMCONT_STAT_CHARGE,
> + MEMCONT_STAT_UNCHARGE,
> +
> + /*
> + * mostly for debug
> + */
> + MEMCONT_STAT_ISOLATE,
> + MEMCONT_STAT_ISOLATE_FAIL,
> + MEMCONT_STAT_NSTATS,
> +};
> +
> +static const char * const mem_container_stat_desc[] = {
> + [MEMCONT_STAT_PAGECACHE] = "page_cache",
> + [MEMCONT_STAT_RSS] = "rss",
> + [MEMCONT_STAT_CHARGE] = "charge",
> + [MEMCONT_STAT_UNCHARGE] = "uncharge",
> + [MEMCONT_STAT_ISOLATE] = "isolate",
> + [MEMCONT_STAT_ISOLATE_FAIL] = "isolate_fail",
> +};
> +
> +struct mem_container_stat {
> + atomic_t count[MEMCONT_STAT_NSTATS];
> +};
> +
atomic_long_t would be better.
> +static void mem_container_stat_inc(struct mem_container_stat * stat,
> + enum mem_container_stat_index idx)
> +{
> +
> + atomic_inc(&stat->count[idx]);
> +}
> +
> +static void mem_container_stat_dec(struct mem_container_stat * stat,
> + enum mem_container_stat_index idx)
> +{
> +
> + atomic_dec(&stat->count[idx]);
> +}
> +
Shouldn't these functions be inlined?
> /*
> * The memory controller data structure. The memory controller controls both
> * page cache and RSS per container. We would eventually like to provide
> @@ -62,6 +111,7 @@ struct mem_container {
> */
> spinlock_t lru_lock;
> unsigned long control_type; /* control RSS or RSS+Pagecache */
> + struct mem_container_stat stat;
> };
>
> /*
> @@ -72,6 +122,12 @@ struct mem_container {
> #define PAGE_CONTAINER_LOCK_BIT 0x0
> #define PAGE_CONTAINER_LOCK (1 << PAGE_CONTAINER_LOCK_BIT)
>
> +/* XXX hack; shouldn't be here. it really belongs to struct page_container. */
> +#define PAGE_CONTAINER_CACHE_BIT 0x1
> +#define PAGE_CONTAINER_CACHE (1 << PAGE_CONTAINER_CACHE_BIT)
> +
> +#define PAGE_CONTAINER_FLAGS (PAGE_CONTAINER_LOCK | PAGE_CONTAINER_CACHE)
> +
The lock field is access atomically, adding these bits here would
increase the contention. Could you please move them to page_container?
> /*
> * A page_container page is associated with every page descriptor. The
> * page_container helps us identify information about the container
> @@ -134,9 +190,9 @@ static inline int page_container_locked(
> &page->page_container);
> }
>
> -void page_assign_page_container(struct page *page, struct page_container *pc)
> +static void page_assign_page_container_flags(struct page *page, int flags,
> + struct page_container *pc)
> {
> - int locked;
>
> /*
> * While resetting the page_container we might not hold the
> @@ -145,14 +201,20 @@ void page_assign_page_container(struct p
> */
> if (pc)
> VM_BUG_ON(!page_container_locked(page));
> - locked = (page->page_container & PAGE_CONTAINER_LOCK);
> - page->page_container = ((unsigned long)pc | locked);
> + flags |= (page->page_container & PAGE_CONTAINER_LOCK);
> + page->page_container = ((unsigned long)pc | flags);
> +}
> +
> +void page_assign_page_container(struct page *page, struct page_container *pc)
> +{
> +
> + page_assign_page_container_flags(page, 0, pc);
> }
>
> struct page_container *page_get_page_container(struct page *page)
> {
> return (struct page_container *)
> - (page->page_container & ~PAGE_CONTAINER_LOCK);
> + (page->page_container & ~PAGE_CONTAINER_FLAGS);
> }
>
> void __always_inline lock_page_container(struct page *page)
> @@ -203,6 +265,7 @@ unsigned long mem_container_isolate_page
> LIST_HEAD(pc_list);
> struct list_head *src;
> struct page_container *pc;
> + struct mem_container_stat *stat = &mem_cont->stat;
>
> if (active)
> src = &mem_cont->active_list;
> @@ -244,6 +307,9 @@ unsigned long mem_container_isolate_page
> if (__isolate_lru_page(page, mode) == 0) {
> list_move(&page->lru, dst);
> nr_taken++;
> + mem_container_stat_inc(stat, MEMCONT_STAT_ISOLATE);
> + } else {
> + mem_container_stat_inc(stat, MEMCONT_STAT_ISOLATE_FAIL);
> }
> }
>
> @@ -260,9 +326,11 @@ unsigned long mem_container_isolate_page
> * 0 if the charge was successful
> * < 0 if the container is over its limit
> */
> -int mem_container_charge(struct page *page, struct mm_struct *mm)
> +static int mem_container_charge_common(struct page *page, struct mm_struct *mm,
> + int is_cache)
> {
> struct mem_container *mem;
> + struct mem_container_stat *stat;
> struct page_container *pc, *race_pc;
> unsigned long flags;
> unsigned long nr_retries = MEM_CONTAINER_RECLAIM_RETRIES;
> @@ -360,7 +428,16 @@ int mem_container_charge(struct page *pa
> atomic_set(&pc->ref_cnt, 1);
> pc->mem_container = mem;
> pc->page = page;
> - page_assign_page_container(page, pc);
> + page_assign_page_container_flags(page,
> + is_cache ? PAGE_CONTAINER_CACHE : 0, pc);
> +
> + stat = &mem->stat;
> + if (is_cache) {
> + mem_container_stat_inc(stat, MEMCONT_STAT_PAGECACHE);
> + } else {
> + mem_container_stat_inc(stat, MEMCONT_STAT_RSS);
> + }
> + mem_container_stat_inc(stat, MEMCONT_STAT_CHARGE);
>
> spin_lock_irqsave(&mem->lru_lock, flags);
> list_add(&pc->lru, &mem->active_list);
> @@ -377,6 +454,12 @@ err:
> return -ENOMEM;
> }
>
> +int mem_container_charge(struct page *page, struct mm_struct *mm)
> +{
> +
> + return mem_container_charge_common(page, mm, 0);
> +}
> +
> /*
> * See if the cached pages should be charged at all?
> */
> @@ -388,7 +471,7 @@ int mem_container_cache_charge(struct pa
>
> mem = rcu_dereference(mm->mem_container);
> if (mem->control_type == MEM_CONTAINER_TYPE_ALL)
> - return mem_container_charge(page, mm);
> + return mem_container_charge_common(page, mm, 1);
> else
> return 0;
> }
> @@ -411,15 +494,29 @@ void mem_container_uncharge(struct page_
> return;
>
> if (atomic_dec_and_test(&pc->ref_cnt)) {
> + struct mem_container_stat *stat;
> + int is_cache;
> +
> page = pc->page;
> lock_page_container(page);
> mem = pc->mem_container;
> css_put(&mem->css);
> + /* XXX */
> + is_cache = (page->page_container & PAGE_CONTAINER_CACHE) != 0;
> page_assign_page_container(page, NULL);
> unlock_page_container(page);
> res_counter_uncharge(&mem->res, 1);
>
> + stat = &mem->stat;
> + if (is_cache) {
> + mem_container_stat_dec(stat, MEMCONT_STAT_PAGECACHE);
> + } else {
> + mem_container_stat_dec(stat, MEMCONT_STAT_RSS);
> + }
> + mem_container_stat_inc(stat, MEMCONT_STAT_UNCHARGE);
> +
> spin_lock_irqsave(&mem->lru_lock, flags);
> + BUG_ON(list_empty(&pc->lru));
> list_del_init(&pc->lru);
> spin_unlock_irqrestore(&mem->lru_lock, flags);
> kfree(pc);
> @@ -496,6 +593,44 @@ static ssize_t mem_control_type_read(str
> ppos, buf, s - buf);
> }
>
> +static void mem_container_stat_init(struct mem_container_stat *stat)
> +{
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(stat->count); i++) {
> + atomic_set(&stat->count[i], 0);
> + }
> +}
> +
> +static int mem_control_stat_show(struct seq_file *m, void *arg)
> +{
> + struct container *cont = m->private;
> + struct mem_container *mem_cont = mem_container_from_cont(cont);
> + struct mem_container_stat *stat = &mem_cont->stat;
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(stat->count); i++) {
> + seq_printf(m, "%s %u\n", mem_container_stat_desc[i],
> + (unsigned int)atomic_read(&stat->count[i]));
> + }
> + return 0;
> +}
> +
> +static const struct file_operations mem_control_stat_file_operations = {
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = single_release,
> +};
> +
> +static int mem_control_stat_open(struct inode *unused, struct file *file)
> +{
> + /* XXX __d_cont */
> + struct container *cont = file->f_dentry->d_parent->d_fsdata;
> +
> + file->f_op = &mem_control_stat_file_operations;
> + return single_open(file, mem_control_stat_show, cont);
> +}
> +
> static struct cftype mem_container_files[] = {
> {
> .name = "usage",
> @@ -518,6 +653,10 @@ static struct cftype mem_container_files
> .write = mem_control_type_write,
> .read = mem_control_type_read,
> },
> + {
> + .name = "stat",
> + .open = mem_control_stat_open,
> + },
> };
>
> static struct mem_container init_mem_container;
> @@ -541,6 +680,7 @@ mem_container_create(struct container_su
> INIT_LIST_HEAD(&mem->inactive_list);
> spin_lock_init(&mem->lru_lock);
> mem->control_type = MEM_CONTAINER_TYPE_ALL;
> + mem_container_stat_init(&mem->stat);
> return &mem->css;
> }
>
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
@ 2007-09-07 9:55 ` Balbir Singh
0 siblings, 0 replies; 25+ messages in thread
From: Balbir Singh @ 2007-09-07 9:55 UTC (permalink / raw)
To: YAMAMOTO Takashi
Cc: svaidy, containers, minoura, Paul Menage,
Linux Memory Management List
YAMAMOTO Takashi wrote:
> hi,
>
> i implemented some statistics for your memory controller.
>
> it's tested with 2.6.23-rc2-mm2 + memory controller v7.
> i think it can be applied to 2.6.23-rc4-mm1 as well.
>
Thanks for doing this. We are building containerstats for
per container statistics. It would be really nice to provide
the statistics using that interface. I am not opposed to
memory.stat, but Paul Menage recommends that one file has
just one meaningful value.
The other thing is that could you please report all the
statistics in bytes, we are moving to that interface,
I've posted patches to do that. If we are going to push
a bunch of statistics in one file, please use a format
separator like
name: value
> YAMOMOTO Takshi
>
> todo: something like nr_active/inactive in /proc/vmstat.
>
This would be really nice to add.
> --- ./mm/memcontrol.c.BACKUP 2007-08-29 17:13:09.000000000 +0900
> +++ ./mm/memcontrol.c 2007-09-06 16:26:13.000000000 +0900
> @@ -24,6 +24,7 @@
> #include <linux/page-flags.h>
> #include <linux/bit_spinlock.h>
> #include <linux/rcupdate.h>
> +#include <linux/seq_file.h>
> #include <linux/swap.h>
> #include <linux/spinlock.h>
> #include <linux/fs.h>
> @@ -33,6 +34,54 @@
> struct container_subsys mem_container_subsys;
> static const int MEM_CONTAINER_RECLAIM_RETRIES = 5;
>
> +enum mem_container_stat_index {
> + /*
> + * for MEM_CONTAINER_TYPE_ALL, usage == pagecache + rss
> + */
> + MEMCONT_STAT_PAGECACHE,
> + MEMCONT_STAT_RSS,
> +
I would prefer
MEM_CONTAINER_STAT_CACHED,
MEM_CONTAINER_STAT_MAPPED
> + /*
> + * redundant; usage == charge - uncharge
> + */
> + MEMCONT_STAT_CHARGE,
> + MEMCONT_STAT_UNCHARGE,
> +
> + /*
> + * mostly for debug
> + */
> + MEMCONT_STAT_ISOLATE,
> + MEMCONT_STAT_ISOLATE_FAIL,
> + MEMCONT_STAT_NSTATS,
> +};
> +
> +static const char * const mem_container_stat_desc[] = {
> + [MEMCONT_STAT_PAGECACHE] = "page_cache",
> + [MEMCONT_STAT_RSS] = "rss",
> + [MEMCONT_STAT_CHARGE] = "charge",
> + [MEMCONT_STAT_UNCHARGE] = "uncharge",
> + [MEMCONT_STAT_ISOLATE] = "isolate",
> + [MEMCONT_STAT_ISOLATE_FAIL] = "isolate_fail",
> +};
> +
> +struct mem_container_stat {
> + atomic_t count[MEMCONT_STAT_NSTATS];
> +};
> +
atomic_long_t would be better.
> +static void mem_container_stat_inc(struct mem_container_stat * stat,
> + enum mem_container_stat_index idx)
> +{
> +
> + atomic_inc(&stat->count[idx]);
> +}
> +
> +static void mem_container_stat_dec(struct mem_container_stat * stat,
> + enum mem_container_stat_index idx)
> +{
> +
> + atomic_dec(&stat->count[idx]);
> +}
> +
Shouldn't these functions be inlined?
> /*
> * The memory controller data structure. The memory controller controls both
> * page cache and RSS per container. We would eventually like to provide
> @@ -62,6 +111,7 @@ struct mem_container {
> */
> spinlock_t lru_lock;
> unsigned long control_type; /* control RSS or RSS+Pagecache */
> + struct mem_container_stat stat;
> };
>
> /*
> @@ -72,6 +122,12 @@ struct mem_container {
> #define PAGE_CONTAINER_LOCK_BIT 0x0
> #define PAGE_CONTAINER_LOCK (1 << PAGE_CONTAINER_LOCK_BIT)
>
> +/* XXX hack; shouldn't be here. it really belongs to struct page_container. */
> +#define PAGE_CONTAINER_CACHE_BIT 0x1
> +#define PAGE_CONTAINER_CACHE (1 << PAGE_CONTAINER_CACHE_BIT)
> +
> +#define PAGE_CONTAINER_FLAGS (PAGE_CONTAINER_LOCK | PAGE_CONTAINER_CACHE)
> +
The lock field is access atomically, adding these bits here would
increase the contention. Could you please move them to page_container?
> /*
> * A page_container page is associated with every page descriptor. The
> * page_container helps us identify information about the container
> @@ -134,9 +190,9 @@ static inline int page_container_locked(
> &page->page_container);
> }
>
> -void page_assign_page_container(struct page *page, struct page_container *pc)
> +static void page_assign_page_container_flags(struct page *page, int flags,
> + struct page_container *pc)
> {
> - int locked;
>
> /*
> * While resetting the page_container we might not hold the
> @@ -145,14 +201,20 @@ void page_assign_page_container(struct p
> */
> if (pc)
> VM_BUG_ON(!page_container_locked(page));
> - locked = (page->page_container & PAGE_CONTAINER_LOCK);
> - page->page_container = ((unsigned long)pc | locked);
> + flags |= (page->page_container & PAGE_CONTAINER_LOCK);
> + page->page_container = ((unsigned long)pc | flags);
> +}
> +
> +void page_assign_page_container(struct page *page, struct page_container *pc)
> +{
> +
> + page_assign_page_container_flags(page, 0, pc);
> }
>
> struct page_container *page_get_page_container(struct page *page)
> {
> return (struct page_container *)
> - (page->page_container & ~PAGE_CONTAINER_LOCK);
> + (page->page_container & ~PAGE_CONTAINER_FLAGS);
> }
>
> void __always_inline lock_page_container(struct page *page)
> @@ -203,6 +265,7 @@ unsigned long mem_container_isolate_page
> LIST_HEAD(pc_list);
> struct list_head *src;
> struct page_container *pc;
> + struct mem_container_stat *stat = &mem_cont->stat;
>
> if (active)
> src = &mem_cont->active_list;
> @@ -244,6 +307,9 @@ unsigned long mem_container_isolate_page
> if (__isolate_lru_page(page, mode) == 0) {
> list_move(&page->lru, dst);
> nr_taken++;
> + mem_container_stat_inc(stat, MEMCONT_STAT_ISOLATE);
> + } else {
> + mem_container_stat_inc(stat, MEMCONT_STAT_ISOLATE_FAIL);
> }
> }
>
> @@ -260,9 +326,11 @@ unsigned long mem_container_isolate_page
> * 0 if the charge was successful
> * < 0 if the container is over its limit
> */
> -int mem_container_charge(struct page *page, struct mm_struct *mm)
> +static int mem_container_charge_common(struct page *page, struct mm_struct *mm,
> + int is_cache)
> {
> struct mem_container *mem;
> + struct mem_container_stat *stat;
> struct page_container *pc, *race_pc;
> unsigned long flags;
> unsigned long nr_retries = MEM_CONTAINER_RECLAIM_RETRIES;
> @@ -360,7 +428,16 @@ int mem_container_charge(struct page *pa
> atomic_set(&pc->ref_cnt, 1);
> pc->mem_container = mem;
> pc->page = page;
> - page_assign_page_container(page, pc);
> + page_assign_page_container_flags(page,
> + is_cache ? PAGE_CONTAINER_CACHE : 0, pc);
> +
> + stat = &mem->stat;
> + if (is_cache) {
> + mem_container_stat_inc(stat, MEMCONT_STAT_PAGECACHE);
> + } else {
> + mem_container_stat_inc(stat, MEMCONT_STAT_RSS);
> + }
> + mem_container_stat_inc(stat, MEMCONT_STAT_CHARGE);
>
> spin_lock_irqsave(&mem->lru_lock, flags);
> list_add(&pc->lru, &mem->active_list);
> @@ -377,6 +454,12 @@ err:
> return -ENOMEM;
> }
>
> +int mem_container_charge(struct page *page, struct mm_struct *mm)
> +{
> +
> + return mem_container_charge_common(page, mm, 0);
> +}
> +
> /*
> * See if the cached pages should be charged at all?
> */
> @@ -388,7 +471,7 @@ int mem_container_cache_charge(struct pa
>
> mem = rcu_dereference(mm->mem_container);
> if (mem->control_type == MEM_CONTAINER_TYPE_ALL)
> - return mem_container_charge(page, mm);
> + return mem_container_charge_common(page, mm, 1);
> else
> return 0;
> }
> @@ -411,15 +494,29 @@ void mem_container_uncharge(struct page_
> return;
>
> if (atomic_dec_and_test(&pc->ref_cnt)) {
> + struct mem_container_stat *stat;
> + int is_cache;
> +
> page = pc->page;
> lock_page_container(page);
> mem = pc->mem_container;
> css_put(&mem->css);
> + /* XXX */
> + is_cache = (page->page_container & PAGE_CONTAINER_CACHE) != 0;
> page_assign_page_container(page, NULL);
> unlock_page_container(page);
> res_counter_uncharge(&mem->res, 1);
>
> + stat = &mem->stat;
> + if (is_cache) {
> + mem_container_stat_dec(stat, MEMCONT_STAT_PAGECACHE);
> + } else {
> + mem_container_stat_dec(stat, MEMCONT_STAT_RSS);
> + }
> + mem_container_stat_inc(stat, MEMCONT_STAT_UNCHARGE);
> +
> spin_lock_irqsave(&mem->lru_lock, flags);
> + BUG_ON(list_empty(&pc->lru));
> list_del_init(&pc->lru);
> spin_unlock_irqrestore(&mem->lru_lock, flags);
> kfree(pc);
> @@ -496,6 +593,44 @@ static ssize_t mem_control_type_read(str
> ppos, buf, s - buf);
> }
>
> +static void mem_container_stat_init(struct mem_container_stat *stat)
> +{
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(stat->count); i++) {
> + atomic_set(&stat->count[i], 0);
> + }
> +}
> +
> +static int mem_control_stat_show(struct seq_file *m, void *arg)
> +{
> + struct container *cont = m->private;
> + struct mem_container *mem_cont = mem_container_from_cont(cont);
> + struct mem_container_stat *stat = &mem_cont->stat;
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(stat->count); i++) {
> + seq_printf(m, "%s %u\n", mem_container_stat_desc[i],
> + (unsigned int)atomic_read(&stat->count[i]));
> + }
> + return 0;
> +}
> +
> +static const struct file_operations mem_control_stat_file_operations = {
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = single_release,
> +};
> +
> +static int mem_control_stat_open(struct inode *unused, struct file *file)
> +{
> + /* XXX __d_cont */
> + struct container *cont = file->f_dentry->d_parent->d_fsdata;
> +
> + file->f_op = &mem_control_stat_file_operations;
> + return single_open(file, mem_control_stat_show, cont);
> +}
> +
> static struct cftype mem_container_files[] = {
> {
> .name = "usage",
> @@ -518,6 +653,10 @@ static struct cftype mem_container_files
> .write = mem_control_type_write,
> .read = mem_control_type_read,
> },
> + {
> + .name = "stat",
> + .open = mem_control_stat_open,
> + },
> };
>
> static struct mem_container init_mem_container;
> @@ -541,6 +680,7 @@ mem_container_create(struct container_su
> INIT_LIST_HEAD(&mem->inactive_list);
> spin_lock_init(&mem->lru_lock);
> mem->control_type = MEM_CONTAINER_TYPE_ALL;
> + mem_container_stat_init(&mem->stat);
> return &mem->css;
> }
>
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
2007-09-07 9:55 ` Balbir Singh
@ 2007-09-10 0:32 ` 箕浦真
-1 siblings, 0 replies; 25+ messages in thread
From: 箕浦真 @ 2007-09-10 0:32 UTC (permalink / raw)
To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
Cc: containers-qjLDD68F18O7TbgM5vRIOg, Linux Memory Management List,
YAMAMOTO Takashi, Paul Menage
Takashi is AFK for a while; i'm replying for him as possible.
> Thanks for doing this. We are building containerstats for
> per container statistics. It would be really nice to provide
> the statistics using that interface. I am not opposed to
> memory.stat, but Paul Menage recommends that one file has
> just one meaningful value.
Thanks, we'll check it. The interface is not important for
us.
> The other thing is that could you please report all the
> statistics in bytes, we are moving to that interface,
> I've posted patches to do that. If we are going to push
> a bunch of statistics in one file, please use a format
> separator like
> name: value
> > YAMOMOTO Takshi
> >
> > todo: something like nr_active/inactive in /proc/vmstat.
> >
> This would be really nice to add.
`something' here could be # of pages (in bytes according to
your advice) on the global active (or inactive) list that
are charged to a container, and/or # of pages on the
per-container active/inactive list. The latter is easy to
implement but I'm afraid it's somewhat confusing for users.
--
Minoura Makoto <minoura-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
@ 2007-09-10 0:32 ` 箕浦真
0 siblings, 0 replies; 25+ messages in thread
From: 箕浦真 @ 2007-09-10 0:32 UTC (permalink / raw)
To: balbir
Cc: YAMAMOTO Takashi, containers, Paul Menage,
Linux Memory Management List
Takashi is AFK for a while; i'm replying for him as possible.
> Thanks for doing this. We are building containerstats for
> per container statistics. It would be really nice to provide
> the statistics using that interface. I am not opposed to
> memory.stat, but Paul Menage recommends that one file has
> just one meaningful value.
Thanks, we'll check it. The interface is not important for
us.
> The other thing is that could you please report all the
> statistics in bytes, we are moving to that interface,
> I've posted patches to do that. If we are going to push
> a bunch of statistics in one file, please use a format
> separator like
> name: value
> > YAMOMOTO Takshi
> >
> > todo: something like nr_active/inactive in /proc/vmstat.
> >
> This would be really nice to add.
`something' here could be # of pages (in bytes according to
your advice) on the global active (or inactive) list that
are charged to a container, and/or # of pages on the
per-container active/inactive list. The latter is easy to
implement but I'm afraid it's somewhat confusing for users.
--
Minoura Makoto <minoura@valinux.co.jp>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
2007-09-10 0:32 ` 箕浦真
@ 2007-09-10 8:26 ` Balbir Singh
-1 siblings, 0 replies; 25+ messages in thread
From: Balbir Singh @ 2007-09-10 8:26 UTC (permalink / raw)
To: 箕浦真
Cc: containers-qjLDD68F18O7TbgM5vRIOg, Linux Memory Management List,
YAMAMOTO Takashi, Paul Menage
箕浦真 wrote:
> Takashi is AFK for a while; i'm replying for him as possible.
>
>> Thanks for doing this. We are building containerstats for
>> per container statistics. It would be really nice to provide
>> the statistics using that interface. I am not opposed to
>> memory.stat, but Paul Menage recommends that one file has
>> just one meaningful value.
>
> Thanks, we'll check it. The interface is not important for
> us.
>
>> The other thing is that could you please report all the
>> statistics in bytes, we are moving to that interface,
>> I've posted patches to do that. If we are going to push
>> a bunch of statistics in one file, please use a format
>> separator like
>
>> name: value
>
>>> YAMOMOTO Takshi
>>>
>>> todo: something like nr_active/inactive in /proc/vmstat.
>>>
>
>> This would be really nice to add.
>
> `something' here could be # of pages (in bytes according to
> your advice) on the global active (or inactive) list that
> are charged to a container, and/or # of pages on the
> per-container active/inactive list. The latter is easy to
> implement but I'm afraid it's somewhat confusing for users.
>
I think it would be useful for administrators to get a rough
idea of the working set (active bytes), to configure the size
of the container.
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
@ 2007-09-10 8:26 ` Balbir Singh
0 siblings, 0 replies; 25+ messages in thread
From: Balbir Singh @ 2007-09-10 8:26 UTC (permalink / raw)
To: 箕浦真
Cc: YAMAMOTO Takashi, containers, Paul Menage,
Linux Memory Management List
=?ISO-8859-1?Q?=E7=AE=95=E6=B5=A6=E7=9C=9F?= wrote:
Return-Path: <owner-linux-mm@kvack.org>
X-Envelope-To: <"|/home/majordomo/wrapper archive -f /home/ftp/pub/archives/linux-mm/linux-mm -m -a"> (uid 0)
X-Orcpt: rfc822;linux-mm-outgoing
Original-Recipient: rfc822;linux-mm-outgoing
> Takashi is AFK for a while; i'm replying for him as possible.
>
>> Thanks for doing this. We are building containerstats for
>> per container statistics. It would be really nice to provide
>> the statistics using that interface. I am not opposed to
>> memory.stat, but Paul Menage recommends that one file has
>> just one meaningful value.
>
> Thanks, we'll check it. The interface is not important for
> us.
>
>> The other thing is that could you please report all the
>> statistics in bytes, we are moving to that interface,
>> I've posted patches to do that. If we are going to push
>> a bunch of statistics in one file, please use a format
>> separator like
>
>> name: value
>
>>> YAMOMOTO Takshi
>>>
>>> todo: something like nr_active/inactive in /proc/vmstat.
>>>
>
>> This would be really nice to add.
>
> `something' here could be # of pages (in bytes according to
> your advice) on the global active (or inactive) list that
> are charged to a container, and/or # of pages on the
> per-container active/inactive list. The latter is easy to
> implement but I'm afraid it's somewhat confusing for users.
>
I think it would be useful for administrators to get a rough
idea of the working set (active bytes), to configure the size
of the container.
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
2007-09-07 9:55 ` Balbir Singh
@ 2007-09-10 23:21 ` Paul Menage
-1 siblings, 0 replies; 25+ messages in thread
From: Paul Menage @ 2007-09-10 23:21 UTC (permalink / raw)
To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
YAMAMOTO Takashi, Linux Memory Management List
On 9/7/07, Balbir Singh <balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org> wrote:
>
> Thanks for doing this. We are building containerstats for
> per container statistics. It would be really nice to provide
> the statistics using that interface. I am not opposed to
> memory.stat, but Paul Menage recommends that one file has
> just one meaningful value.
That's based on examples from other virtual filesystems such as sysfs.
Paul
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
@ 2007-09-10 23:21 ` Paul Menage
0 siblings, 0 replies; 25+ messages in thread
From: Paul Menage @ 2007-09-10 23:21 UTC (permalink / raw)
To: balbir
Cc: YAMAMOTO Takashi, svaidy, containers, minoura,
Linux Memory Management List
On 9/7/07, Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
>
> Thanks for doing this. We are building containerstats for
> per container statistics. It would be really nice to provide
> the statistics using that interface. I am not opposed to
> memory.stat, but Paul Menage recommends that one file has
> just one meaningful value.
That's based on examples from other virtual filesystems such as sysfs.
Paul
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
2007-09-10 23:21 ` Paul Menage
@ 2007-09-11 2:39 ` Balbir Singh
-1 siblings, 0 replies; 25+ messages in thread
From: Balbir Singh @ 2007-09-11 2:39 UTC (permalink / raw)
To: Paul Menage
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
YAMAMOTO Takashi, Linux Memory Management List
Paul Menage wrote:
> On 9/7/07, Balbir Singh <balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org> wrote:
>> Thanks for doing this. We are building containerstats for
>> per container statistics. It would be really nice to provide
>> the statistics using that interface. I am not opposed to
>> memory.stat, but Paul Menage recommends that one file has
>> just one meaningful value.
>
> That's based on examples from other virtual filesystems such as sysfs.
>
Even during the CKRM days (when configfs was used), the recommendation
from the configfs folks was the same. I sometimes worry about the
extra pinned dentries created with this logic/rule though.
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
@ 2007-09-11 2:39 ` Balbir Singh
0 siblings, 0 replies; 25+ messages in thread
From: Balbir Singh @ 2007-09-11 2:39 UTC (permalink / raw)
To: Paul Menage
Cc: YAMAMOTO Takashi, svaidy, containers, minoura,
Linux Memory Management List
Paul Menage wrote:
> On 9/7/07, Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
>> Thanks for doing this. We are building containerstats for
>> per container statistics. It would be really nice to provide
>> the statistics using that interface. I am not opposed to
>> memory.stat, but Paul Menage recommends that one file has
>> just one meaningful value.
>
> That's based on examples from other virtual filesystems such as sysfs.
>
Even during the CKRM days (when configfs was used), the recommendation
from the configfs folks was the same. I sometimes worry about the
extra pinned dentries created with this logic/rule though.
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
2007-09-07 9:55 ` Balbir Singh
@ 2007-09-26 1:48 ` YAMAMOTO Takashi
-1 siblings, 0 replies; 25+ messages in thread
From: YAMAMOTO Takashi @ 2007-09-26 1:48 UTC (permalink / raw)
To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
menage-hpIqsD4AKlfQT0dZR+AlfA, linux-mm-Bw31MaZKKs3YtjvyW6yDsg
> YAMAMOTO Takashi wrote:
> > hi,
> >
> > i implemented some statistics for your memory controller.
> >
> > it's tested with 2.6.23-rc2-mm2 + memory controller v7.
> > i think it can be applied to 2.6.23-rc4-mm1 as well.
> >
>
> Thanks for doing this. We are building containerstats for
> per container statistics. It would be really nice to provide
> the statistics using that interface. I am not opposed to
> memory.stat, but Paul Menage recommends that one file has
> just one meaningful value.
>
> The other thing is that could you please report all the
> statistics in bytes, we are moving to that interface,
> I've posted patches to do that. If we are going to push
> a bunch of statistics in one file, please use a format
> separator like
>
> name: value
i followed /proc/vmstat.
are you going to convert /proc/vmstat to the format as well?
YAMAMOTO Takashi
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
@ 2007-09-26 1:48 ` YAMAMOTO Takashi
0 siblings, 0 replies; 25+ messages in thread
From: YAMAMOTO Takashi @ 2007-09-26 1:48 UTC (permalink / raw)
To: balbir; +Cc: containers, minoura, menage, linux-mm
> YAMAMOTO Takashi wrote:
> > hi,
> >
> > i implemented some statistics for your memory controller.
> >
> > it's tested with 2.6.23-rc2-mm2 + memory controller v7.
> > i think it can be applied to 2.6.23-rc4-mm1 as well.
> >
>
> Thanks for doing this. We are building containerstats for
> per container statistics. It would be really nice to provide
> the statistics using that interface. I am not opposed to
> memory.stat, but Paul Menage recommends that one file has
> just one meaningful value.
>
> The other thing is that could you please report all the
> statistics in bytes, we are moving to that interface,
> I've posted patches to do that. If we are going to push
> a bunch of statistics in one file, please use a format
> separator like
>
> name: value
i followed /proc/vmstat.
are you going to convert /proc/vmstat to the format as well?
YAMAMOTO Takashi
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
2007-09-26 1:48 ` YAMAMOTO Takashi
@ 2007-09-26 3:16 ` Balbir Singh
-1 siblings, 0 replies; 25+ messages in thread
From: Balbir Singh @ 2007-09-26 3:16 UTC (permalink / raw)
To: YAMAMOTO Takashi
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
menage-hpIqsD4AKlfQT0dZR+AlfA, linux-mm-Bw31MaZKKs3YtjvyW6yDsg
YAMAMOTO Takashi wrote:
>> YAMAMOTO Takashi wrote:
>>> hi,
>>>
>>> i implemented some statistics for your memory controller.
>>>
>>> it's tested with 2.6.23-rc2-mm2 + memory controller v7.
>>> i think it can be applied to 2.6.23-rc4-mm1 as well.
>>>
>> Thanks for doing this. We are building containerstats for
>> per container statistics. It would be really nice to provide
>> the statistics using that interface. I am not opposed to
>> memory.stat, but Paul Menage recommends that one file has
>> just one meaningful value.
>>
>> The other thing is that could you please report all the
>> statistics in bytes, we are moving to that interface,
>> I've posted patches to do that. If we are going to push
>> a bunch of statistics in one file, please use a format
>> separator like
>>
>> name: value
>
> i followed /proc/vmstat.
> are you going to convert /proc/vmstat to the format as well?
>
I see, no I don't plan to convert /proc/vmstat. I wanted
to make it easier for tools to parse the format. Like
you point out /proc/vmstat uses that format, so I guess
this format is just fine.
Thanks for following up.
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
@ 2007-09-26 3:16 ` Balbir Singh
0 siblings, 0 replies; 25+ messages in thread
From: Balbir Singh @ 2007-09-26 3:16 UTC (permalink / raw)
To: YAMAMOTO Takashi; +Cc: containers, minoura, menage, linux-mm
YAMAMOTO Takashi wrote:
>> YAMAMOTO Takashi wrote:
>>> hi,
>>>
>>> i implemented some statistics for your memory controller.
>>>
>>> it's tested with 2.6.23-rc2-mm2 + memory controller v7.
>>> i think it can be applied to 2.6.23-rc4-mm1 as well.
>>>
>> Thanks for doing this. We are building containerstats for
>> per container statistics. It would be really nice to provide
>> the statistics using that interface. I am not opposed to
>> memory.stat, but Paul Menage recommends that one file has
>> just one meaningful value.
>>
>> The other thing is that could you please report all the
>> statistics in bytes, we are moving to that interface,
>> I've posted patches to do that. If we are going to push
>> a bunch of statistics in one file, please use a format
>> separator like
>>
>> name: value
>
> i followed /proc/vmstat.
> are you going to convert /proc/vmstat to the format as well?
>
I see, no I don't plan to convert /proc/vmstat. I wanted
to make it easier for tools to parse the format. Like
you point out /proc/vmstat uses that format, so I guess
this format is just fine.
Thanks for following up.
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
[not found] ` <20070907033942.4A6541BFA52-Pcsii4f/SVk@public.gmane.org>
2007-09-07 5:02 ` KAMEZAWA Hiroyuki
2007-09-07 9:55 ` Balbir Singh
@ 2007-10-04 5:13 ` YAMAMOTO Takashi
[not found] ` <20071004051332.9BE2C1BF95B-Pcsii4f/SVk@public.gmane.org>
2 siblings, 1 reply; 25+ messages in thread
From: YAMAMOTO Takashi @ 2007-10-04 5:13 UTC (permalink / raw)
To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
svaidy-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg
> hi,
>
> i implemented some statistics for your memory controller.
>
> it's tested with 2.6.23-rc2-mm2 + memory controller v7.
> i think it can be applied to 2.6.23-rc4-mm1 as well.
>
> YAMOMOTO Takshi
>
> todo: something like nr_active/inactive in /proc/vmstat.
here's the version i'm working on currently. any comments?
changes from the previous version:
- adapt to 2.6.23-rc8-mm2, container -> cgroup rename.
- reflect some of comments on this list.
- rename some macros as suggested by balbir
- sprinkle some inlines as suggested by balbir.
- remove "isolate" statistics
- remove PAGE_CONTAINER_CACHE hack and
add "flags" member to page_container instead.
- make counters atomic_long_t.
- add some comments.
- coding style.
- implement nr_active/nr_inactive. they show numbers of pages on
per-cgroup lru lists.
todo:
- consider to make counters per-cpu.
- more statistics.
YAMAMOTO Takashi
--- linux-2.6.23-rc8-mm2-stat/mm/memcontrol.c.BACKUP 2007-10-01 17:19:57.000000000 +0900
+++ linux-2.6.23-rc8-mm2-stat/mm/memcontrol.c 2007-10-04 12:42:05.000000000 +0900
@@ -25,6 +25,7 @@
#include <linux/backing-dev.h>
#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
#include <linux/swap.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
@@ -34,6 +35,52 @@
struct cgroup_subsys mem_cgroup_subsys;
static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
+enum mem_cgroup_stat_index {
+ /*
+ * for MEM_CONTAINER_TYPE_ALL, usage == pagecache + rss
+ */
+ MEM_CGROUP_STAT_PAGECACHE, /* # of pages charged as cache */
+ MEM_CGROUP_STAT_RSS, /* # of pages charged as rss */
+
+ /*
+ * redundant; usage == charge - uncharge
+ */
+ MEM_CGROUP_STAT_CHARGE, /* # of pages charged */
+ MEM_CGROUP_STAT_UNCHARGE, /* # of pages uncharged */
+
+ MEM_CGROUP_STAT_ACTIVE, /* # of pages on active_list */
+ MEM_CGROUP_STAT_INACTIVE, /* # of pages on inactive_list */
+
+ MEM_CGROUP_STAT_NSTATS,
+};
+
+static const char * const mem_cgroup_stat_desc[] = {
+ [MEM_CGROUP_STAT_PAGECACHE] = "page_cache",
+ [MEM_CGROUP_STAT_RSS] = "rss",
+ [MEM_CGROUP_STAT_CHARGE] = "charge",
+ [MEM_CGROUP_STAT_UNCHARGE] = "uncharge",
+ [MEM_CGROUP_STAT_ACTIVE] = "nr_active",
+ [MEM_CGROUP_STAT_INACTIVE] = "nr_inactive",
+};
+
+struct mem_cgroup_stat {
+ atomic_long_t count[MEM_CGROUP_STAT_NSTATS];
+};
+
+static inline void mem_cgroup_stat_inc(struct mem_cgroup_stat * stat,
+ enum mem_cgroup_stat_index idx)
+{
+
+ atomic_long_inc(&stat->count[idx]);
+}
+
+static inline void mem_cgroup_stat_dec(struct mem_cgroup_stat * stat,
+ enum mem_cgroup_stat_index idx)
+{
+
+ atomic_long_dec(&stat->count[idx]);
+}
+
/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
@@ -63,6 +110,11 @@ struct mem_cgroup {
*/
spinlock_t lru_lock;
unsigned long control_type; /* control RSS or RSS+Pagecache */
+
+ /*
+ * statistics
+ */
+ struct mem_cgroup_stat stat;
};
/*
@@ -83,6 +135,9 @@ struct page_cgroup {
struct mem_cgroup *mem_cgroup;
atomic_t ref_cnt; /* Helpful when pages move b/w */
/* mapped and cached states */
+ int flags;
+#define PCGF_PAGECACHE 1 /* charged as page cache */
+#define PCGF_ACTIVE 2 /* on active_list */
};
enum {
@@ -164,10 +219,24 @@ static void __always_inline unlock_page_
static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
{
- if (active)
- list_move(&pc->lru, &pc->mem_cgroup->active_list);
- else
- list_move(&pc->lru, &pc->mem_cgroup->inactive_list);
+ struct mem_cgroup *mem = pc->mem_cgroup;
+ struct mem_cgroup_stat *stat = &mem->stat;
+
+ if (active) {
+ list_move(&pc->lru, &mem->active_list);
+ if ((pc->flags & PCGF_ACTIVE) == 0) {
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_INACTIVE);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_ACTIVE);
+ pc->flags |= PCGF_ACTIVE;
+ }
+ } else {
+ list_move(&pc->lru, &mem->inactive_list);
+ if ((pc->flags & PCGF_ACTIVE) != 0) {
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_ACTIVE);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_INACTIVE);
+ pc->flags &= ~PCGF_ACTIVE;
+ }
+ }
}
/*
@@ -256,10 +325,11 @@ unsigned long mem_cgroup_isolate_pages(u
* 0 if the charge was successful
* < 0 if the cgroup is over its limit
*/
-int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
+int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask, int is_cache)
{
struct mem_cgroup *mem;
+ struct mem_cgroup_stat *stat;
struct page_cgroup *pc, *race_pc;
unsigned long flags;
unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
@@ -365,8 +435,17 @@ noreclaim:
atomic_set(&pc->ref_cnt, 1);
pc->mem_cgroup = mem;
pc->page = page;
+ pc->flags = (is_cache ? PCGF_PAGECACHE : 0) | PCGF_ACTIVE;
page_assign_page_cgroup(page, pc);
+ stat = &mem->stat;
+ if (is_cache)
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_PAGECACHE);
+ else
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_RSS);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_CHARGE);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_ACTIVE);
+
spin_lock_irqsave(&mem->lru_lock, flags);
list_add(&pc->lru, &mem->active_list);
spin_unlock_irqrestore(&mem->lru_lock, flags);
@@ -382,6 +461,14 @@ err:
return -ENOMEM;
}
+int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask)
+{
+
+ return mem_cgroup_charge_common(page, mm, gfp_mask, 0);
+}
+
+
/*
* See if the cached pages should be charged at all?
*/
@@ -394,7 +481,7 @@ int mem_cgroup_cache_charge(struct page
mem = rcu_dereference(mm->mem_cgroup);
if (mem->control_type == MEM_CGROUP_TYPE_ALL)
- return mem_cgroup_charge(page, mm, gfp_mask);
+ return mem_cgroup_charge_common(page, mm, gfp_mask, 1);
else
return 0;
}
@@ -417,7 +504,11 @@ void mem_cgroup_uncharge(struct page_cgr
return;
if (atomic_dec_and_test(&pc->ref_cnt)) {
+ struct mem_cgroup_stat *stat;
+ int is_cache;
+
page = pc->page;
+ is_cache = (pc->flags & PCGF_PAGECACHE) != 0;
lock_page_cgroup(page);
mem = pc->mem_cgroup;
css_put(&mem->css);
@@ -425,7 +516,19 @@ void mem_cgroup_uncharge(struct page_cgr
unlock_page_cgroup(page);
res_counter_uncharge(&mem->res, PAGE_SIZE);
+ stat = &mem->stat;
+ if (is_cache)
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_PAGECACHE);
+ else
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_RSS);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_UNCHARGE);
+ if ((pc->flags & PCGF_ACTIVE) != 0)
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_ACTIVE);
+ else
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_INACTIVE);
+
spin_lock_irqsave(&mem->lru_lock, flags);
+ BUG_ON(list_empty(&pc->lru));
list_del_init(&pc->lru);
spin_unlock_irqrestore(&mem->lru_lock, flags);
kfree(pc);
@@ -517,6 +620,43 @@ static ssize_t mem_control_type_read(str
ppos, buf, s - buf);
}
+static void mem_cgroup_stat_init(struct mem_cgroup_stat *stat)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(stat->count); i++)
+ atomic_long_set(&stat->count[i], 0);
+}
+
+static int mem_control_stat_show(struct seq_file *m, void *arg)
+{
+ struct cgroup *cont = m->private;
+ struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
+ struct mem_cgroup_stat *stat = &mem_cont->stat;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(stat->count); i++)
+ seq_printf(m, "%s %u\n", mem_cgroup_stat_desc[i],
+ (unsigned int)atomic_long_read(&stat->count[i]));
+
+ return 0;
+}
+
+static const struct file_operations mem_control_stat_file_operations = {
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int mem_control_stat_open(struct inode *unused, struct file *file)
+{
+ /* XXX __d_cont */
+ struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;
+
+ file->f_op = &mem_control_stat_file_operations;
+ return single_open(file, mem_control_stat_show, cont);
+}
+
static struct cftype mem_cgroup_files[] = {
{
.name = "usage_in_bytes",
@@ -539,6 +679,10 @@ static struct cftype mem_cgroup_files[]
.write = mem_control_type_write,
.read = mem_control_type_read,
},
+ {
+ .name = "stat",
+ .open = mem_control_stat_open,
+ },
};
static struct mem_cgroup init_mem_cgroup;
@@ -562,6 +706,7 @@ mem_cgroup_create(struct cgroup_subsys *
INIT_LIST_HEAD(&mem->inactive_list);
spin_lock_init(&mem->lru_lock);
mem->control_type = MEM_CGROUP_TYPE_ALL;
+ mem_cgroup_stat_init(&mem->stat);
return &mem->css;
}
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
[not found] ` <20071004051332.9BE2C1BF95B-Pcsii4f/SVk@public.gmane.org>
@ 2007-10-05 3:51 ` Balbir Singh
[not found] ` <4705B4AA.4070604-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-10-10 1:01 ` YAMAMOTO Takashi
1 sibling, 1 reply; 25+ messages in thread
From: Balbir Singh @ 2007-10-05 3:51 UTC (permalink / raw)
To: YAMAMOTO Takashi
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg
YAMAMOTO Takashi wrote:
>> hi,
>>
>> i implemented some statistics for your memory controller.
>>
>> it's tested with 2.6.23-rc2-mm2 + memory controller v7.
>> i think it can be applied to 2.6.23-rc4-mm1 as well.
>>
>> YAMOMOTO Takshi
>>
>> todo: something like nr_active/inactive in /proc/vmstat.
>
> here's the version i'm working on currently. any comments?
>
> changes from the previous version:
>
> - adapt to 2.6.23-rc8-mm2, container -> cgroup rename.
> - reflect some of comments on this list.
> - rename some macros as suggested by balbir
> - sprinkle some inlines as suggested by balbir.
> - remove "isolate" statistics
> - remove PAGE_CONTAINER_CACHE hack and
> add "flags" member to page_container instead.
> - make counters atomic_long_t.
> - add some comments.
> - coding style.
> - implement nr_active/nr_inactive. they show numbers of pages on
> per-cgroup lru lists.
>
> todo:
> - consider to make counters per-cpu.
> - more statistics.
>
> YAMAMOTO Takashi
Hi, YAMAMOTO-San
Looks much better, I did a quick review, it looks nice to me.
I'll test the patches and get back.
Thank you for working on this,
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
[not found] ` <4705B4AA.4070604-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2007-10-07 5:56 ` Balbir Singh
[not found] ` <4708752B.9080107-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
0 siblings, 1 reply; 25+ messages in thread
From: Balbir Singh @ 2007-10-07 5:56 UTC (permalink / raw)
To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
YAMAMOTO Takashi
Balbir Singh wrote:
> YAMAMOTO Takashi wrote:
>>> hi,
>>>
>>> i implemented some statistics for your memory controller.
>>>
>>> it's tested with 2.6.23-rc2-mm2 + memory controller v7.
>>> i think it can be applied to 2.6.23-rc4-mm1 as well.
>>>
>>> YAMOMOTO Takshi
>>>
>>> todo: something like nr_active/inactive in /proc/vmstat.
>> here's the version i'm working on currently. any comments?
>>
>> changes from the previous version:
>>
>> - adapt to 2.6.23-rc8-mm2, container -> cgroup rename.
>> - reflect some of comments on this list.
>> - rename some macros as suggested by balbir
>> - sprinkle some inlines as suggested by balbir.
>> - remove "isolate" statistics
>> - remove PAGE_CONTAINER_CACHE hack and
>> add "flags" member to page_container instead.
>> - make counters atomic_long_t.
>> - add some comments.
>> - coding style.
>> - implement nr_active/nr_inactive. they show numbers of pages on
>> per-cgroup lru lists.
>>
>> todo:
>> - consider to make counters per-cpu.
>> - more statistics.
>>
>> YAMAMOTO Takashi
>
> Hi, YAMAMOTO-San
>
> Looks much better, I did a quick review, it looks nice to me.
> I'll test the patches and get back.
>
> Thank you for working on this,
>
Forgot to mention one other detail, could we track the statistics
in bytes? We track usage and limit in bytes currently. I am open
to opinions on this, but the last time we discussed numbers,
tracking bytes was the preferred approach.
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
[not found] ` <4708752B.9080107-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2007-10-08 22:55 ` YAMAMOTO Takashi
[not found] ` <20071008225528.7A0481BF486-Pcsii4f/SVk@public.gmane.org>
0 siblings, 1 reply; 25+ messages in thread
From: YAMAMOTO Takashi @ 2007-10-08 22:55 UTC (permalink / raw)
To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg
> Forgot to mention one other detail, could we track the statistics
> in bytes? We track usage and limit in bytes currently. I am open
> to opinions on this, but the last time we discussed numbers,
> tracking bytes was the preferred approach.
while /proc/vmstat and others uses number of pages?
IMO, numbers of pages make more sense for this kind of statistics.
however, if there's a consensus, i can follow. after all,
it's merely a matter of taste.
YAMAMOTO Takashi
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
[not found] ` <20071008225528.7A0481BF486-Pcsii4f/SVk@public.gmane.org>
@ 2007-10-09 3:11 ` Balbir Singh
0 siblings, 0 replies; 25+ messages in thread
From: Balbir Singh @ 2007-10-09 3:11 UTC (permalink / raw)
To: YAMAMOTO Takashi
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg
YAMAMOTO Takashi wrote:
>> Forgot to mention one other detail, could we track the statistics
>> in bytes? We track usage and limit in bytes currently. I am open
>> to opinions on this, but the last time we discussed numbers,
>> tracking bytes was the preferred approach.
>
> while /proc/vmstat and others uses number of pages?
> IMO, numbers of pages make more sense for this kind of statistics.
>
Yes, I thought so too, but consensus was built around using
bytes for limit and usage for the memory controller. Keeping
all stats in the same units would make it less confusing for users.
> however, if there's a consensus, i can follow. after all,
> it's merely a matter of taste.
>
I agree
> YAMAMOTO Takashi
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
[not found] ` <20071004051332.9BE2C1BF95B-Pcsii4f/SVk@public.gmane.org>
2007-10-05 3:51 ` Balbir Singh
@ 2007-10-10 1:01 ` YAMAMOTO Takashi
[not found] ` <20071010010117.D9A821BEEE7-Pcsii4f/SVk@public.gmane.org>
1 sibling, 1 reply; 25+ messages in thread
From: YAMAMOTO Takashi @ 2007-10-10 1:01 UTC (permalink / raw)
To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
svaidy-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg
hi,
> > i implemented some statistics for your memory controller.
here's a new version.
changes from the previous:
- make counters per-cpu.
- value *= PAGE_SIZE
YAMAMOTO Takashi
--- linux-2.6.23-rc8-mm2-stat/mm/memcontrol.c.BACKUP 2007-10-01 17:19:57.000000000 +0900
+++ linux-2.6.23-rc8-mm2-stat/mm/memcontrol.c 2007-10-09 13:41:43.000000000 +0900
@@ -25,6 +25,7 @@
#include <linux/backing-dev.h>
#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
#include <linux/swap.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
@@ -34,6 +35,63 @@
struct cgroup_subsys mem_cgroup_subsys;
static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
+enum mem_cgroup_stat_index {
+ /*
+ * for MEM_CONTAINER_TYPE_ALL, usage == pagecache + rss
+ */
+ MEM_CGROUP_STAT_PAGECACHE, /* # of pages charged as cache */
+ MEM_CGROUP_STAT_RSS, /* # of pages charged as rss */
+
+ /*
+ * redundant; usage == charge - uncharge
+ */
+ MEM_CGROUP_STAT_CHARGE, /* # of pages charged */
+ MEM_CGROUP_STAT_UNCHARGE, /* # of pages uncharged */
+
+ MEM_CGROUP_STAT_ACTIVE, /* # of pages on active_list */
+ MEM_CGROUP_STAT_INACTIVE, /* # of pages on inactive_list */
+
+ MEM_CGROUP_STAT_NSTATS,
+};
+
+static const struct mem_cgroup_stat_desc {
+ const char *msg;
+ u64 unit;
+} mem_cgroup_stat_desc[] = {
+ [MEM_CGROUP_STAT_PAGECACHE] = { "page_cache", PAGE_SIZE, },
+ [MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, },
+ [MEM_CGROUP_STAT_CHARGE] = { "charge", PAGE_SIZE },
+ [MEM_CGROUP_STAT_UNCHARGE] = { "uncharge", PAGE_SIZE, },
+ [MEM_CGROUP_STAT_ACTIVE] = { "active", PAGE_SIZE, },
+ [MEM_CGROUP_STAT_INACTIVE] = { "inactive", PAGE_SIZE, },
+};
+
+struct mem_cgroup_stat_cpu {
+ u64 count[MEM_CGROUP_STAT_NSTATS];
+} ____cacheline_aligned_in_smp;
+
+struct mem_cgroup_stat {
+ struct mem_cgroup_stat_cpu cpustat[NR_CPUS];
+};
+
+static inline void mem_cgroup_stat_inc(struct mem_cgroup_stat * stat,
+ enum mem_cgroup_stat_index idx)
+{
+ unsigned int cpu = get_cpu();
+
+ stat->cpustat[cpu].count[idx]++;
+ put_cpu();
+}
+
+static inline void mem_cgroup_stat_dec(struct mem_cgroup_stat * stat,
+ enum mem_cgroup_stat_index idx)
+{
+ unsigned int cpu = get_cpu();
+
+ stat->cpustat[cpu].count[idx]--;
+ put_cpu();
+}
+
/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
@@ -63,6 +121,11 @@ struct mem_cgroup {
*/
spinlock_t lru_lock;
unsigned long control_type; /* control RSS or RSS+Pagecache */
+
+ /*
+ * statistics
+ */
+ struct mem_cgroup_stat stat;
};
/*
@@ -83,6 +146,9 @@ struct page_cgroup {
struct mem_cgroup *mem_cgroup;
atomic_t ref_cnt; /* Helpful when pages move b/w */
/* mapped and cached states */
+ int flags;
+#define PCGF_PAGECACHE 1 /* charged as page cache */
+#define PCGF_ACTIVE 2 /* on active_list */
};
enum {
@@ -164,10 +230,24 @@ static void __always_inline unlock_page_
static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
{
- if (active)
- list_move(&pc->lru, &pc->mem_cgroup->active_list);
- else
- list_move(&pc->lru, &pc->mem_cgroup->inactive_list);
+ struct mem_cgroup *mem = pc->mem_cgroup;
+ struct mem_cgroup_stat *stat = &mem->stat;
+
+ if (active) {
+ list_move(&pc->lru, &mem->active_list);
+ if ((pc->flags & PCGF_ACTIVE) == 0) {
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_INACTIVE);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_ACTIVE);
+ pc->flags |= PCGF_ACTIVE;
+ }
+ } else {
+ list_move(&pc->lru, &mem->inactive_list);
+ if ((pc->flags & PCGF_ACTIVE) != 0) {
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_ACTIVE);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_INACTIVE);
+ pc->flags &= ~PCGF_ACTIVE;
+ }
+ }
}
/*
@@ -256,10 +336,11 @@ unsigned long mem_cgroup_isolate_pages(u
* 0 if the charge was successful
* < 0 if the cgroup is over its limit
*/
-int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
+int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask, int is_cache)
{
struct mem_cgroup *mem;
+ struct mem_cgroup_stat *stat;
struct page_cgroup *pc, *race_pc;
unsigned long flags;
unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
@@ -365,8 +446,17 @@ noreclaim:
atomic_set(&pc->ref_cnt, 1);
pc->mem_cgroup = mem;
pc->page = page;
+ pc->flags = (is_cache ? PCGF_PAGECACHE : 0) | PCGF_ACTIVE;
page_assign_page_cgroup(page, pc);
+ stat = &mem->stat;
+ if (is_cache)
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_PAGECACHE);
+ else
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_RSS);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_CHARGE);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_ACTIVE);
+
spin_lock_irqsave(&mem->lru_lock, flags);
list_add(&pc->lru, &mem->active_list);
spin_unlock_irqrestore(&mem->lru_lock, flags);
@@ -382,6 +472,14 @@ err:
return -ENOMEM;
}
+int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask)
+{
+
+ return mem_cgroup_charge_common(page, mm, gfp_mask, 0);
+}
+
+
/*
* See if the cached pages should be charged at all?
*/
@@ -394,7 +492,7 @@ int mem_cgroup_cache_charge(struct page
mem = rcu_dereference(mm->mem_cgroup);
if (mem->control_type == MEM_CGROUP_TYPE_ALL)
- return mem_cgroup_charge(page, mm, gfp_mask);
+ return mem_cgroup_charge_common(page, mm, gfp_mask, 1);
else
return 0;
}
@@ -417,7 +515,11 @@ void mem_cgroup_uncharge(struct page_cgr
return;
if (atomic_dec_and_test(&pc->ref_cnt)) {
+ struct mem_cgroup_stat *stat;
+ int is_cache;
+
page = pc->page;
+ is_cache = (pc->flags & PCGF_PAGECACHE) != 0;
lock_page_cgroup(page);
mem = pc->mem_cgroup;
css_put(&mem->css);
@@ -425,7 +527,19 @@ void mem_cgroup_uncharge(struct page_cgr
unlock_page_cgroup(page);
res_counter_uncharge(&mem->res, PAGE_SIZE);
+ stat = &mem->stat;
+ if (is_cache)
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_PAGECACHE);
+ else
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_RSS);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_UNCHARGE);
+ if ((pc->flags & PCGF_ACTIVE) != 0)
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_ACTIVE);
+ else
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_INACTIVE);
+
spin_lock_irqsave(&mem->lru_lock, flags);
+ BUG_ON(list_empty(&pc->lru));
list_del_init(&pc->lru);
spin_unlock_irqrestore(&mem->lru_lock, flags);
kfree(pc);
@@ -517,6 +631,49 @@ static ssize_t mem_control_type_read(str
ppos, buf, s - buf);
}
+static void mem_cgroup_stat_init(struct mem_cgroup_stat *stat)
+{
+
+ memset(stat, 0, sizeof(*stat));
+}
+
+static int mem_control_stat_show(struct seq_file *m, void *arg)
+{
+ struct cgroup *cont = m->private;
+ struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
+ struct mem_cgroup_stat *stat = &mem_cont->stat;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(stat->cpustat[0].count); i++) {
+ unsigned int cpu;
+ u64 val;
+
+ val = 0;
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ val += stat->cpustat[cpu].count[i];
+
+ seq_printf(m, "%s %llu\n", mem_cgroup_stat_desc[i].msg,
+ (unsigned long long)(val * mem_cgroup_stat_desc[i].unit));
+ }
+
+ return 0;
+}
+
+static const struct file_operations mem_control_stat_file_operations = {
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int mem_control_stat_open(struct inode *unused, struct file *file)
+{
+ /* XXX __d_cont */
+ struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;
+
+ file->f_op = &mem_control_stat_file_operations;
+ return single_open(file, mem_control_stat_show, cont);
+}
+
static struct cftype mem_cgroup_files[] = {
{
.name = "usage_in_bytes",
@@ -539,6 +696,10 @@ static struct cftype mem_cgroup_files[]
.write = mem_control_type_write,
.read = mem_control_type_read,
},
+ {
+ .name = "stat",
+ .open = mem_control_stat_open,
+ },
};
static struct mem_cgroup init_mem_cgroup;
@@ -562,6 +723,7 @@ mem_cgroup_create(struct cgroup_subsys *
INIT_LIST_HEAD(&mem->inactive_list);
spin_lock_init(&mem->lru_lock);
mem->control_type = MEM_CGROUP_TYPE_ALL;
+ mem_cgroup_stat_init(&mem->stat);
return &mem->css;
}
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
[not found] ` <20071010010117.D9A821BEEE7-Pcsii4f/SVk@public.gmane.org>
@ 2007-10-10 6:44 ` KAMEZAWA Hiroyuki
[not found] ` <20071010154415.951943e7.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
0 siblings, 1 reply; 25+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-10-10 6:44 UTC (permalink / raw)
To: YAMAMOTO Takashi
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
On Wed, 10 Oct 2007 10:01:17 +0900 (JST)
yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org (YAMAMOTO Takashi) wrote:
> hi,
>
> > > i implemented some statistics for your memory controller.
>
> here's a new version.
>
> changes from the previous:
> - make counters per-cpu.
> - value *= PAGE_SIZE
>
YAMAMOTO-san, I like this work and the patch seems good.
But will HUNK with my work to some extent ;)
So, I'd like to merge this patch on my patch set (against -mm) if you don't mind.
If it's ok, please give your "Signed-off-by" line. Then, I'll merge this to mine.
Thanks,
-Kame
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [RFC] [PATCH] memory controller statistics
[not found] ` <20071010154415.951943e7.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
@ 2007-10-10 8:19 ` YAMAMOTO Takashi
0 siblings, 0 replies; 25+ messages in thread
From: YAMAMOTO Takashi @ 2007-10-10 8:19 UTC (permalink / raw)
To: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A
Cc: containers-qjLDD68F18O7TbgM5vRIOg, minoura-jCdQPDEk3idL9jVzuh4AOg,
balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
> On Wed, 10 Oct 2007 10:01:17 +0900 (JST)
> yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org (YAMAMOTO Takashi) wrote:
>
> > hi,
> >
> > > > i implemented some statistics for your memory controller.
> >
> > here's a new version.
> >
> > changes from the previous:
> > - make counters per-cpu.
> > - value *= PAGE_SIZE
> >
>
> YAMAMOTO-san, I like this work and the patch seems good.
> But will HUNK with my work to some extent ;)
>
> So, I'd like to merge this patch on my patch set (against -mm) if you don't mind.
> If it's ok, please give your "Signed-off-by" line. Then, I'll merge this to mine.
>
> Thanks,
> -Kame
sure. here's the latest version.
changes from the previous:
- fix a race in uncharge. (check PCGF_ACTIVE with mem->lru_lock held)
YAMAMOTO Takashi
Signed-off-by: YAMAMOTO Takashi <yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org>
---
--- linux-2.6.23-rc8-mm2-stat/mm/memcontrol.c.BACKUP 2007-10-01 17:19:57.000000000 +0900
+++ linux-2.6.23-rc8-mm2-stat/mm/memcontrol.c 2007-10-10 12:40:48.000000000 +0900
@@ -25,6 +25,7 @@
#include <linux/backing-dev.h>
#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
#include <linux/swap.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
@@ -34,6 +35,63 @@
struct cgroup_subsys mem_cgroup_subsys;
static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
+enum mem_cgroup_stat_index {
+ /*
+ * for MEM_CONTAINER_TYPE_ALL, usage == pagecache + rss
+ */
+ MEM_CGROUP_STAT_PAGECACHE, /* # of pages charged as cache */
+ MEM_CGROUP_STAT_RSS, /* # of pages charged as rss */
+
+ /*
+ * redundant; usage == charge - uncharge
+ */
+ MEM_CGROUP_STAT_CHARGE, /* # of pages charged */
+ MEM_CGROUP_STAT_UNCHARGE, /* # of pages uncharged */
+
+ MEM_CGROUP_STAT_ACTIVE, /* # of pages on active_list */
+ MEM_CGROUP_STAT_INACTIVE, /* # of pages on inactive_list */
+
+ MEM_CGROUP_STAT_NSTATS,
+};
+
+static const struct mem_cgroup_stat_desc {
+ const char *msg;
+ u64 unit;
+} mem_cgroup_stat_desc[] = {
+ [MEM_CGROUP_STAT_PAGECACHE] = { "page_cache", PAGE_SIZE, },
+ [MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, },
+ [MEM_CGROUP_STAT_CHARGE] = { "charge", PAGE_SIZE },
+ [MEM_CGROUP_STAT_UNCHARGE] = { "uncharge", PAGE_SIZE, },
+ [MEM_CGROUP_STAT_ACTIVE] = { "active", PAGE_SIZE, },
+ [MEM_CGROUP_STAT_INACTIVE] = { "inactive", PAGE_SIZE, },
+};
+
+struct mem_cgroup_stat_cpu {
+ u64 count[MEM_CGROUP_STAT_NSTATS];
+} ____cacheline_aligned_in_smp;
+
+struct mem_cgroup_stat {
+ struct mem_cgroup_stat_cpu cpustat[NR_CPUS];
+};
+
+static inline void mem_cgroup_stat_inc(struct mem_cgroup_stat * stat,
+ enum mem_cgroup_stat_index idx)
+{
+ unsigned int cpu = get_cpu();
+
+ stat->cpustat[cpu].count[idx]++;
+ put_cpu();
+}
+
+static inline void mem_cgroup_stat_dec(struct mem_cgroup_stat * stat,
+ enum mem_cgroup_stat_index idx)
+{
+ unsigned int cpu = get_cpu();
+
+ stat->cpustat[cpu].count[idx]--;
+ put_cpu();
+}
+
/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
@@ -63,6 +121,11 @@ struct mem_cgroup {
*/
spinlock_t lru_lock;
unsigned long control_type; /* control RSS or RSS+Pagecache */
+
+ /*
+ * statistics
+ */
+ struct mem_cgroup_stat stat;
};
/*
@@ -83,6 +146,9 @@ struct page_cgroup {
struct mem_cgroup *mem_cgroup;
atomic_t ref_cnt; /* Helpful when pages move b/w */
/* mapped and cached states */
+ int flags;
+#define PCGF_PAGECACHE 1 /* charged as page cache */
+#define PCGF_ACTIVE 2 /* on active_list */
};
enum {
@@ -164,10 +230,24 @@ static void __always_inline unlock_page_
static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
{
- if (active)
- list_move(&pc->lru, &pc->mem_cgroup->active_list);
- else
- list_move(&pc->lru, &pc->mem_cgroup->inactive_list);
+ struct mem_cgroup *mem = pc->mem_cgroup;
+ struct mem_cgroup_stat *stat = &mem->stat;
+
+ if (active) {
+ list_move(&pc->lru, &mem->active_list);
+ if ((pc->flags & PCGF_ACTIVE) == 0) {
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_INACTIVE);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_ACTIVE);
+ pc->flags |= PCGF_ACTIVE;
+ }
+ } else {
+ list_move(&pc->lru, &mem->inactive_list);
+ if ((pc->flags & PCGF_ACTIVE) != 0) {
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_ACTIVE);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_INACTIVE);
+ pc->flags &= ~PCGF_ACTIVE;
+ }
+ }
}
/*
@@ -256,10 +336,11 @@ unsigned long mem_cgroup_isolate_pages(u
* 0 if the charge was successful
* < 0 if the cgroup is over its limit
*/
-int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
+int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask, int is_cache)
{
struct mem_cgroup *mem;
+ struct mem_cgroup_stat *stat;
struct page_cgroup *pc, *race_pc;
unsigned long flags;
unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
@@ -365,8 +446,17 @@ noreclaim:
atomic_set(&pc->ref_cnt, 1);
pc->mem_cgroup = mem;
pc->page = page;
+ pc->flags = (is_cache ? PCGF_PAGECACHE : 0) | PCGF_ACTIVE;
page_assign_page_cgroup(page, pc);
+ stat = &mem->stat;
+ if (is_cache)
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_PAGECACHE);
+ else
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_RSS);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_CHARGE);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_ACTIVE);
+
spin_lock_irqsave(&mem->lru_lock, flags);
list_add(&pc->lru, &mem->active_list);
spin_unlock_irqrestore(&mem->lru_lock, flags);
@@ -382,6 +472,14 @@ err:
return -ENOMEM;
}
+int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask)
+{
+
+ return mem_cgroup_charge_common(page, mm, gfp_mask, 0);
+}
+
+
/*
* See if the cached pages should be charged at all?
*/
@@ -394,7 +492,7 @@ int mem_cgroup_cache_charge(struct page
mem = rcu_dereference(mm->mem_cgroup);
if (mem->control_type == MEM_CGROUP_TYPE_ALL)
- return mem_cgroup_charge(page, mm, gfp_mask);
+ return mem_cgroup_charge_common(page, mm, gfp_mask, 1);
else
return 0;
}
@@ -417,17 +515,34 @@ void mem_cgroup_uncharge(struct page_cgr
return;
if (atomic_dec_and_test(&pc->ref_cnt)) {
+ struct mem_cgroup_stat *stat;
+ int oflags;
+
page = pc->page;
lock_page_cgroup(page);
mem = pc->mem_cgroup;
- css_put(&mem->css);
page_assign_page_cgroup(page, NULL);
unlock_page_cgroup(page);
res_counter_uncharge(&mem->res, PAGE_SIZE);
spin_lock_irqsave(&mem->lru_lock, flags);
+ oflags = pc->flags;
+ BUG_ON(list_empty(&pc->lru));
list_del_init(&pc->lru);
spin_unlock_irqrestore(&mem->lru_lock, flags);
+
+ stat = &mem->stat;
+ if ((oflags & PCGF_ACTIVE) != 0)
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_ACTIVE);
+ else
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_INACTIVE);
+ if ((oflags & PCGF_PAGECACHE) != 0)
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_PAGECACHE);
+ else
+ mem_cgroup_stat_dec(stat, MEM_CGROUP_STAT_RSS);
+ mem_cgroup_stat_inc(stat, MEM_CGROUP_STAT_UNCHARGE);
+
+ css_put(&mem->css);
kfree(pc);
}
}
@@ -517,6 +632,49 @@ static ssize_t mem_control_type_read(str
ppos, buf, s - buf);
}
+static void mem_cgroup_stat_init(struct mem_cgroup_stat *stat)
+{
+
+ memset(stat, 0, sizeof(*stat));
+}
+
+static int mem_control_stat_show(struct seq_file *m, void *arg)
+{
+ struct cgroup *cont = m->private;
+ struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
+ struct mem_cgroup_stat *stat = &mem_cont->stat;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(stat->cpustat[0].count); i++) {
+ unsigned int cpu;
+ u64 val;
+
+ val = 0;
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ val += stat->cpustat[cpu].count[i];
+
+ seq_printf(m, "%s %llu\n", mem_cgroup_stat_desc[i].msg,
+ (unsigned long long)(val * mem_cgroup_stat_desc[i].unit));
+ }
+
+ return 0;
+}
+
+static const struct file_operations mem_control_stat_file_operations = {
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int mem_control_stat_open(struct inode *unused, struct file *file)
+{
+ /* XXX __d_cont */
+ struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;
+
+ file->f_op = &mem_control_stat_file_operations;
+ return single_open(file, mem_control_stat_show, cont);
+}
+
static struct cftype mem_cgroup_files[] = {
{
.name = "usage_in_bytes",
@@ -539,6 +697,10 @@ static struct cftype mem_cgroup_files[]
.write = mem_control_type_write,
.read = mem_control_type_read,
},
+ {
+ .name = "stat",
+ .open = mem_control_stat_open,
+ },
};
static struct mem_cgroup init_mem_cgroup;
@@ -562,6 +724,7 @@ mem_cgroup_create(struct cgroup_subsys *
INIT_LIST_HEAD(&mem->inactive_list);
spin_lock_init(&mem->lru_lock);
mem->control_type = MEM_CGROUP_TYPE_ALL;
+ mem_cgroup_stat_init(&mem->stat);
return &mem->css;
}
^ permalink raw reply [flat|nested] 25+ messages in thread
end of thread, other threads:[~2007-10-10 8:19 UTC | newest]
Thread overview: 25+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-09-07 3:39 [RFC] [PATCH] memory controller statistics YAMAMOTO Takashi
[not found] ` <20070907033942.4A6541BFA52-Pcsii4f/SVk@public.gmane.org>
2007-09-07 5:02 ` KAMEZAWA Hiroyuki
[not found] ` <20070907140234.4fe1075d.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-09-07 5:38 ` YAMAMOTO Takashi
2007-09-07 9:55 ` Balbir Singh
2007-09-07 9:55 ` Balbir Singh
[not found] ` <46E12020.1060203-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-09-10 0:32 ` 箕浦真
2007-09-10 0:32 ` 箕浦真
[not found] ` <kk5tzq39x7h.fsf-VZKBDE94do7Xw16qUzMy135BbFPdSy0r@public.gmane.org>
2007-09-10 8:26 ` Balbir Singh
2007-09-10 8:26 ` Balbir Singh
2007-09-10 23:21 ` Paul Menage
2007-09-10 23:21 ` Paul Menage
[not found] ` <6599ad830709101621r2f1763cfpa0924f884d0ee2c-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-09-11 2:39 ` Balbir Singh
2007-09-11 2:39 ` Balbir Singh
2007-09-26 1:48 ` YAMAMOTO Takashi
2007-09-26 1:48 ` YAMAMOTO Takashi
[not found] ` <20070926014843.161E61BFA33-Pcsii4f/SVk@public.gmane.org>
2007-09-26 3:16 ` Balbir Singh
2007-09-26 3:16 ` Balbir Singh
2007-10-04 5:13 ` YAMAMOTO Takashi
[not found] ` <20071004051332.9BE2C1BF95B-Pcsii4f/SVk@public.gmane.org>
2007-10-05 3:51 ` Balbir Singh
[not found] ` <4705B4AA.4070604-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-10-07 5:56 ` Balbir Singh
[not found] ` <4708752B.9080107-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-10-08 22:55 ` YAMAMOTO Takashi
[not found] ` <20071008225528.7A0481BF486-Pcsii4f/SVk@public.gmane.org>
2007-10-09 3:11 ` Balbir Singh
2007-10-10 1:01 ` YAMAMOTO Takashi
[not found] ` <20071010010117.D9A821BEEE7-Pcsii4f/SVk@public.gmane.org>
2007-10-10 6:44 ` KAMEZAWA Hiroyuki
[not found] ` <20071010154415.951943e7.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-10-10 8:19 ` YAMAMOTO Takashi
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.