From: Balbir Singh <balbir@linux.vnet.ibm.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>
Subject: Re: [RFC][PATCH 2/5] memcg: uncharge in batched manner
Date: Mon, 31 Aug 2009 16:32:04 +0530 [thread overview]
Message-ID: <20090831110204.GG4770@balbir.in.ibm.com> (raw)
In-Reply-To: <20090828132438.b33828bc.kamezawa.hiroyu@jp.fujitsu.com>
* KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> [2009-08-28 13:24:38]:
>
> In a massively parallel environment, res_counter can be a performance bottleneck.
> This patch is a trial for reducing lock contention.
> One strong technique to reduce lock contention is reducing calls by
> batching some amount of calls into one.
>
> Considering charge/uncharge characteristics,
> - charge is done one by one via demand-paging.
> - uncharge is done by
> - in chunk at munmap, truncate, exit, execve...
> - one by one via vmscan/paging.
>
> It seems we have a chance to do batched uncharge.
> This patch is a base patch for batched uncharge. For avoiding
> scattering memcg's structure, this patch adds memcg batch uncharge
> information to the task. please see start/end usage in next patch.
>
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> ---
> include/linux/memcontrol.h | 12 +++++++
> include/linux/sched.h | 7 ++++
> mm/memcontrol.c | 70 +++++++++++++++++++++++++++++++++++++++++----
> 3 files changed, 83 insertions(+), 6 deletions(-)
>
> Index: mmotm-2.6.31-Aug27/include/linux/memcontrol.h
> ===================================================================
> --- mmotm-2.6.31-Aug27.orig/include/linux/memcontrol.h
> +++ mmotm-2.6.31-Aug27/include/linux/memcontrol.h
> @@ -54,6 +54,10 @@ extern void mem_cgroup_rotate_lru_list(s
> extern void mem_cgroup_del_lru(struct page *page);
> extern void mem_cgroup_move_lists(struct page *page,
> enum lru_list from, enum lru_list to);
> +
> +extern void mem_cgroup_uncharge_batch_start(void);
> +extern void mem_cgroup_uncharge_batch_end(void);
> +
> extern void mem_cgroup_uncharge_page(struct page *page);
> extern void mem_cgroup_uncharge_cache_page(struct page *page);
> extern int mem_cgroup_shmem_charge_fallback(struct page *page,
> @@ -151,6 +155,14 @@ static inline void mem_cgroup_cancel_cha
> {
> }
>
> +static inline void mem_cgroup_uncharge_batch_start(void)
> +{
> +}
> +
> +static inline void mem_cgroup_uncharge_batch_start(void)
> +{
> +}
> +
> static inline void mem_cgroup_uncharge_page(struct page *page)
> {
> }
> Index: mmotm-2.6.31-Aug27/mm/memcontrol.c
> ===================================================================
> --- mmotm-2.6.31-Aug27.orig/mm/memcontrol.c
> +++ mmotm-2.6.31-Aug27/mm/memcontrol.c
> @@ -1837,7 +1837,35 @@ void mem_cgroup_cancel_charge_swapin(str
> css_put(&mem->css);
> }
>
> +static bool
> +__do_batch_uncharge(struct mem_cgroup *mem, const enum charge_type ctype)
> +{
> + struct memcg_batch_info *batch = NULL;
> + bool uncharge_memsw;
> + /* If swapout, usage of swap doesn't decrease */
> + if (do_swap_account && (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
> + uncharge_memsw = false;
> + else
> + uncharge_memsw = true;
>
> + if (current->memcg_batch.do_batch) {
> + batch = &current->memcg_batch;
> + if (batch->memcg == NULL) {
> + batch->memcg = mem;
> + css_get(&mem->css);
> + }
> + }
> + if (!batch || batch->memcg != mem) {
> + res_counter_uncharge(&mem->res, PAGE_SIZE);
> + if (uncharge_memsw)
> + res_counter_uncharge(&mem->memsw, PAGE_SIZE);
Could you please add a comment stating that if the memcg is different
we do a direct uncharge, and otherwise we batch?
> + } else {
> + batch->pages += PAGE_SIZE;
> + if (uncharge_memsw)
> + batch->memsw += PAGE_SIZE;
> + }
> + return soft_limit_excess;
> +}
> /*
> * uncharge if !page_mapped(page)
> */
> @@ -1886,12 +1914,8 @@ __mem_cgroup_uncharge_common(struct page
> break;
> }
>
> - if (!mem_cgroup_is_root(mem)) {
> - res_counter_uncharge(&mem->res, PAGE_SIZE);
> - if (do_swap_account &&
> - (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
> - res_counter_uncharge(&mem->memsw, PAGE_SIZE);
> - }
> + if (!mem_cgroup_is_root(mem))
> + __do_batch_uncharge(mem, ctype);
Now I am beginning to think we need a cond_mem_cgroup_is_not_root()
function.
> if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
> mem_cgroup_swap_statistics(mem, true);
> mem_cgroup_charge_statistics(mem, pc, false);
> @@ -1938,6 +1962,40 @@ void mem_cgroup_uncharge_cache_page(stru
> __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
> }
>
> +void mem_cgroup_uncharge_batch_start(void)
> +{
> + VM_BUG_ON(current->memcg_batch.do_batch);
> + /* avoid batch if killed by OOM */
> + if (test_thread_flag(TIF_MEMDIE))
> + return;
> + current->memcg_batch.do_batch = 1;
> + current->memcg_batch.memcg = NULL;
> + current->memcg_batch.pages = 0;
> + current->memcg_batch.memsw = 0;
> +}
> +
> +void mem_cgroup_uncharge_batch_end(void)
> +{
> + struct mem_cgroup *mem;
> +
> + if (!current->memcg_batch.do_batch)
> + return;
> +
> + current->memcg_batch.do_batch = 0;
> +
> + mem = current->memcg_batch.memcg;
> + if (!mem)
> + return;
> + if (current->memcg_batch.pages)
> + res_counter_uncharge(&mem->res,
> + current->memcg_batch.pages, NULL);
> + if (current->memcg_batch.memsw)
> + res_counter_uncharge(&mem->memsw,
> + current->memcg_batch.memsw, NULL);
> + /* we got css's refcnt */
> + cgroup_release_and_wakeup_rmdir(&mem->css);
Does this affect deletion of a group and delay it by a large amount?
> +}
> +
> #ifdef CONFIG_SWAP
> /*
> * called after __delete_from_swap_cache() and drop "page" account.
> Index: mmotm-2.6.31-Aug27/include/linux/sched.h
> ===================================================================
> --- mmotm-2.6.31-Aug27.orig/include/linux/sched.h
> +++ mmotm-2.6.31-Aug27/include/linux/sched.h
> @@ -1540,6 +1540,13 @@ struct task_struct {
> unsigned long trace_recursion;
> #endif /* CONFIG_TRACING */
> unsigned long stack_start;
> +#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
> + struct memcg_batch_info {
> + bool do_batch;
> + struct mem_cgroup *memcg;
> + long pages, memsw;
> + } memcg_batch;
> +#endif
> };
>
> /* Future-safe accessor for struct task_struct's cpus_allowed. */
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
--
Balbir
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2009-08-31 11:02 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-08-28 4:20 [RFC][PATCH 0/5] memcg: reduce lock conetion KAMEZAWA Hiroyuki
2009-08-28 4:23 ` [RFC][PATCH 1/5] memcg: change for softlimit KAMEZAWA Hiroyuki
2009-08-28 7:20 ` Balbir Singh
2009-08-28 7:35 ` KAMEZAWA Hiroyuki
2009-08-28 13:26 ` Balbir Singh
2009-08-28 14:29 ` KAMEZAWA Hiroyuki
2009-08-28 14:40 ` KAMEZAWA Hiroyuki
2009-08-28 14:46 ` Balbir Singh
2009-08-28 15:06 ` KAMEZAWA Hiroyuki
2009-08-28 15:08 ` Balbir Singh
2009-08-28 15:12 ` KAMEZAWA Hiroyuki
2009-08-28 15:15 ` Balbir Singh
2009-08-28 14:45 ` Balbir Singh
2009-08-28 14:58 ` KAMEZAWA Hiroyuki
2009-08-28 15:07 ` Balbir Singh
2009-08-28 4:24 ` [RFC][PATCH 2/5] memcg: uncharge in batched manner KAMEZAWA Hiroyuki
2009-08-28 4:53 ` KAMEZAWA Hiroyuki
2009-08-28 4:55 ` KAMEZAWA Hiroyuki
2009-08-28 15:10 ` Balbir Singh
2009-08-28 15:21 ` KAMEZAWA Hiroyuki
2009-08-28 16:03 ` Balbir Singh
2009-08-31 11:02 ` Balbir Singh [this message]
2009-08-31 11:59 ` KAMEZAWA Hiroyuki
2009-08-31 12:10 ` Balbir Singh
2009-08-31 12:14 ` KAMEZAWA Hiroyuki
2009-08-31 12:23 ` Balbir Singh
2009-08-31 14:36 ` KAMEZAWA Hiroyuki
2009-08-28 4:25 ` [RFC][PATCH 3/5] memcg: unmap, truncate, invalidate uncharege in batch KAMEZAWA Hiroyuki
2009-08-31 11:02 ` Balbir Singh
2009-08-28 4:27 ` [RFC][PATCH 4/5] memcg: per-cpu charge stock KAMEZAWA Hiroyuki
2009-08-31 11:10 ` Balbir Singh
2009-08-31 12:07 ` KAMEZAWA Hiroyuki
2009-08-28 4:28 ` [RFC][PATCH 5/5] memcg: drain per cpu stock KAMEZAWA Hiroyuki
2009-08-31 11:11 ` Balbir Singh
2009-08-31 12:09 ` KAMEZAWA Hiroyuki
2009-08-28 4:28 ` [RFC][PATCH 0/5] memcg: reduce lock conetion Balbir Singh
2009-08-28 4:33 ` KAMEZAWA Hiroyuki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090831110204.GG4770@balbir.in.ibm.com \
--to=balbir@linux.vnet.ibm.com \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nishimura@mxp.nes.nec.co.jp \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).