All of lore.kernel.org
 help / color / mirror / Atom feed
From: Balbir Singh <balbir@linux.vnet.ibm.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
	"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>
Subject: Re: [RFC][PATCH 4/14]  delay page_cgroup freeing
Date: Tue, 26 Aug 2008 17:16:20 +0530	[thread overview]
Message-ID: <48B3ED0C.6050409@linux.vnet.ibm.com> (raw)
In-Reply-To: <20080822203324.409635c6.kamezawa.hiroyu@jp.fujitsu.com>

KAMEZAWA Hiroyuki wrote:
> Freeing page_cgroup at mem_cgroup_uncharge() in lazy way.
> 
> In mem_cgroup_uncharge_common(), we don't free page_cgroup
> and just link it to per-cpu free queue.
> And remove it later by checking threshold.
> 
> This patch is a base patch for freeing page_cgroup by RCU patch.
> This patch depends on make-page_cgroup_flag-atomic patch.
> 
> Changelog: (v1) -> (v2)
>   - fixed mem_cgroup_move_list()'s checking of PcgObsolete()
>   - fixed force_empty.
> Changelog: (preview) -> (v1)
>   - Clean up.
>   - renamed functions
> 
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> 
> ---
>  mm/memcontrol.c |  122 ++++++++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 110 insertions(+), 12 deletions(-)
> 
> Index: mmtom-2.6.27-rc3+/mm/memcontrol.c
> ===================================================================
> --- mmtom-2.6.27-rc3+.orig/mm/memcontrol.c
> +++ mmtom-2.6.27-rc3+/mm/memcontrol.c
> @@ -164,11 +164,13 @@ struct page_cgroup {
>  	struct page *page;
>  	struct mem_cgroup *mem_cgroup;
>  	unsigned long flags;
> +	struct page_cgroup *next;
>  };
> 
>  enum {
>  	/* flags for mem_cgroup */
>  	Pcg_CACHE, /* charged as cache */
> +	Pcg_OBSOLETE,	/* this page cgroup is invalid (unused) */
>  	/* flags for LRU placement */
>  	Pcg_ACTIVE, /* page is active in this cgroup */
>  	Pcg_FILE, /* page is file system backed */
> @@ -199,6 +201,10 @@ static inline void __ClearPcg##uname(str
>  TESTPCGFLAG(Cache, CACHE)
>  __SETPCGFLAG(Cache, CACHE)
> 
> +/* No "Clear" routine for OBSOLETE flag */
> +TESTPCGFLAG(Obsolete, OBSOLETE);
> +SETPCGFLAG(Obsolete, OBSOLETE);
> +
>  /* LRU management flags (from global-lru definition) */
>  TESTPCGFLAG(File, FILE)
>  SETPCGFLAG(File, FILE)
> @@ -225,6 +231,18 @@ static enum zone_type page_cgroup_zid(st
>  	return page_zonenum(pc->page);
>  }
> 
> +/*
> + * per-cpu slot for freeing page_cgroup in lazy manner.
> + * All page_cgroup linked to this list is OBSOLETE.
> + */
> +struct mem_cgroup_sink_list {
> +	int count;
> +	struct page_cgroup *next;
> +};

Can't we reuse the lru field in page_cgroup to build the list? Do we need them on
the memory controller LRU if they are obsolete? I want to do something similar
for both additions and deletions - basically, reuse the pagevec style. I am OK
with having a list as well; in that case we can just reuse the LRU pointer.

> +DEFINE_PER_CPU(struct mem_cgroup_sink_list, memcg_sink_list);
> +#define MEMCG_LRU_THRESH	(16)
> +
> +
>  enum charge_type {
>  	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
>  	MEM_CGROUP_CHARGE_TYPE_MAPPED,
> @@ -440,7 +458,7 @@ void mem_cgroup_move_lists(struct page *
>  		/*
>  		 * check against the race with force_empty.
>  		 */
> -		if (likely(mem == pc->mem_cgroup))
> +		if (!PcgObsolete(pc) && likely(mem == pc->mem_cgroup))
>  			__mem_cgroup_move_lists(pc, lru);
>  		spin_unlock_irqrestore(&mz->lru_lock, flags);
>  	}
> @@ -531,6 +549,10 @@ unsigned long mem_cgroup_isolate_pages(u
>  	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
>  		if (scan >= nr_to_scan)
>  			break;
> +
> +		if (PcgObsolete(pc))
> +			continue;
> +
>  		page = pc->page;
> 
>  		if (unlikely(!PageLRU(page)))
> @@ -563,6 +585,81 @@ unsigned long mem_cgroup_isolate_pages(u
>  }
> 
>  /*
> + * Free obsolete page_cgroups which is linked to per-cpu drop list.
> + */
> +
> +static void __free_obsolete_page_cgroup(void)
> +{
> +	struct mem_cgroup *memcg;
> +	struct page_cgroup *pc, *next;
> +	struct mem_cgroup_per_zone *mz, *page_mz;
> +	struct mem_cgroup_sink_list *mcsl;
> +	unsigned long flags;
> +
> +	mcsl = &get_cpu_var(memcg_sink_list);
> +	next = mcsl->next;
> +	mcsl->next = NULL;
> +	mcsl->count = 0;
> +	put_cpu_var(memcg_sink_list);
> +
> +	mz = NULL;
> +
> +	local_irq_save(flags);
> +	while (next) {
> +		pc = next;
> +		VM_BUG_ON(!PcgObsolete(pc));
> +		next = pc->next;
> +		prefetch(next);
> +		page_mz = page_cgroup_zoneinfo(pc);
> +		memcg = pc->mem_cgroup;
> +		if (page_mz != mz) {
> +			if (mz)
> +				spin_unlock(&mz->lru_lock);
> +			mz = page_mz;
> +			spin_lock(&mz->lru_lock);
> +		}
> +		__mem_cgroup_remove_list(mz, pc);
> +		css_put(&memcg->css);
> +		kmem_cache_free(page_cgroup_cache, pc);
> +	}
> +	if (mz)
> +		spin_unlock(&mz->lru_lock);
> +	local_irq_restore(flags);
> +}
> +
> +static void free_obsolete_page_cgroup(struct page_cgroup *pc)
> +{
> +	int count;
> +	struct mem_cgroup_sink_list *mcsl;
> +
> +	mcsl = &get_cpu_var(memcg_sink_list);
> +	pc->next = mcsl->next;
> +	mcsl->next = pc;
> +	count = ++mcsl->count;
> +	put_cpu_var(memcg_sink_list);
> +	if (count >= MEMCG_LRU_THRESH)
> +		__free_obsolete_page_cgroup();
> +}
> +
> +/*
> + * Used when freeing memory resource controller to remove all
> + * page_cgroup (in obsolete list).
> + */
> +static DEFINE_MUTEX(memcg_force_drain_mutex);
> +
> +static void mem_cgroup_local_force_drain(struct work_struct *work)
> +{
> +	__free_obsolete_page_cgroup();
> +}
> +
> +static void mem_cgroup_all_force_drain(void)
> +{
> +	mutex_lock(&memcg_force_drain_mutex);
> +	schedule_on_each_cpu(mem_cgroup_local_force_drain);
> +	mutex_unlock(&memcg_force_drain_mutex);
> +}
> +
> +/*
>   * Charge the memory controller for page usage.
>   * Return
>   * 0 if the charge was successful
> @@ -627,6 +724,7 @@ static int mem_cgroup_charge_common(stru
>  	pc->mem_cgroup = mem;
>  	pc->page = page;
>  	pc->flags = 0;
> +	pc->next = NULL;
>  	/*
>  	 * If a page is accounted as a page cache, insert to inactive list.
>  	 * If anon, insert to active list.
> @@ -729,8 +827,6 @@ __mem_cgroup_uncharge_common(struct page
>  {
>  	struct page_cgroup *pc;
>  	struct mem_cgroup *mem;
> -	struct mem_cgroup_per_zone *mz;
> -	unsigned long flags;
> 
>  	if (mem_cgroup_subsys.disabled)
>  		return;
> @@ -748,20 +844,14 @@ __mem_cgroup_uncharge_common(struct page
>  	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
>  	    && ((PcgCache(pc) || page_mapped(page))))
>  		goto unlock;
> -
> -	mz = page_cgroup_zoneinfo(pc);
> -	spin_lock_irqsave(&mz->lru_lock, flags);
> -	__mem_cgroup_remove_list(mz, pc);
> -	spin_unlock_irqrestore(&mz->lru_lock, flags);
> -
> +	mem = pc->mem_cgroup;
> +	SetPcgObsolete(pc);
>  	page_assign_page_cgroup(page, NULL);
>  	unlock_page_cgroup(page);
> 
> -	mem = pc->mem_cgroup;
>  	res_counter_uncharge(&mem->res, PAGE_SIZE);
> -	css_put(&mem->css);
> +	free_obsolete_page_cgroup(pc);
> 
> -	kmem_cache_free(page_cgroup_cache, pc);
>  	return;
>  unlock:
>  	unlock_page_cgroup(page);
> @@ -937,6 +1027,14 @@ static void mem_cgroup_force_empty_list(
>  	spin_lock_irqsave(&mz->lru_lock, flags);
>  	while (!list_empty(list)) {
>  		pc = list_entry(list->prev, struct page_cgroup, lru);
> +		if (PcgObsolete(pc)) {
> +			list_move(&pc->lru, list);
> +			spin_unlock_irqrestore(&mz->lru_lock, flags);
> +			mem_cgroup_all_force_drain();
> +			yield();
> +			spin_lock_irqsave(&mz->lru_lock, flags);
> +			continue;
> +		}
>  		page = pc->page;
>  		if (!get_page_unless_zero(page)) {
>  			list_move(&pc->lru, list);
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>


-- 
	Balbir

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2008-08-26 11:46 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-08-22 11:27 [RFC][PATCH 0/14] Mem+Swap Controller v2 KAMEZAWA Hiroyuki
2008-08-22 11:30 ` [RFC][PATCH 1/14] memcg: unlimted root cgroup KAMEZAWA Hiroyuki
2008-08-22 22:51   ` Balbir Singh
2008-08-23  0:38     ` kamezawa.hiroyu
2008-08-25  3:19       ` KAMEZAWA Hiroyuki
2008-08-22 11:31 ` [RFC][PATCH 2/14] memcg: rewrite force_empty KAMEZAWA Hiroyuki
2008-08-25  3:21   ` KAMEZAWA Hiroyuki
2008-08-29 11:45   ` Daisuke Nishimura
2008-08-30  7:30     ` KAMEZAWA Hiroyuki
2008-08-22 11:32 ` [RFC][PATCH 3/14] memcg: atomic_flags KAMEZAWA Hiroyuki
2008-08-26  4:55   ` Balbir Singh
2008-08-26  8:46     ` kamezawa.hiroyu
2008-08-26  8:49       ` Balbir Singh
2008-08-26 23:41         ` KAMEZAWA Hiroyuki
2008-08-26 23:50     ` KAMEZAWA Hiroyuki
2008-08-27  1:58       ` KAMEZAWA Hiroyuki
2008-08-22 11:33 ` [RFC][PATCH 4/14] delay page_cgroup freeing KAMEZAWA Hiroyuki
2008-08-26 11:46   ` Balbir Singh [this message]
2008-08-26 23:55     ` KAMEZAWA Hiroyuki
2008-08-27  1:17       ` Balbir Singh
2008-08-27  1:39         ` KAMEZAWA Hiroyuki
2008-08-27  2:25           ` Balbir Singh
2008-08-27  2:46             ` KAMEZAWA Hiroyuki
2008-08-22 11:34 ` [RFC][PATCH 5/14] memcg: free page_cgroup by RCU KAMEZAWA Hiroyuki
2008-08-28 10:06   ` Balbir Singh
2008-08-28 10:44     ` KAMEZAWA Hiroyuki
2008-09-01  6:51       ` YAMAMOTO Takashi
2008-09-01  7:01         ` KAMEZAWA Hiroyuki
2008-08-22 11:35 ` [RFC][PATCH 6/14] memcg: lockless page cgroup KAMEZAWA Hiroyuki
2008-09-09  5:40   ` Daisuke Nishimura
2008-09-09  7:56     ` KAMEZAWA Hiroyuki
2008-09-09  8:11       ` Daisuke Nishimura
2008-09-09 11:11         ` KAMEZAWA Hiroyuki
2008-09-09 11:48           ` Balbir Singh
2008-09-09 14:24         ` Balbir Singh
2008-09-09 14:04       ` Balbir Singh
2008-08-22 11:36 ` [RFC][PATCH 7/14] memcg: add prefetch to spinlock KAMEZAWA Hiroyuki
2008-08-28 11:00   ` Balbir Singh
2008-08-22 11:37 ` [RFC][PATCH 8/14] memcg: make mapping null before uncharge KAMEZAWA Hiroyuki
2008-08-22 11:38 ` [RFC][PATCH 9/14] memcg: add page_cgroup.h file KAMEZAWA Hiroyuki
2008-08-22 11:39 ` [RFC][PATCH 10/14] memcg: replace res_counter KAMEZAWA Hiroyuki
2008-08-27  0:44   ` Daisuke Nishimura
2008-08-27  1:26     ` KAMEZAWA Hiroyuki
2008-08-22 11:40 ` [RFC][PATCH 11/14] memcg: mem_cgroup private ID KAMEZAWA Hiroyuki
2008-08-22 11:41 ` [RFC][PATCH 12/14] memcg: mem+swap controller Kconfig KAMEZAWA Hiroyuki
2008-08-22 11:41 ` [RFC][PATCH 13/14] memcg: mem+swap counter KAMEZAWA Hiroyuki
2008-08-28  8:51   ` Daisuke Nishimura
2008-08-28  9:32     ` KAMEZAWA Hiroyuki
2008-08-22 11:44 ` [RFC][PATCH 14/14]memcg: mem+swap accounting KAMEZAWA Hiroyuki
2008-09-01  7:15   ` Daisuke Nishimura
2008-09-01  7:58     ` KAMEZAWA Hiroyuki
2008-09-01  8:53       ` Daisuke Nishimura
2008-09-01  9:53         ` KAMEZAWA Hiroyuki
2008-09-01 10:21           ` Daisuke Nishimura
2008-09-02  2:21           ` Daisuke Nishimura
2008-09-02 11:09           ` Daisuke Nishimura
2008-09-02 11:40             ` KAMEZAWA Hiroyuki
2008-09-03  6:23               ` Daisuke Nishimura
2008-09-03  7:05                 ` KAMEZAWA Hiroyuki
2008-08-22 13:20 ` [RFC][PATCH 0/14] Mem+Swap Controller v2 Balbir Singh
2008-08-22 15:34   ` kamezawa.hiroyu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48B3ED0C.6050409@linux.vnet.ibm.com \
    --to=balbir@linux.vnet.ibm.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-mm@kvack.org \
    --cc=nishimura@mxp.nes.nec.co.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.