All of lore.kernel.org
 help / color / mirror / Atom feed
From: Balbir Singh <balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
To: KAMEZAWA Hiroyuki
	<kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
Cc: "containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org"
	<containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org>,
	"yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org"
	<yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org>,
	rientjes-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org
Subject: Re: [RFC][PATCH] memory cgroup enhancements updated [2/10] force empty interface
Date: Wed, 24 Oct 2007 19:22:00 +0530	[thread overview]
Message-ID: <471F4E00.2060809@linux.vnet.ibm.com> (raw)
In-Reply-To: <20071019183031.9ef60d07.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>

KAMEZAWA Hiroyuki wrote:
> This patch adds an interface "memory.force_empty".
> Any write to this file will drop all charges in this cgroup if
> there is no task under.
> 
> %echo 1 > /....../memory.force_empty
> 
> will drop all charges of memory cgroup if cgroup's tasks is empty.
> 
> This is useful to invoke rmdir() against memory cgroup successfully.
> 
> Tested and worked well on x86_64/fake-NUMA system.
> 
> Changelog v4 -> v5:
>   - added comments to mem_cgroup_force_empty()
>   - made mem_force_empty_read return -EINVAL
>   - cleanup mem_cgroup_force_empty_list()
>   - removed SWAP_CLUSTER_MAX
> 
> Changelog v3 -> v4:
>   - adjusted to 2.6.23-mm1
>   - fixed typo
>   - changes buf[2]="0" to static const
> 
> Changelog v2 -> v3:
>   - changed the name from force_reclaim to force_empty.
> 
> Changelog v1 -> v2:
>   - added a new interface force_reclaim.
>   - changes spin_lock to spin_lock_irqsave().
> 
> 
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
> 
> 
>  mm/memcontrol.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 103 insertions(+), 7 deletions(-)
> 
> Index: devel-2.6.23-mm1/mm/memcontrol.c
> ===================================================================
> --- devel-2.6.23-mm1.orig/mm/memcontrol.c
> +++ devel-2.6.23-mm1/mm/memcontrol.c
> @@ -480,6 +480,7 @@ void mem_cgroup_uncharge(struct page_cgr
>  		page = pc->page;
>  		/*
>  		 * get page->cgroup and clear it under lock.
> +		 * force_empty can drop page->cgroup without checking refcnt.
>  		 */
>  		if (clear_page_cgroup(page, pc) == pc) {
>  			mem = pc->mem_cgroup;
> @@ -489,13 +490,6 @@ void mem_cgroup_uncharge(struct page_cgr
>  			list_del_init(&pc->lru);
>  			spin_unlock_irqrestore(&mem->lru_lock, flags);
>  			kfree(pc);
> -		} else {
> -			/*
> -			 * Note:This will be removed when force-empty patch is
> -			 * applied. just show warning here.
> -			 */
> -			printk(KERN_ERR "Race in mem_cgroup_uncharge() ?");
> -			dump_stack();
>  		}
>  	}
>  }
> @@ -543,6 +537,76 @@ retry:
>  	return;
>  }
> 
> +/*
> + * This routine traverse page_cgroup in given list and drop them all.
> + * This routine ignores page_cgroup->ref_cnt.
> + * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
> + */
> +#define FORCE_UNCHARGE_BATCH	(128)
> +static void
> +mem_cgroup_force_empty_list(struct mem_cgroup *mem, struct list_head *list)
> +{
> +	struct page_cgroup *pc;
> +	struct page *page;
> +	int count;
> +	unsigned long flags;
> +
> +retry:
> +	count = FORCE_UNCHARGE_BATCH;
> +	spin_lock_irqsave(&mem->lru_lock, flags);
> +
> +	while (--count && !list_empty(list)) {
> +		pc = list_entry(list->prev, struct page_cgroup, lru);
> +		page = pc->page;
> +		/* Avoid race with charge */
> +		atomic_set(&pc->ref_cnt, 0);
> +		if (clear_page_cgroup(page, pc) == pc) {
> +			css_put(&mem->css);
> +			res_counter_uncharge(&mem->res, PAGE_SIZE);
> +			list_del_init(&pc->lru);
> +			kfree(pc);
> +		} else 	/* being uncharged ? ...do relax */
> +			break;
> +	}
> +	spin_unlock_irqrestore(&mem->lru_lock, flags);
> +	if (!list_empty(list)) {
> +		cond_resched();
> +		goto retry;
> +	}
> +	return;
> +}
> +

We could potentially share some of this code with the background reclaim
code being worked upon by YAMAMOTO-San.

> +/*
> + * make mem_cgroup's charge to be 0 if there is no task.
> + * This enables deleting this mem_cgroup.
> + */
> +
> +int mem_cgroup_force_empty(struct mem_cgroup *mem)
> +{
> +	int ret = -EBUSY;
> +	css_get(&mem->css);
> +	/*
> +	 * page reclaim code (kswapd etc..) will move pages between
> +`	 * active_list <-> inactive_list while we don't take a lock.
> +	 * So, we have to do loop here until all lists are empty.
> +	 */
> +	while (!(list_empty(&mem->active_list) &&
> +		 list_empty(&mem->inactive_list))) {
> +		if (atomic_read(&mem->css.cgroup->count) > 0)
> +			goto out;
> +		/* drop all page_cgroup in active_list */
> +		mem_cgroup_force_empty_list(mem, &mem->active_list);
> +		/* drop all page_cgroup in inactive_list */
> +		mem_cgroup_force_empty_list(mem, &mem->inactive_list);
> +	}
> +	ret = 0;
> +out:
> +	css_put(&mem->css);
> +	return ret;
> +}
> +
> +
> +
>  int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
>  {
>  	*tmp = memparse(buf, &buf);
> @@ -628,6 +692,33 @@ static ssize_t mem_control_type_read(str
>  			ppos, buf, s - buf);
>  }
> 
> +
> +static ssize_t mem_force_empty_write(struct cgroup *cont,
> +				struct cftype *cft, struct file *file,
> +				const char __user *userbuf,
> +				size_t nbytes, loff_t *ppos)
> +{
> +	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
> +	int ret;
> +	ret = mem_cgroup_force_empty(mem);
> +	if (!ret)
> +		ret = nbytes;
> +	return ret;
> +}
> +
> +/*
> + * Note: This should be removed if cgroup supports write-only file.
> + */
> +
> +static ssize_t mem_force_empty_read(struct cgroup *cont,
> +				struct cftype *cft,
> +				struct file *file, char __user *userbuf,
> +				size_t nbytes, loff_t *ppos)
> +{
> +	return -EINVAL;
> +}
> +
> +
>  static struct cftype mem_cgroup_files[] = {
>  	{
>  		.name = "usage_in_bytes",
> @@ -650,6 +741,11 @@ static struct cftype mem_cgroup_files[] 
>  		.write = mem_control_type_write,
>  		.read = mem_control_type_read,
>  	},
> +	{
> +		.name = "force_empty",
> +		.write = mem_force_empty_write,
> +		.read = mem_force_empty_read,
> +	},
>  };
> 
>  static struct mem_cgroup init_mem_cgroup;
> 


-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

  parent reply	other threads:[~2007-10-24 13:52 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-19  9:24 [RFC][PATCH] memory cgroup enhancements updated [0/10] intro KAMEZAWA Hiroyuki
     [not found] ` <20071019182436.485e20cd.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-10-19  9:29   ` [RFC][PATCH] memory cgroup enhancements updated [1/10] try_to_free_mem_cgroup_pages bugfix KAMEZAWA Hiroyuki
     [not found]     ` <20071019182952.0a414751.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-10-23  4:00       ` Balbir Singh
     [not found]         ` <471D71F5.9060007-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-10-23  4:19           ` KAMEZAWA Hiroyuki
2007-10-19  9:30   ` [RFC][PATCH] memory cgroup enhancements updated [2/10] force empty interface KAMEZAWA Hiroyuki
     [not found]     ` <20071019183031.9ef60d07.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-10-24 13:52       ` Balbir Singh [this message]
2007-10-19  9:31   ` [RFC][PATCH] memory cgroup enhancements updated [3/10] remember pagecache KAMEZAWA Hiroyuki
     [not found]     ` <20071019183106.b5afda2f.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-10-24 13:56       ` Balbir Singh
     [not found]         ` <471F4F12.8050708-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-10-24 15:47           ` KAMEZAWA Hiroyuki
2007-10-19  9:31   ` [RFC][PATCH] memory cgroup enhancements updated [4/10] record page cgroup on active list KAMEZAWA Hiroyuki
2007-10-19  9:32   ` [RFC][PATCH] memory cgroup enhancements updated [5/10] per cpu accounting KAMEZAWA Hiroyuki
2007-10-19  9:33   ` [RFC][PATCH] memory cgroup enhancements updated [6/10] memory.stat interface KAMEZAWA Hiroyuki
2007-10-19  9:34   ` [RFC][PATCH] memory cgroup enhancements updated [7/10] RSS/CACHE failcnt KAMEZAWA Hiroyuki
     [not found]     ` <20071019183452.3b7057ca.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-10-19 16:54       ` [Devel] " Paul Menage
     [not found]         ` <6599ad830710190954v276e8997l889be2b0ddfcaa61-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-10-19 22:33           ` KAMEZAWA Hiroyuki
     [not found]             ` <20071020073338.2cf41a34.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-10-19 22:40               ` KAMEZAWA Hiroyuki
2007-10-19  9:35   ` [RFC][PATCH] memory cgroup enhancements updated [8/10] add pre_destroy handler KAMEZAWA Hiroyuki
     [not found]     ` <20071019183544.0f5172b2.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-10-24 14:49       ` Balbir Singh
     [not found]         ` <471F5B7E.2020007-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-10-24 15:46           ` KAMEZAWA Hiroyuki
2007-10-19  9:36   ` [RFC][PATCH] memory cgroup enhancements updated [9/10] implicit force empty at rmdir KAMEZAWA Hiroyuki
2007-10-19  9:37   ` [RFC][PATCH] memory cgroup enhancements updated [10/10] NUMA aware account KAMEZAWA Hiroyuki
     [not found]     ` <20071019183730.a6e8c105.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2007-10-24 14:59       ` Balbir Singh
     [not found]         ` <471F5DBC.2070303-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-10-24 15:53           ` KAMEZAWA Hiroyuki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=471F4E00.2060809@linux.vnet.ibm.com \
    --to=balbir-23vcf4htsmix0ybbhkvfkdbpr1lh4cv8@public.gmane.org \
    --cc=containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org \
    --cc=kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org \
    --cc=rientjes-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org \
    --cc=yamamoto-jCdQPDEk3idL9jVzuh4AOg@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.