linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting
@ 2012-02-29  5:25 Hugh Dickins
  2012-02-29  5:26 ` [PATCH next] memcg: fix deadlock by avoiding stat lock when anon Hugh Dickins
                   ` (5 more replies)
  0 siblings, 6 replies; 20+ messages in thread
From: Hugh Dickins @ 2012-02-29  5:25 UTC (permalink / raw)
  To: Andrew Morton
  Cc: KAMEZAWA Hiroyuki, Johannes Weiner, Konstantin Khlebnikov,
	linux-kernel, linux-mm

We have forgotten the rules of lock nesting: the irq-safe ones must be
taken inside the non-irq-safe ones, otherwise we are open to deadlock:

CPU0                          CPU1
----                          ----
lock(&(&pc->lock)->rlock);
                              local_irq_disable();
                              lock(&(&zone->lru_lock)->rlock);
                              lock(&(&pc->lock)->rlock);
<Interrupt>
lock(&(&zone->lru_lock)->rlock);

To check a different locking issue, I happened to add a spin_lock to
memcg's bit_spin_lock in lock_page_cgroup(), and lockdep very quickly
complained about __mem_cgroup_commit_charge_lrucare() (on CPU1 above).

So delete __mem_cgroup_commit_charge_lrucare(), passing a bool lrucare
to __mem_cgroup_commit_charge() instead, taking zone->lru_lock under
lock_page_cgroup() in the lrucare case.

The original was using spin_lock_irqsave, but we'd be in more trouble
if it were ever called at interrupt time: unconditional _irq is enough.
And ClearPageLRU before del from lru, SetPageLRU before add to lru: no
strong reason, but that is the ordering used consistently elsewhere.

Signed-off-by: Hugh Dickins <hughd@google.com>
---
Not needed for -stable: this issue came into 3.3-rc1.

 mm/memcontrol.c |   72 +++++++++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 35 deletions(-)

--- 3.3-rc5/mm/memcontrol.c	2012-02-25 13:02:05.165830574 -0800
+++ linux/mm/memcontrol.c	2012-02-27 23:24:54.676160755 -0800
@@ -2408,8 +2408,12 @@ static void __mem_cgroup_commit_charge(s
 				       struct page *page,
 				       unsigned int nr_pages,
 				       struct page_cgroup *pc,
-				       enum charge_type ctype)
+				       enum charge_type ctype,
+				       bool lrucare)
 {
+	struct zone *uninitialized_var(zone);
+	bool was_on_lru = false;
+
 	lock_page_cgroup(pc);
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
@@ -2420,6 +2424,21 @@ static void __mem_cgroup_commit_charge(s
 	 * we don't need page_cgroup_lock about tail pages, becase they are not
 	 * accessed by any other context at this point.
 	 */
+
+	/*
+	 * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
+	 * may already be on some other mem_cgroup's LRU.  Take care of it.
+	 */
+	if (lrucare) {
+		zone = page_zone(page);
+		spin_lock_irq(&zone->lru_lock);
+		if (PageLRU(page)) {
+			ClearPageLRU(page);
+			del_page_from_lru_list(zone, page, page_lru(page));
+			was_on_lru = true;
+		}
+	}
+
 	pc->mem_cgroup = memcg;
 	/*
 	 * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2443,9 +2462,18 @@ static void __mem_cgroup_commit_charge(s
 		break;
 	}
 
+	if (lrucare) {
+		if (was_on_lru) {
+			VM_BUG_ON(PageLRU(page));
+			SetPageLRU(page);
+			add_page_to_lru_list(zone, page, page_lru(page));
+		}
+		spin_unlock_irq(&zone->lru_lock);
+	}
+
 	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
 	unlock_page_cgroup(pc);
-	WARN_ON_ONCE(PageLRU(page));
+
 	/*
 	 * "charge_statistics" updated event counter. Then, check it.
 	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
@@ -2643,7 +2671,7 @@ static int mem_cgroup_charge_common(stru
 	ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
 	if (ret == -ENOMEM)
 		return ret;
-	__mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
+	__mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype, false);
 	return 0;
 }
 
@@ -2663,35 +2691,6 @@ static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 					enum charge_type ctype);
 
-static void
-__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
-					enum charge_type ctype)
-{
-	struct page_cgroup *pc = lookup_page_cgroup(page);
-	struct zone *zone = page_zone(page);
-	unsigned long flags;
-	bool removed = false;
-
-	/*
-	 * In some case, SwapCache, FUSE(splice_buf->radixtree), the page
-	 * is already on LRU. It means the page may on some other page_cgroup's
-	 * LRU. Take care of it.
-	 */
-	spin_lock_irqsave(&zone->lru_lock, flags);
-	if (PageLRU(page)) {
-		del_page_from_lru_list(zone, page, page_lru(page));
-		ClearPageLRU(page);
-		removed = true;
-	}
-	__mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
-	if (removed) {
-		add_page_to_lru_list(zone, page, page_lru(page));
-		SetPageLRU(page);
-	}
-	spin_unlock_irqrestore(&zone->lru_lock, flags);
-	return;
-}
-
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
@@ -2769,13 +2768,16 @@ static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
 					enum charge_type ctype)
 {
+	struct page_cgroup *pc;
+
 	if (mem_cgroup_disabled())
 		return;
 	if (!memcg)
 		return;
 	cgroup_exclude_rmdir(&memcg->css);
 
-	__mem_cgroup_commit_charge_lrucare(page, memcg, ctype);
+	pc = lookup_page_cgroup(page);
+	__mem_cgroup_commit_charge(memcg, page, 1, pc, ctype, true);
 	/*
 	 * Now swap is on-memory. This means this page may be
 	 * counted both as mem and swap....double count.
@@ -3248,7 +3250,7 @@ int mem_cgroup_prepare_migration(struct
 		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
 	else
 		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype);
+	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype, false);
 	return ret;
 }
 
@@ -3332,7 +3334,7 @@ void mem_cgroup_replace_page_cache(struc
 	 * the newpage may be on LRU(or pagevec for LRU) already. We lock
 	 * LRU while we overwrite pc->mem_cgroup.
 	 */
-	__mem_cgroup_commit_charge_lrucare(newpage, memcg, type);
+	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, type, true);
 }
 
 #ifdef CONFIG_DEBUG_VM

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH next] memcg: fix deadlock by avoiding stat lock when anon
  2012-02-29  5:25 [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting Hugh Dickins
@ 2012-02-29  5:26 ` Hugh Dickins
  2012-02-29 19:35   ` Johannes Weiner
  2012-02-29  5:28 ` [PATCH next] memcg: remove PCG_FILE_MAPPED fix cosmetic fix Hugh Dickins
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Hugh Dickins @ 2012-02-29  5:26 UTC (permalink / raw)
  To: Andrew Morton
  Cc: KAMEZAWA Hiroyuki, Johannes Weiner, Konstantin Khlebnikov,
	linux-kernel, linux-mm

Fix deadlock in "memcg: use new logic for page stat accounting".

page_remove_rmap() first calls mem_cgroup_begin_update_page_stat(),
which may take move_lock_mem_cgroup(), unlocked at the end of
page_remove_rmap() by mem_cgroup_end_update_page_stat().

The PageAnon case never needs to mem_cgroup_dec_page_stat(page,
MEMCG_NR_FILE_MAPPED); but it often needs to mem_cgroup_uncharge_page(),
which does lock_page_cgroup(), while holding that move_lock_mem_cgroup().
Whereas mem_cgroup_move_account() calls move_lock_mem_cgroup() while
holding lock_page_cgroup().

Since mem_cgroup_begin and end are unnecessary here for PageAnon,
simply avoid the deadlock and wasted calls in that case.

Signed-off-by: Hugh Dickins <hughd@google.com>
---

 mm/rmap.c |   11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

--- 3.3-rc5-next/mm/rmap.c	2012-02-26 23:51:46.506050210 -0800
+++ linux/mm/rmap.c	2012-02-27 20:25:56.423324211 -0800
@@ -1166,10 +1166,12 @@ void page_add_file_rmap(struct page *pag
  */
 void page_remove_rmap(struct page *page)
 {
+	bool anon = PageAnon(page);
 	bool locked;
 	unsigned long flags;
 
-	mem_cgroup_begin_update_page_stat(page, &locked, &flags);
+	if (!anon)
+		mem_cgroup_begin_update_page_stat(page, &locked, &flags);
 	/* page still mapped by someone else? */
 	if (!atomic_add_negative(-1, &page->_mapcount))
 		goto out;
@@ -1181,7 +1183,7 @@ void page_remove_rmap(struct page *page)
 	 * not if it's in swapcache - there might be another pte slot
 	 * containing the swap entry, but page not yet written to swap.
 	 */
-	if ((!PageAnon(page) || PageSwapCache(page)) &&
+	if ((!anon || PageSwapCache(page)) &&
 	    page_test_and_clear_dirty(page_to_pfn(page), 1))
 		set_page_dirty(page);
 	/*
@@ -1190,7 +1192,7 @@ void page_remove_rmap(struct page *page)
 	 */
 	if (unlikely(PageHuge(page)))
 		goto out;
-	if (PageAnon(page)) {
+	if (anon) {
 		mem_cgroup_uncharge_page(page);
 		if (!PageTransHuge(page))
 			__dec_zone_page_state(page, NR_ANON_PAGES);
@@ -1211,7 +1213,8 @@ void page_remove_rmap(struct page *page)
 	 * faster for those pages still in swapcache.
 	 */
 out:
-	mem_cgroup_end_update_page_stat(page, &locked, &flags);
+	if (!anon)
+		mem_cgroup_end_update_page_stat(page, &locked, &flags);
 }
 
 /*

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH next] memcg: remove PCG_FILE_MAPPED fix cosmetic fix
  2012-02-29  5:25 [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting Hugh Dickins
  2012-02-29  5:26 ` [PATCH next] memcg: fix deadlock by avoiding stat lock when anon Hugh Dickins
@ 2012-02-29  5:28 ` Hugh Dickins
  2012-02-29  5:40   ` KAMEZAWA Hiroyuki
  2012-02-29 19:35   ` Johannes Weiner
  2012-02-29  5:30 ` [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix Hugh Dickins
                   ` (3 subsequent siblings)
  5 siblings, 2 replies; 20+ messages in thread
From: Hugh Dickins @ 2012-02-29  5:28 UTC (permalink / raw)
  To: Andrew Morton
  Cc: KAMEZAWA Hiroyuki, Johannes Weiner, Konstantin Khlebnikov,
	linux-kernel, linux-mm

mem_cgroup_move_account() begins with "anon = PageAnon(page)", and
then anon is used thereafter: testing PageAnon(page) in the middle
makes the reader wonder if there's some race to guard against - no.

Signed-off-by: Hugh Dickins <hughd@google.com>
---

 mm/memcontrol.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- 3.3-rc5-next/mm/memcontrol.c	2012-02-27 09:56:59.072001463 -0800
+++ linux/mm/memcontrol.c	2012-02-28 20:45:43.488100423 -0800
@@ -2560,7 +2560,7 @@ static int mem_cgroup_move_account(struc
 
 	move_lock_mem_cgroup(from, &flags);
 
-	if (!PageAnon(page) && page_mapped(page)) {
+	if (!anon && page_mapped(page)) {
 		/* Update mapped_file data for mem_cgroup */
 		preempt_disable();
 		__this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix
  2012-02-29  5:25 [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting Hugh Dickins
  2012-02-29  5:26 ` [PATCH next] memcg: fix deadlock by avoiding stat lock when anon Hugh Dickins
  2012-02-29  5:28 ` [PATCH next] memcg: remove PCG_FILE_MAPPED fix cosmetic fix Hugh Dickins
@ 2012-02-29  5:30 ` Hugh Dickins
  2012-02-29  5:54   ` KAMEZAWA Hiroyuki
  2012-02-29 19:43   ` Johannes Weiner
  2012-02-29  5:39 ` [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting KAMEZAWA Hiroyuki
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 20+ messages in thread
From: Hugh Dickins @ 2012-02-29  5:30 UTC (permalink / raw)
  To: Andrew Morton
  Cc: KAMEZAWA Hiroyuki, Johannes Weiner, Konstantin Khlebnikov,
	linux-kernel, linux-mm

Swapping tmpfs loads show absurd wrapped rss and wrong cache in memcg's
memory.stat statistics: __mem_cgroup_uncharge_common() is failing to
distinguish the anon and tmpfs cases.

Mostly we can decide between them by PageAnon, which is reliable once
it has been set; but there are several callers who need to uncharge a
MEM_CGROUP_CHARGE_TYPE_MAPPED page before it was fully initialized,
so allow that case to override the PageAnon decision.

Signed-off-by: Hugh Dickins <hughd@google.com>
---

 mm/memcontrol.c |    7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

--- 3.3-rc5-next/mm/memcontrol.c	2012-02-25 10:06:52.496035568 -0800
+++ linux/mm/memcontrol.c	2012-02-26 10:44:32.146365398 -0800
@@ -2944,13 +2944,16 @@ __mem_cgroup_uncharge_common(struct page
 	if (!PageCgroupUsed(pc))
 		goto unlock_out;
 
+	anon = PageAnon(page);
+
 	switch (ctype) {
 	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
+		anon = true;
+		/* fallthrough */
 	case MEM_CGROUP_CHARGE_TYPE_DROP:
 		/* See mem_cgroup_prepare_migration() */
 		if (page_mapped(page) || PageCgroupMigration(pc))
 			goto unlock_out;
-		anon = true;
 		break;
 	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
 		if (!PageAnon(page)) {	/* Shared memory */
@@ -2958,10 +2961,8 @@ __mem_cgroup_uncharge_common(struct page
 				goto unlock_out;
 		} else if (page_mapped(page)) /* Anon */
 				goto unlock_out;
-		anon = true;
 		break;
 	default:
-		anon = false;
 		break;
 	}
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting
  2012-02-29  5:25 [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting Hugh Dickins
                   ` (2 preceding siblings ...)
  2012-02-29  5:30 ` [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix Hugh Dickins
@ 2012-02-29  5:39 ` KAMEZAWA Hiroyuki
  2012-02-29 19:00 ` Johannes Weiner
  2012-02-29 22:04 ` Andrew Morton
  5 siblings, 0 replies; 20+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-02-29  5:39 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, Johannes Weiner, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Tue, 28 Feb 2012 21:25:02 -0800 (PST)
Hugh Dickins <hughd@google.com> wrote:

> We have forgotten the rules of lock nesting: the irq-safe ones must be
> taken inside the non-irq-safe ones, otherwise we are open to deadlock:
> 
> CPU0                          CPU1
> ----                          ----
> lock(&(&pc->lock)->rlock);
>                               local_irq_disable();
>                               lock(&(&zone->lru_lock)->rlock);
>                               lock(&(&pc->lock)->rlock);
> <Interrupt>
> lock(&(&zone->lru_lock)->rlock);
> 
> To check a different locking issue, I happened to add a spin_lock to
> memcg's bit_spin_lock in lock_page_cgroup(), and lockdep very quickly
> complained about __mem_cgroup_commit_charge_lrucare() (on CPU1 above).
> 
> So delete __mem_cgroup_commit_charge_lrucare(), passing a bool lrucare
> to __mem_cgroup_commit_charge() instead, taking zone->lru_lock under
> lock_page_cgroup() in the lrucare case.
> 
> The original was using spin_lock_irqsave, but we'd be in more trouble
> if it were ever called at interrupt time: unconditional _irq is enough.
> And ClearPageLRU before del from lru, SetPageLRU before add to lru: no
> strong reason, but that is the ordering used consistently elsewhere.
> 
> Signed-off-by: Hugh Dickins <hughd@google.com>

Thank you. And I'm sorry for adding new bug :(

This is a fix for "memcg: simplify corner case handling of LRU"
as commit 36b62ad539498d00c2d280a151abad5f7630fa73 in upstream.

Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>


> ---
> Not needed for -stable: this issue came into 3.3-rc1.
> 
>  mm/memcontrol.c |   72 +++++++++++++++++++++++-----------------------
>  1 file changed, 37 insertions(+), 35 deletions(-)
> 
> --- 3.3-rc5/mm/memcontrol.c	2012-02-25 13:02:05.165830574 -0800
> +++ linux/mm/memcontrol.c	2012-02-27 23:24:54.676160755 -0800
> @@ -2408,8 +2408,12 @@ static void __mem_cgroup_commit_charge(s
>  				       struct page *page,
>  				       unsigned int nr_pages,
>  				       struct page_cgroup *pc,
> -				       enum charge_type ctype)
> +				       enum charge_type ctype,
> +				       bool lrucare)
>  {
> +	struct zone *uninitialized_var(zone);
> +	bool was_on_lru = false;
> +
>  	lock_page_cgroup(pc);
>  	if (unlikely(PageCgroupUsed(pc))) {
>  		unlock_page_cgroup(pc);
> @@ -2420,6 +2424,21 @@ static void __mem_cgroup_commit_charge(s
>  	 * we don't need page_cgroup_lock about tail pages, becase they are not
>  	 * accessed by any other context at this point.
>  	 */
> +
> +	/*
> +	 * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
> +	 * may already be on some other mem_cgroup's LRU.  Take care of it.
> +	 */
> +	if (lrucare) {
> +		zone = page_zone(page);
> +		spin_lock_irq(&zone->lru_lock);
> +		if (PageLRU(page)) {
> +			ClearPageLRU(page);
> +			del_page_from_lru_list(zone, page, page_lru(page));
> +			was_on_lru = true;
> +		}
> +	}
> +
>  	pc->mem_cgroup = memcg;
>  	/*
>  	 * We access a page_cgroup asynchronously without lock_page_cgroup().
> @@ -2443,9 +2462,18 @@ static void __mem_cgroup_commit_charge(s
>  		break;
>  	}
>  
> +	if (lrucare) {
> +		if (was_on_lru) {
> +			VM_BUG_ON(PageLRU(page));
> +			SetPageLRU(page);
> +			add_page_to_lru_list(zone, page, page_lru(page));
> +		}
> +		spin_unlock_irq(&zone->lru_lock);
> +	}
> +
>  	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
>  	unlock_page_cgroup(pc);
> -	WARN_ON_ONCE(PageLRU(page));
> +
>  	/*
>  	 * "charge_statistics" updated event counter. Then, check it.
>  	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
> @@ -2643,7 +2671,7 @@ static int mem_cgroup_charge_common(stru
>  	ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
>  	if (ret == -ENOMEM)
>  		return ret;
> -	__mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
> +	__mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype, false);
>  	return 0;
>  }
>  
> @@ -2663,35 +2691,6 @@ static void
>  __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
>  					enum charge_type ctype);
>  
> -static void
> -__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
> -					enum charge_type ctype)
> -{
> -	struct page_cgroup *pc = lookup_page_cgroup(page);
> -	struct zone *zone = page_zone(page);
> -	unsigned long flags;
> -	bool removed = false;
> -
> -	/*
> -	 * In some case, SwapCache, FUSE(splice_buf->radixtree), the page
> -	 * is already on LRU. It means the page may on some other page_cgroup's
> -	 * LRU. Take care of it.
> -	 */
> -	spin_lock_irqsave(&zone->lru_lock, flags);
> -	if (PageLRU(page)) {
> -		del_page_from_lru_list(zone, page, page_lru(page));
> -		ClearPageLRU(page);
> -		removed = true;
> -	}
> -	__mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
> -	if (removed) {
> -		add_page_to_lru_list(zone, page, page_lru(page));
> -		SetPageLRU(page);
> -	}
> -	spin_unlock_irqrestore(&zone->lru_lock, flags);
> -	return;
> -}
> -
>  int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
>  				gfp_t gfp_mask)
>  {
> @@ -2769,13 +2768,16 @@ static void
>  __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
>  					enum charge_type ctype)
>  {
> +	struct page_cgroup *pc;
> +
>  	if (mem_cgroup_disabled())
>  		return;
>  	if (!memcg)
>  		return;
>  	cgroup_exclude_rmdir(&memcg->css);
>  
> -	__mem_cgroup_commit_charge_lrucare(page, memcg, ctype);
> +	pc = lookup_page_cgroup(page);
> +	__mem_cgroup_commit_charge(memcg, page, 1, pc, ctype, true);
>  	/*
>  	 * Now swap is on-memory. This means this page may be
>  	 * counted both as mem and swap....double count.
> @@ -3248,7 +3250,7 @@ int mem_cgroup_prepare_migration(struct
>  		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
>  	else
>  		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
> -	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype);
> +	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype, false);
>  	return ret;
>  }
>  
> @@ -3332,7 +3334,7 @@ void mem_cgroup_replace_page_cache(struc
>  	 * the newpage may be on LRU(or pagevec for LRU) already. We lock
>  	 * LRU while we overwrite pc->mem_cgroup.
>  	 */
> -	__mem_cgroup_commit_charge_lrucare(newpage, memcg, type);
> +	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, type, true);
>  }
>  
>  #ifdef CONFIG_DEBUG_VM
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH next] memcg: remove PCG_FILE_MAPPED fix cosmetic fix
  2012-02-29  5:28 ` [PATCH next] memcg: remove PCG_FILE_MAPPED fix cosmetic fix Hugh Dickins
@ 2012-02-29  5:40   ` KAMEZAWA Hiroyuki
  2012-02-29 19:35   ` Johannes Weiner
  1 sibling, 0 replies; 20+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-02-29  5:40 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, Johannes Weiner, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Tue, 28 Feb 2012 21:28:40 -0800 (PST)
Hugh Dickins <hughd@google.com> wrote:

> mem_cgroup_move_account() begins with "anon = PageAnon(page)", and
> then anon is used thereafter: testing PageAnon(page) in the middle
> makes the reader wonder if there's some race to guard against - no.
> 
> Signed-off-by: Hugh Dickins <hughd@google.com>

Thanks.

Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

> ---
> 
>  mm/memcontrol.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> --- 3.3-rc5-next/mm/memcontrol.c	2012-02-27 09:56:59.072001463 -0800
> +++ linux/mm/memcontrol.c	2012-02-28 20:45:43.488100423 -0800
> @@ -2560,7 +2560,7 @@ static int mem_cgroup_move_account(struc
>  
>  	move_lock_mem_cgroup(from, &flags);
>  
> -	if (!PageAnon(page) && page_mapped(page)) {
> +	if (!anon && page_mapped(page)) {
>  		/* Update mapped_file data for mem_cgroup */
>  		preempt_disable();
>  		__this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix
  2012-02-29  5:30 ` [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix Hugh Dickins
@ 2012-02-29  5:54   ` KAMEZAWA Hiroyuki
  2012-02-29 19:43   ` Johannes Weiner
  1 sibling, 0 replies; 20+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-02-29  5:54 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, Johannes Weiner, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Tue, 28 Feb 2012 21:30:17 -0800 (PST)
Hugh Dickins <hughd@google.com> wrote:

> Swapping tmpfs loads show absurd wrapped rss and wrong cache in memcg's
> memory.stat statistics: __mem_cgroup_uncharge_common() is failing to
> distinguish the anon and tmpfs cases.
> 
I thought I tested this..maybe my test was wrong, sorry.


> Mostly we can decide between them by PageAnon, which is reliable once
> it has been set; but there are several callers who need to uncharge a
> MEM_CGROUP_CHARGE_TYPE_MAPPED page before it was fully initialized,
> so allow that case to override the PageAnon decision.
> 
> Signed-off-by: Hugh Dickins <hughd@google.com>
> ---

Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

It seems I should revisit these and consinder some cleanup...

_OFF TOPIC_
To be honest, I don't like to have anon/rss counter in memory.stat because
we have LRU statistics. It seems enough.. If shmem counter is required,
I think we should have shmem counter rather than anon/rss.


> 
>  mm/memcontrol.c |    7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> --- 3.3-rc5-next/mm/memcontrol.c	2012-02-25 10:06:52.496035568 -0800
> +++ linux/mm/memcontrol.c	2012-02-26 10:44:32.146365398 -0800
> @@ -2944,13 +2944,16 @@ __mem_cgroup_uncharge_common(struct page
>  	if (!PageCgroupUsed(pc))
>  		goto unlock_out;
>  
> +	anon = PageAnon(page);
> +
>  	switch (ctype) {
>  	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
> +		anon = true;
> +		/* fallthrough */
>  	case MEM_CGROUP_CHARGE_TYPE_DROP:
>  		/* See mem_cgroup_prepare_migration() */
>  		if (page_mapped(page) || PageCgroupMigration(pc))
>  			goto unlock_out;
> -		anon = true;
>  		break;
>  	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
>  		if (!PageAnon(page)) {	/* Shared memory */
> @@ -2958,10 +2961,8 @@ __mem_cgroup_uncharge_common(struct page
>  				goto unlock_out;
>  		} else if (page_mapped(page)) /* Anon */
>  				goto unlock_out;
> -		anon = true;
>  		break;
>  	default:
> -		anon = false;
>  		break;
>  	}
>  
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting
  2012-02-29  5:25 [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting Hugh Dickins
                   ` (3 preceding siblings ...)
  2012-02-29  5:39 ` [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting KAMEZAWA Hiroyuki
@ 2012-02-29 19:00 ` Johannes Weiner
  2012-02-29 22:04 ` Andrew Morton
  5 siblings, 0 replies; 20+ messages in thread
From: Johannes Weiner @ 2012-02-29 19:00 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Tue, Feb 28, 2012 at 09:25:02PM -0800, Hugh Dickins wrote:
> We have forgotten the rules of lock nesting: the irq-safe ones must be
> taken inside the non-irq-safe ones, otherwise we are open to deadlock:
> 
> CPU0                          CPU1
> ----                          ----
> lock(&(&pc->lock)->rlock);
>                               local_irq_disable();
>                               lock(&(&zone->lru_lock)->rlock);
>                               lock(&(&pc->lock)->rlock);
> <Interrupt>
> lock(&(&zone->lru_lock)->rlock);
> 
> To check a different locking issue, I happened to add a spin_lock to
> memcg's bit_spin_lock in lock_page_cgroup(), and lockdep very quickly
> complained about __mem_cgroup_commit_charge_lrucare() (on CPU1 above).
> 
> So delete __mem_cgroup_commit_charge_lrucare(), passing a bool lrucare
> to __mem_cgroup_commit_charge() instead, taking zone->lru_lock under
> lock_page_cgroup() in the lrucare case.
> 
> The original was using spin_lock_irqsave, but we'd be in more trouble
> if it were ever called at interrupt time: unconditional _irq is enough.
> And ClearPageLRU before del from lru, SetPageLRU before add to lru: no
> strong reason, but that is the ordering used consistently elsewhere.
> 
> Signed-off-by: Hugh Dickins <hughd@google.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH next] memcg: fix deadlock by avoiding stat lock when anon
  2012-02-29  5:26 ` [PATCH next] memcg: fix deadlock by avoiding stat lock when anon Hugh Dickins
@ 2012-02-29 19:35   ` Johannes Weiner
  2012-03-01  1:18     ` Hugh Dickins
  0 siblings, 1 reply; 20+ messages in thread
From: Johannes Weiner @ 2012-02-29 19:35 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Tue, Feb 28, 2012 at 09:26:56PM -0800, Hugh Dickins wrote:
> Fix deadlock in "memcg: use new logic for page stat accounting".
> 
> page_remove_rmap() first calls mem_cgroup_begin_update_page_stat(),
> which may take move_lock_mem_cgroup(), unlocked at the end of
> page_remove_rmap() by mem_cgroup_end_update_page_stat().
> 
> The PageAnon case never needs to mem_cgroup_dec_page_stat(page,
> MEMCG_NR_FILE_MAPPED); but it often needs to mem_cgroup_uncharge_page(),
> which does lock_page_cgroup(), while holding that move_lock_mem_cgroup().
> Whereas mem_cgroup_move_account() calls move_lock_mem_cgroup() while
> holding lock_page_cgroup().
> 
> Since mem_cgroup_begin and end are unnecessary here for PageAnon,
> simply avoid the deadlock and wasted calls in that case.
> 
> Signed-off-by: Hugh Dickins <hughd@google.com>

Eek.

Saving the begin/end_update_page_stat() calls for the anon case where
we know in advance we don't need them is one thing, but this also
hides a dependencies that even eludes lockdep behind what looks like a
minor optimization of the anon case.

Wouldn't this be more robust if we turned the ordering inside out in
move_account instead?

> ---
> 
>  mm/rmap.c |   11 +++++++----
>  1 file changed, 7 insertions(+), 4 deletions(-)
> 
> --- 3.3-rc5-next/mm/rmap.c	2012-02-26 23:51:46.506050210 -0800
> +++ linux/mm/rmap.c	2012-02-27 20:25:56.423324211 -0800
> @@ -1166,10 +1166,12 @@ void page_add_file_rmap(struct page *pag
>   */
>  void page_remove_rmap(struct page *page)
>  {
> +	bool anon = PageAnon(page);
>  	bool locked;
>  	unsigned long flags;
>  
> -	mem_cgroup_begin_update_page_stat(page, &locked, &flags);
> +	if (!anon)
> +		mem_cgroup_begin_update_page_stat(page, &locked, &flags);
>  	/* page still mapped by someone else? */
>  	if (!atomic_add_negative(-1, &page->_mapcount))
>  		goto out;
> @@ -1181,7 +1183,7 @@ void page_remove_rmap(struct page *page)
>  	 * not if it's in swapcache - there might be another pte slot
>  	 * containing the swap entry, but page not yet written to swap.
>  	 */
> -	if ((!PageAnon(page) || PageSwapCache(page)) &&
> +	if ((!anon || PageSwapCache(page)) &&
>  	    page_test_and_clear_dirty(page_to_pfn(page), 1))
>  		set_page_dirty(page);
>  	/*
> @@ -1190,7 +1192,7 @@ void page_remove_rmap(struct page *page)
>  	 */
>  	if (unlikely(PageHuge(page)))
>  		goto out;
> -	if (PageAnon(page)) {
> +	if (anon) {
>  		mem_cgroup_uncharge_page(page);
>  		if (!PageTransHuge(page))
>  			__dec_zone_page_state(page, NR_ANON_PAGES);
> @@ -1211,7 +1213,8 @@ void page_remove_rmap(struct page *page)
>  	 * faster for those pages still in swapcache.
>  	 */
>  out:
> -	mem_cgroup_end_update_page_stat(page, &locked, &flags);
> +	if (!anon)
> +		mem_cgroup_end_update_page_stat(page, &locked, &flags);
>  }
>  
>  /*

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH next] memcg: remove PCG_FILE_MAPPED fix cosmetic fix
  2012-02-29  5:28 ` [PATCH next] memcg: remove PCG_FILE_MAPPED fix cosmetic fix Hugh Dickins
  2012-02-29  5:40   ` KAMEZAWA Hiroyuki
@ 2012-02-29 19:35   ` Johannes Weiner
  1 sibling, 0 replies; 20+ messages in thread
From: Johannes Weiner @ 2012-02-29 19:35 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Tue, Feb 28, 2012 at 09:28:40PM -0800, Hugh Dickins wrote:
> mem_cgroup_move_account() begins with "anon = PageAnon(page)", and
> then anon is used thereafter: testing PageAnon(page) in the middle
> makes the reader wonder if there's some race to guard against - no.
> 
> Signed-off-by: Hugh Dickins <hughd@google.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix
  2012-02-29  5:30 ` [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix Hugh Dickins
  2012-02-29  5:54   ` KAMEZAWA Hiroyuki
@ 2012-02-29 19:43   ` Johannes Weiner
  2012-03-01  1:21     ` Hugh Dickins
  1 sibling, 1 reply; 20+ messages in thread
From: Johannes Weiner @ 2012-02-29 19:43 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Tue, Feb 28, 2012 at 09:30:17PM -0800, Hugh Dickins wrote:
> Swapping tmpfs loads show absurd wrapped rss and wrong cache in memcg's
> memory.stat statistics: __mem_cgroup_uncharge_common() is failing to
> distinguish the anon and tmpfs cases.
> 
> Mostly we can decide between them by PageAnon, which is reliable once
> it has been set; but there are several callers who need to uncharge a
> MEM_CGROUP_CHARGE_TYPE_MAPPED page before it was fully initialized,
> so allow that case to override the PageAnon decision.
> 
> Signed-off-by: Hugh Dickins <hughd@google.com>
> ---
> 
>  mm/memcontrol.c |    7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> --- 3.3-rc5-next/mm/memcontrol.c	2012-02-25 10:06:52.496035568 -0800
> +++ linux/mm/memcontrol.c	2012-02-26 10:44:32.146365398 -0800
> @@ -2944,13 +2944,16 @@ __mem_cgroup_uncharge_common(struct page
>  	if (!PageCgroupUsed(pc))
>  		goto unlock_out;
>  
> +	anon = PageAnon(page);
> +
>  	switch (ctype) {
>  	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
> +		anon = true;
> +		/* fallthrough */

If you don't mind, could you add a small comment on why this is the
exception where we don't trust page->mapping?

Other than that,
Acked-by: Johannes Weiner <hannes@cmpxchg.org>

>  	case MEM_CGROUP_CHARGE_TYPE_DROP:
>  		/* See mem_cgroup_prepare_migration() */
>  		if (page_mapped(page) || PageCgroupMigration(pc))
>  			goto unlock_out;
> -		anon = true;
>  		break;
>  	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
>  		if (!PageAnon(page)) {	/* Shared memory */
> @@ -2958,10 +2961,8 @@ __mem_cgroup_uncharge_common(struct page
>  				goto unlock_out;
>  		} else if (page_mapped(page)) /* Anon */
>  				goto unlock_out;
> -		anon = true;
>  		break;
>  	default:
> -		anon = false;
>  		break;
>  	}

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting
  2012-02-29  5:25 [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting Hugh Dickins
                   ` (4 preceding siblings ...)
  2012-02-29 19:00 ` Johannes Weiner
@ 2012-02-29 22:04 ` Andrew Morton
  2012-03-01  0:43   ` Hugh Dickins
  5 siblings, 1 reply; 20+ messages in thread
From: Andrew Morton @ 2012-02-29 22:04 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: KAMEZAWA Hiroyuki, Johannes Weiner, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Tue, 28 Feb 2012 21:25:02 -0800 (PST)
Hugh Dickins <hughd@google.com> wrote:

> We have forgotten the rules of lock nesting: the irq-safe ones must be
> taken inside the non-irq-safe ones, otherwise we are open to deadlock:
> 
> CPU0                          CPU1
> ----                          ----
> lock(&(&pc->lock)->rlock);
>                               local_irq_disable();
>                               lock(&(&zone->lru_lock)->rlock);
>                               lock(&(&pc->lock)->rlock);
> <Interrupt>
> lock(&(&zone->lru_lock)->rlock);
> 
> To check a different locking issue, I happened to add a spin_lock to
> memcg's bit_spin_lock in lock_page_cgroup(), and lockdep very quickly
> complained about __mem_cgroup_commit_charge_lrucare() (on CPU1 above).
> 
> So delete __mem_cgroup_commit_charge_lrucare(), passing a bool lrucare
> to __mem_cgroup_commit_charge() instead, taking zone->lru_lock under
> lock_page_cgroup() in the lrucare case.
> 
> The original was using spin_lock_irqsave, but we'd be in more trouble
> if it were ever called at interrupt time: unconditional _irq is enough.
> And ClearPageLRU before del from lru, SetPageLRU before add to lru: no
> strong reason, but that is the ordering used consistently elsewhere.

This patch makes rather a mess of "memcg: remove PCG_CACHE page_cgroup
flag".

--- mm/memcontrol.c~memcg-remove-pcg_cache-page_cgroup-flag
+++ mm/memcontrol.c
@@ -2410,6 +2414,8 @@
 				       struct page_cgroup *pc,
 				       enum charge_type ctype)
 {
+	bool anon;
+
 	lock_page_cgroup(pc);
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
@@ -2429,21 +2435,14 @@
 	 * See mem_cgroup_add_lru_list(), etc.
  	 */
 	smp_wmb();
-	switch (ctype) {
-	case MEM_CGROUP_CHARGE_TYPE_CACHE:
-	case MEM_CGROUP_CHARGE_TYPE_SHMEM:
-		SetPageCgroupCache(pc);
-		SetPageCgroupUsed(pc);
-		break;
-	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
-		ClearPageCgroupCache(pc);
-		SetPageCgroupUsed(pc);
-		break;
-	default:
-		break;
-	}
 
-	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
+	SetPageCgroupUsed(pc);
+	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+		anon = true;
+	else
+		anon = false;
+
+	mem_cgroup_charge_statistics(memcg, anon, nr_pages);
 	unlock_page_cgroup(pc);
 	WARN_ON_ONCE(PageLRU(page));
 	/*

I did it this way:

static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
				       struct page *page,
				       unsigned int nr_pages,
				       struct page_cgroup *pc,
				       enum charge_type ctype,
				       bool lrucare)
{
	struct zone *uninitialized_var(zone);
	bool was_on_lru = false;
	bool anon;

	lock_page_cgroup(pc);
	if (unlikely(PageCgroupUsed(pc))) {
		unlock_page_cgroup(pc);
		__mem_cgroup_cancel_charge(memcg, nr_pages);
		return;
	}
	/*
	 * we don't need page_cgroup_lock about tail pages, becase they are not
	 * accessed by any other context at this point.
	 */

	/*
	 * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
	 * may already be on some other mem_cgroup's LRU.  Take care of it.
	 */
	if (lrucare) {
		zone = page_zone(page);
		spin_lock_irq(&zone->lru_lock);
		if (PageLRU(page)) {
			ClearPageLRU(page);
			del_page_from_lru_list(zone, page, page_lru(page));
			was_on_lru = true;
		}
	}

	pc->mem_cgroup = memcg;
	/*
	 * We access a page_cgroup asynchronously without lock_page_cgroup().
	 * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
	 * is accessed after testing USED bit. To make pc->mem_cgroup visible
	 * before USED bit, we need memory barrier here.
	 * See mem_cgroup_add_lru_list(), etc.
 	 */
	smp_wmb();
	SetPageCgroupUsed(pc);

	if (lrucare) {
		if (was_on_lru) {
			VM_BUG_ON(PageLRU(page));
			SetPageLRU(page);
			add_page_to_lru_list(zone, page, page_lru(page));
		}
		spin_unlock_irq(&zone->lru_lock);
	}

	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
		anon = true;
	else
		anon = false;

	mem_cgroup_charge_statistics(memcg, anon, nr_pages);
	unlock_page_cgroup(pc);

	/*
	 * "charge_statistics" updated event counter. Then, check it.
	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
	 * if they exceeds softlimit.
	 */
	memcg_check_events(memcg, page);
}

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting
  2012-02-29 22:04 ` Andrew Morton
@ 2012-03-01  0:43   ` Hugh Dickins
  0 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2012-03-01  0:43 UTC (permalink / raw)
  To: Andrew Morton
  Cc: KAMEZAWA Hiroyuki, Johannes Weiner, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Wed, 29 Feb 2012, Andrew Morton wrote:
> On Tue, 28 Feb 2012 21:25:02 -0800 (PST)
> Hugh Dickins <hughd@google.com> wrote:
> 
> > We have forgotten the rules of lock nesting: the irq-safe ones must be
> > taken inside the non-irq-safe ones, otherwise we are open to deadlock:
> 
> This patch makes rather a mess of "memcg: remove PCG_CACHE page_cgroup
> flag".

Sorry about that.

> 
> I did it this way:

Exactly right, thank you.  In my tree I end up with a blank line
in between the smp_wmb() and the SetPageCgroupUsed(pc), but I
prefer the way you have grouped it.

Hugh

> 
> static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
> 				       struct page *page,
> 				       unsigned int nr_pages,
> 				       struct page_cgroup *pc,
> 				       enum charge_type ctype,
> 				       bool lrucare)
> {
> 	struct zone *uninitialized_var(zone);
> 	bool was_on_lru = false;
> 	bool anon;
> 
> 	lock_page_cgroup(pc);
> 	if (unlikely(PageCgroupUsed(pc))) {
> 		unlock_page_cgroup(pc);
> 		__mem_cgroup_cancel_charge(memcg, nr_pages);
> 		return;
> 	}
> 	/*
> 	 * we don't need page_cgroup_lock about tail pages, becase they are not
> 	 * accessed by any other context at this point.
> 	 */
> 
> 	/*
> 	 * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
> 	 * may already be on some other mem_cgroup's LRU.  Take care of it.
> 	 */
> 	if (lrucare) {
> 		zone = page_zone(page);
> 		spin_lock_irq(&zone->lru_lock);
> 		if (PageLRU(page)) {
> 			ClearPageLRU(page);
> 			del_page_from_lru_list(zone, page, page_lru(page));
> 			was_on_lru = true;
> 		}
> 	}
> 
> 	pc->mem_cgroup = memcg;
> 	/*
> 	 * We access a page_cgroup asynchronously without lock_page_cgroup().
> 	 * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
> 	 * is accessed after testing USED bit. To make pc->mem_cgroup visible
> 	 * before USED bit, we need memory barrier here.
> 	 * See mem_cgroup_add_lru_list(), etc.
>  	 */
> 	smp_wmb();
> 	SetPageCgroupUsed(pc);
> 
> 	if (lrucare) {
> 		if (was_on_lru) {
> 			VM_BUG_ON(PageLRU(page));
> 			SetPageLRU(page);
> 			add_page_to_lru_list(zone, page, page_lru(page));
> 		}
> 		spin_unlock_irq(&zone->lru_lock);
> 	}
> 
> 	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
> 		anon = true;
> 	else
> 		anon = false;
> 
> 	mem_cgroup_charge_statistics(memcg, anon, nr_pages);
> 	unlock_page_cgroup(pc);
> 
> 	/*
> 	 * "charge_statistics" updated event counter. Then, check it.
> 	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
> 	 * if they exceeds softlimit.
> 	 */
> 	memcg_check_events(memcg, page);
> }
> 
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH next] memcg: fix deadlock by avoiding stat lock when anon
  2012-02-29 19:35   ` Johannes Weiner
@ 2012-03-01  1:18     ` Hugh Dickins
  2012-03-01  2:44       ` [PATCH v2 " Hugh Dickins
  0 siblings, 1 reply; 20+ messages in thread
From: Hugh Dickins @ 2012-03-01  1:18 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Wed, 29 Feb 2012, Johannes Weiner wrote:
> 
> Saving the begin/end_update_page_stat() calls for the anon case where
> we know in advance we don't need them is one thing, but this also
> hides a dependencies that even eludes lockdep behind what looks like a
> minor optimization of the anon case.

Sounds like you'd appreciate a comment there: akpm has not put
this version in yet, so I'll send an updated version shortly.

> 
> Wouldn't this be more robust if we turned the ordering inside out in
> move_account instead?

I think we need more than the one user of this infrastructure before
that can be decided.

But I didn't actually consider that at all: perhaps prejudiced by the
way I had solved the race Konstantin pointed out in my patchset of 10
last week, by using the lruvec lock for move_lock_mem_cgroup too,
which fits with it being inside the page_cgroup lock.

Hmm, I notice move_lock_mem_cgroup is likewise spin_lock_irqsave:
if it needs to be (and I guess the idea is that it doesn't need to be
today, but for generality later on had better be), then it has to be
inside page_cgroup lock.

(If FILE_MAPPED were to be the only user of the infrastructure, I'd
actually prefer to remove the begin/end, and make move_account raise
the file page's mapcount temporarily, doing its own page_remove_rmap
after, to solve these races.  There's probably one or two VM_BUG_ONs
elsewhere that would need deleting to make that completely safe.
But I understand there may be more users to come - and mapcount
games might not fit your desire for robustness.)

Hugh

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix
  2012-02-29 19:43   ` Johannes Weiner
@ 2012-03-01  1:21     ` Hugh Dickins
  2012-03-01  2:42       ` [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix2 Hugh Dickins
  0 siblings, 1 reply; 20+ messages in thread
From: Hugh Dickins @ 2012-03-01  1:21 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Wed, 29 Feb 2012, Johannes Weiner wrote:
> On Tue, Feb 28, 2012 at 09:30:17PM -0800, Hugh Dickins wrote:
> >  
> > +	anon = PageAnon(page);
> > +
> >  	switch (ctype) {
> >  	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
> > +		anon = true;
> > +		/* fallthrough */
> 
> If you don't mind, could you add a small comment on why this is the
> exception where we don't trust page->mapping?

Right, I'll send an incremental fix for that.

> 
> Other than that,
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>

Thanks,
Hugh

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix2
  2012-03-01  1:21     ` Hugh Dickins
@ 2012-03-01  2:42       ` Hugh Dickins
  2012-03-01  9:16         ` KAMEZAWA Hiroyuki
  0 siblings, 1 reply; 20+ messages in thread
From: Hugh Dickins @ 2012-03-01  2:42 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Johannes Weiner, KAMEZAWA Hiroyuki, Konstantin Khlebnikov,
	linux-kernel, linux-mm

Add comment to MEM_CGROUP_CHARGE_TYPE_MAPPED case in
__mem_cgroup_uncharge_common().

Signed-off-by: Hugh Dickins <hughd@google.com>
---
This one incremental to patch already in mm-commits.

 mm/memcontrol.c |    5 +++++
 1 file changed, 5 insertions(+)

--- mm-commits/mm/memcontrol.c	2012-02-28 20:45:43.488100423 -0800
+++ linux/mm/memcontrol.c	2012-02-29 18:21:49.144702180 -0800
@@ -2953,6 +2953,11 @@ __mem_cgroup_uncharge_common(struct page
 
 	switch (ctype) {
 	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
+		/*
+		 * Generally PageAnon tells if it's the anon statistics to be
+		 * updated; but sometimes e.g. mem_cgroup_uncharge_page() is
+		 * used before page reached the stage of being marked PageAnon.
+		 */
 		anon = true;
 		/* fallthrough */
 	case MEM_CGROUP_CHARGE_TYPE_DROP:

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2 next] memcg: fix deadlock by avoiding stat lock when anon
  2012-03-01  1:18     ` Hugh Dickins
@ 2012-03-01  2:44       ` Hugh Dickins
  2012-03-01  9:18         ` KAMEZAWA Hiroyuki
  2012-03-01 10:00         ` Johannes Weiner
  0 siblings, 2 replies; 20+ messages in thread
From: Hugh Dickins @ 2012-03-01  2:44 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Johannes Weiner, KAMEZAWA Hiroyuki, Konstantin Khlebnikov,
	linux-kernel, linux-mm

Fix deadlock in "memcg: use new logic for page stat accounting".

page_remove_rmap() first calls mem_cgroup_begin_update_page_stat(),
which may take move_lock_mem_cgroup(), unlocked at the end of
page_remove_rmap() by mem_cgroup_end_update_page_stat().

The PageAnon case never needs to mem_cgroup_dec_page_stat(page,
MEMCG_NR_FILE_MAPPED); but it often needs to mem_cgroup_uncharge_page(),
which does lock_page_cgroup(), while holding that move_lock_mem_cgroup().
Whereas mem_cgroup_move_account() calls move_lock_mem_cgroup() while
holding lock_page_cgroup().

Since mem_cgroup_begin and end are unnecessary here for PageAnon,
simply avoid the deadlock and wasted calls in that case.

Signed-off-by: Hugh Dickins <hughd@google.com>
---
v2: added comment in the code so it's not thought just an optimization.

 mm/rmap.c |   17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

--- 3.3-rc5-next/mm/rmap.c	2012-02-26 23:51:46.506050210 -0800
+++ linux/mm/rmap.c	2012-02-29 17:55:42.868665736 -0800
@@ -1166,10 +1166,18 @@ void page_add_file_rmap(struct page *pag
  */
 void page_remove_rmap(struct page *page)
 {
+	bool anon = PageAnon(page);
 	bool locked;
 	unsigned long flags;
 
-	mem_cgroup_begin_update_page_stat(page, &locked, &flags);
+	/*
+	 * The anon case has no mem_cgroup page_stat to update; but may
+	 * uncharge_page() below, where the lock ordering can deadlock if
+	 * we hold the lock against page_stat move: so avoid it on anon.
+	 */
+	if (!anon)
+		mem_cgroup_begin_update_page_stat(page, &locked, &flags);
+
 	/* page still mapped by someone else? */
 	if (!atomic_add_negative(-1, &page->_mapcount))
 		goto out;
@@ -1181,7 +1189,7 @@ void page_remove_rmap(struct page *page)
 	 * not if it's in swapcache - there might be another pte slot
 	 * containing the swap entry, but page not yet written to swap.
 	 */
-	if ((!PageAnon(page) || PageSwapCache(page)) &&
+	if ((!anon || PageSwapCache(page)) &&
 	    page_test_and_clear_dirty(page_to_pfn(page), 1))
 		set_page_dirty(page);
 	/*
@@ -1190,7 +1198,7 @@ void page_remove_rmap(struct page *page)
 	 */
 	if (unlikely(PageHuge(page)))
 		goto out;
-	if (PageAnon(page)) {
+	if (anon) {
 		mem_cgroup_uncharge_page(page);
 		if (!PageTransHuge(page))
 			__dec_zone_page_state(page, NR_ANON_PAGES);
@@ -1211,7 +1219,8 @@ void page_remove_rmap(struct page *page)
 	 * faster for those pages still in swapcache.
 	 */
 out:
-	mem_cgroup_end_update_page_stat(page, &locked, &flags);
+	if (!anon)
+		mem_cgroup_end_update_page_stat(page, &locked, &flags);
 }
 
 /*

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix2
  2012-03-01  2:42       ` [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix2 Hugh Dickins
@ 2012-03-01  9:16         ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 20+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-03-01  9:16 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, Johannes Weiner, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Wed, 29 Feb 2012 18:42:57 -0800 (PST)
Hugh Dickins <hughd@google.com> wrote:

> Add comment to MEM_CGROUP_CHARGE_TYPE_MAPPED case in
> __mem_cgroup_uncharge_common().
> 
> Signed-off-by: Hugh Dickins <hughd@google.com>

Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

> ---
> This one incremental to patch already in mm-commits.
> 
>  mm/memcontrol.c |    5 +++++
>  1 file changed, 5 insertions(+)
> 
> --- mm-commits/mm/memcontrol.c	2012-02-28 20:45:43.488100423 -0800
> +++ linux/mm/memcontrol.c	2012-02-29 18:21:49.144702180 -0800
> @@ -2953,6 +2953,11 @@ __mem_cgroup_uncharge_common(struct page
>  
>  	switch (ctype) {
>  	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
> +		/*
> +		 * Generally PageAnon tells if it's the anon statistics to be
> +		 * updated; but sometimes e.g. mem_cgroup_uncharge_page() is
> +		 * used before page reached the stage of being marked PageAnon.
> +		 */
>  		anon = true;
>  		/* fallthrough */
>  	case MEM_CGROUP_CHARGE_TYPE_DROP:
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 next] memcg: fix deadlock by avoiding stat lock when anon
  2012-03-01  2:44       ` [PATCH v2 " Hugh Dickins
@ 2012-03-01  9:18         ` KAMEZAWA Hiroyuki
  2012-03-01 10:00         ` Johannes Weiner
  1 sibling, 0 replies; 20+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-03-01  9:18 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, Johannes Weiner, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Wed, 29 Feb 2012 18:44:59 -0800 (PST)
Hugh Dickins <hughd@google.com> wrote:

> Fix deadlock in "memcg: use new logic for page stat accounting".
> 
> page_remove_rmap() first calls mem_cgroup_begin_update_page_stat(),
> which may take move_lock_mem_cgroup(), unlocked at the end of
> page_remove_rmap() by mem_cgroup_end_update_page_stat().
> 
> The PageAnon case never needs to mem_cgroup_dec_page_stat(page,
> MEMCG_NR_FILE_MAPPED); but it often needs to mem_cgroup_uncharge_page(),
> which does lock_page_cgroup(), while holding that move_lock_mem_cgroup().
> Whereas mem_cgroup_move_account() calls move_lock_mem_cgroup() while
> holding lock_page_cgroup().
> 
> Since mem_cgroup_begin and end are unnecessary here for PageAnon,
> simply avoid the deadlock and wasted calls in that case.
> 
> Signed-off-by: Hugh Dickins <hughd@google.com>

Thank you and I'm sorry.

Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>


> ---
> v2: added comment in the code so it's not thought just an optimization.
> 
>  mm/rmap.c |   17 +++++++++++++----
>  1 file changed, 13 insertions(+), 4 deletions(-)
> 
> --- 3.3-rc5-next/mm/rmap.c	2012-02-26 23:51:46.506050210 -0800
> +++ linux/mm/rmap.c	2012-02-29 17:55:42.868665736 -0800
> @@ -1166,10 +1166,18 @@ void page_add_file_rmap(struct page *pag
>   */
>  void page_remove_rmap(struct page *page)
>  {
> +	bool anon = PageAnon(page);
>  	bool locked;
>  	unsigned long flags;
>  
> -	mem_cgroup_begin_update_page_stat(page, &locked, &flags);
> +	/*
> +	 * The anon case has no mem_cgroup page_stat to update; but may
> +	 * uncharge_page() below, where the lock ordering can deadlock if
> +	 * we hold the lock against page_stat move: so avoid it on anon.
> +	 */
> +	if (!anon)
> +		mem_cgroup_begin_update_page_stat(page, &locked, &flags);
> +
>  	/* page still mapped by someone else? */
>  	if (!atomic_add_negative(-1, &page->_mapcount))
>  		goto out;
> @@ -1181,7 +1189,7 @@ void page_remove_rmap(struct page *page)
>  	 * not if it's in swapcache - there might be another pte slot
>  	 * containing the swap entry, but page not yet written to swap.
>  	 */
> -	if ((!PageAnon(page) || PageSwapCache(page)) &&
> +	if ((!anon || PageSwapCache(page)) &&
>  	    page_test_and_clear_dirty(page_to_pfn(page), 1))
>  		set_page_dirty(page);
>  	/*
> @@ -1190,7 +1198,7 @@ void page_remove_rmap(struct page *page)
>  	 */
>  	if (unlikely(PageHuge(page)))
>  		goto out;
> -	if (PageAnon(page)) {
> +	if (anon) {
>  		mem_cgroup_uncharge_page(page);
>  		if (!PageTransHuge(page))
>  			__dec_zone_page_state(page, NR_ANON_PAGES);
> @@ -1211,7 +1219,8 @@ void page_remove_rmap(struct page *page)
>  	 * faster for those pages still in swapcache.
>  	 */
>  out:
> -	mem_cgroup_end_update_page_stat(page, &locked, &flags);
> +	if (!anon)
> +		mem_cgroup_end_update_page_stat(page, &locked, &flags);
>  }
>  
>  /*
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 next] memcg: fix deadlock by avoiding stat lock when anon
  2012-03-01  2:44       ` [PATCH v2 " Hugh Dickins
  2012-03-01  9:18         ` KAMEZAWA Hiroyuki
@ 2012-03-01 10:00         ` Johannes Weiner
  1 sibling, 0 replies; 20+ messages in thread
From: Johannes Weiner @ 2012-03-01 10:00 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, Konstantin Khlebnikov,
	linux-kernel, linux-mm

On Wed, Feb 29, 2012 at 06:44:59PM -0800, Hugh Dickins wrote:
> Fix deadlock in "memcg: use new logic for page stat accounting".
> 
> page_remove_rmap() first calls mem_cgroup_begin_update_page_stat(),
> which may take move_lock_mem_cgroup(), unlocked at the end of
> page_remove_rmap() by mem_cgroup_end_update_page_stat().
> 
> The PageAnon case never needs to mem_cgroup_dec_page_stat(page,
> MEMCG_NR_FILE_MAPPED); but it often needs to mem_cgroup_uncharge_page(),
> which does lock_page_cgroup(), while holding that move_lock_mem_cgroup().
> Whereas mem_cgroup_move_account() calls move_lock_mem_cgroup() while
> holding lock_page_cgroup().
> 
> Since mem_cgroup_begin and end are unnecessary here for PageAnon,
> simply avoid the deadlock and wasted calls in that case.
> 
> Signed-off-by: Hugh Dickins <hughd@google.com>

Agreed, let's keep that lock ordering for now, and the comment makes
it clear.  Thanks!

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2012-03-01 10:00 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-29  5:25 [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting Hugh Dickins
2012-02-29  5:26 ` [PATCH next] memcg: fix deadlock by avoiding stat lock when anon Hugh Dickins
2012-02-29 19:35   ` Johannes Weiner
2012-03-01  1:18     ` Hugh Dickins
2012-03-01  2:44       ` [PATCH v2 " Hugh Dickins
2012-03-01  9:18         ` KAMEZAWA Hiroyuki
2012-03-01 10:00         ` Johannes Weiner
2012-02-29  5:28 ` [PATCH next] memcg: remove PCG_FILE_MAPPED fix cosmetic fix Hugh Dickins
2012-02-29  5:40   ` KAMEZAWA Hiroyuki
2012-02-29 19:35   ` Johannes Weiner
2012-02-29  5:30 ` [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix Hugh Dickins
2012-02-29  5:54   ` KAMEZAWA Hiroyuki
2012-02-29 19:43   ` Johannes Weiner
2012-03-01  1:21     ` Hugh Dickins
2012-03-01  2:42       ` [PATCH next] memcg: remove PCG_CACHE page_cgroup flag fix2 Hugh Dickins
2012-03-01  9:16         ` KAMEZAWA Hiroyuki
2012-02-29  5:39 ` [PATCH 3.3] memcg: fix deadlock by inverting lrucare nesting KAMEZAWA Hiroyuki
2012-02-29 19:00 ` Johannes Weiner
2012-02-29 22:04 ` Andrew Morton
2012-03-01  0:43   ` Hugh Dickins

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).