All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <509A2849.9090509@parallels.com>

diff --git a/a/2.txt b/N1/2.txt
index 8b13789..f1ca272 100644
--- a/a/2.txt
+++ b/N1/2.txt
@@ -1 +1,216 @@
+>From c99404a760fa69e8ccda0ff4b2636c6abd1ac990 Mon Sep 17 00:00:00 2001
+From: Glauber Costa <glommer@parallels.com>
+Date: Thu, 3 May 2012 13:33:03 -0300
+Subject: [PATCH v3 15/16] memcg/sl[au]b: shrink dead caches
 
+In the slub allocator, when the last object of a page goes away, we
+don't necessarily free it - there is not necessarily a test for empty
+page in any slab_free path.
+
+This means that when we destroy memcg caches that happened to be empty,
+those caches may take a lot of time to go away: removing the memcg
+reference won't destroy them - because there are pending references, and
+the empty pages will stay there, until a shrinker is called upon for any
+reason.
+
+This patch marks all memcg caches as dead. kmem_cache_shrink is called
+for the ones that are not yet dead - this will force internal cache
+reorganization, and then all references to empty pages will be removed.
+
+An unlikely branch is used to make sure this case does not affect
+performance in the usual slab_free path.
+
+The slab allocator has a time based reaper that would eventually get rid
+of the objects, but we can also call it explicitly, since dead caches
+are not a likely event.
+
+[ v2: also call verify_dead for the slab ]
+
+Signed-off-by: Glauber Costa <glommer@parallels.com>
+CC: Christoph Lameter <cl@linux.com>
+CC: Pekka Enberg <penberg@cs.helsinki.fi>
+CC: Michal Hocko <mhocko@suse.cz>
+CC: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
+CC: Johannes Weiner <hannes@cmpxchg.org>
+CC: Suleiman Souhlal <suleiman@google.com>
+---
+ include/linux/slab.h |  3 +++
+ mm/memcontrol.c      | 44 +++++++++++++++++++++++++++++++++++++++++++-
+ mm/slab.c            |  2 ++
+ mm/slab.h            | 10 ++++++++++
+ mm/slub.c            |  1 +
+ 5 files changed, 59 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/slab.h b/include/linux/slab.h
+index 9badb8c..765e12c 100644
+--- a/include/linux/slab.h
++++ b/include/linux/slab.h
+@@ -182,6 +182,8 @@ unsigned int kmem_cache_size(struct kmem_cache *);
+ #endif
+ 
+ #ifdef CONFIG_MEMCG_KMEM
++#include <linux/workqueue.h>
++
+ struct mem_cgroup_cache_params {
+ 	struct mem_cgroup *memcg;
+ 	struct kmem_cache *parent;
+@@ -190,6 +192,7 @@ struct mem_cgroup_cache_params {
+ 	atomic_t nr_pages;
+ 	struct list_head destroyed_list; /* Used when deleting memcg cache */
+ 	struct list_head sibling_list;
++	struct work_struct cache_shrinker;
+ };
+ #endif
+ 
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index da38652..c0cf564 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -578,7 +578,7 @@ static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *cache
+ 
+ 	BUG_ON(dentry == NULL);
+ 
+-	name = kasprintf(GFP_KERNEL, "%s(%d:%s)",
++	name = kasprintf(GFP_KERNEL, "%s(%d:%s)dead",
+ 	    cachep->name, css_id(&memcg->css), dentry->d_name.name);
+ 
+ 	return name;
+@@ -739,12 +739,25 @@ static void disarm_kmem_keys(struct mem_cgroup *memcg)
+ 	WARN_ON(res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0);
+ }
+ 
++static void cache_shrinker_work_func(struct work_struct *work)
++{
++	struct mem_cgroup_cache_params *params;
++	struct kmem_cache *cachep;
++
++	params = container_of(work, struct mem_cgroup_cache_params,
++			      cache_shrinker);
++	cachep = container_of(params, struct kmem_cache, memcg_params);
++
++	kmem_cache_shrink(cachep);
++}
++
+ static DEFINE_MUTEX(memcg_cache_mutex);
+ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+ 						  struct kmem_cache *cachep)
+ {
+ 	struct kmem_cache *new_cachep;
+ 	int idx;
++	char *name;
+ 
+ 	BUG_ON(!memcg_can_account_kmem(memcg));
+ 
+@@ -764,10 +777,21 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+ 		goto out;
+ 	}
+ 
++	/*
++	 * Because the cache is expected to duplicate the string,
++	 * we must make sure it has opportunity to copy its full
+	 * name. Only now can we strip the "dead" suffix from it.
++	 */
++	name = (char *)new_cachep->name;
++	if (name)
++		name[strlen(name) - 4] = '\0';
++
+ 	mem_cgroup_get(memcg);
+ 	memcg->slabs[idx] = new_cachep;
+ 	new_cachep->memcg_params.memcg = memcg;
+ 	atomic_set(&new_cachep->memcg_params.nr_pages , 0);
++	INIT_WORK(&new_cachep->memcg_params.cache_shrinker,
++		  cache_shrinker_work_func);
+ out:
+ 	mutex_unlock(&memcg_cache_mutex);
+ 	return new_cachep;
+@@ -790,6 +814,21 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)
+ 	struct mem_cgroup_cache_params *p, *tmp;
+ 	unsigned long flags;
+ 	LIST_HEAD(del_unlocked);
++	LIST_HEAD(shrinkers);
++
++	spin_lock_irqsave(&cache_queue_lock, flags);
++	list_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) {
++		cachep = container_of(p, struct kmem_cache, memcg_params);
++		if (atomic_read(&cachep->memcg_params.nr_pages) != 0)
++			list_move(&cachep->memcg_params.destroyed_list, &shrinkers);
++	}
++	spin_unlock_irqrestore(&cache_queue_lock, flags);
++
++	list_for_each_entry_safe(p, tmp, &shrinkers, destroyed_list) {
++		cachep = container_of(p, struct kmem_cache, memcg_params);
++		list_del(&cachep->memcg_params.destroyed_list);
++		kmem_cache_shrink(cachep);
++	}
+ 
+ 	spin_lock_irqsave(&cache_queue_lock, flags);
+ 	list_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) {
+@@ -867,11 +906,14 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+ 
+ 	spin_lock_irqsave(&cache_queue_lock, flags);
+ 	for (i = 0; i < MAX_KMEM_CACHE_TYPES; i++) {
++		char *name;
+ 		cachep = memcg->slabs[i];
+ 		if (!cachep)
+ 			continue;
+ 
+ 		cachep->memcg_params.dead = true;
++		name = (char *)cachep->name;
++		name[strlen(name)] = 'd';
+ 		__mem_cgroup_destroy_cache(cachep);
+ 	}
+ 	spin_unlock_irqrestore(&cache_queue_lock, flags);
+diff --git a/mm/slab.c b/mm/slab.c
+index bd9928f..6cb4abf 100644
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -3785,6 +3785,8 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
+ 	}
+ 
+ 	ac_put_obj(cachep, ac, objp);
++
++	kmem_cache_verify_dead(cachep);
+ }
+ 
+ /**
+diff --git a/mm/slab.h b/mm/slab.h
+index 6024ad1..d21b982 100644
+--- a/mm/slab.h
++++ b/mm/slab.h
+@@ -80,6 +80,12 @@ static inline bool slab_equal_or_parent(struct kmem_cache *s,
+ {
+ 	return (p == s) || (p == s->memcg_params.parent);
+ }
++
++static inline void kmem_cache_verify_dead(struct kmem_cache *s)
++{
++	if (unlikely(s->memcg_params.dead))
++		schedule_work(&s->memcg_params.cache_shrinker);
++}
+ #else
+ static inline bool cache_match_memcg(struct kmem_cache *cachep,
+ 				     struct mem_cgroup *memcg)
+@@ -100,5 +106,9 @@ static inline bool slab_equal_or_parent(struct kmem_cache *s,
+ {
+ 	return true;
+ }
++
++static inline void kmem_cache_verify_dead(struct kmem_cache *s)
++{
++}
+ #endif
+ #endif
+diff --git a/mm/slub.c b/mm/slub.c
+index 0b68d15..9d79216 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2602,6 +2602,7 @@ redo:
+ 	} else
+ 		__slab_free(s, page, x, addr);
+ 
++	kmem_cache_verify_dead(s);
+ }
+ 
+ void kmem_cache_free(struct kmem_cache *s, void *x)
+-- 
+1.7.11.4
diff --git a/a/content_digest b/N1/content_digest
index 4b74ad0..b9749d2 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -7,9 +7,9 @@
  "Subject\0Re: [PATCH v6 25/29] memcg/sl[au]b: shrink dead caches\0"
  "Date\0Wed, 7 Nov 2012 10:22:17 +0100\0"
  "To\0Andrew Morton <akpm@linux-foundation.org>\0"
- "Cc\0linux-mm@kvack.org"
-  linux-kernel@vger.kernel.org
-  kamezawa.hiroyu@jp.fujitsu.com
+ "Cc\0<linux-mm@kvack.org>"
+  <linux-kernel@vger.kernel.org>
+  <kamezawa.hiroyu@jp.fujitsu.com>
   Johannes Weiner <hannes@cmpxchg.org>
   Tejun Heo <tj@kernel.org>
   Michal Hocko <mhocko@suse.cz>
@@ -94,5 +94,221 @@
  "\01:2\0"
  "fn\00015-memcg-sl-au-b-shrink-dead-caches.patch\0"
  "b\0"
+ ">From c99404a760fa69e8ccda0ff4b2636c6abd1ac990 Mon Sep 17 00:00:00 2001\n"
+ "From: Glauber Costa <glommer@parallels.com>\n"
+ "Date: Thu, 3 May 2012 13:33:03 -0300\n"
+ "Subject: [PATCH v3 15/16] memcg/sl[au]b: shrink dead caches\n"
+ "\n"
+ "In the slub allocator, when the last object of a page goes away, we\n"
+ "don't necessarily free it - there is not necessarily a test for empty\n"
+ "page in any slab_free path.\n"
+ "\n"
+ "This means that when we destroy a memcg cache that happened to be empty,\n"
+ "those caches may take a lot of time to go away: removing the memcg\n"
+ "reference won't destroy them - because there are pending references, and\n"
+ "the empty pages will stay there, until a shrinker is called upon for any\n"
+ "reason.\n"
+ "\n"
+ "This patch marks all memcg caches as dead. kmem_cache_shrink is called\n"
+ "for the ones who are not yet dead - this will force internal cache\n"
+ "reorganization, and then all references to empty pages will be removed.\n"
+ "\n"
+ "An unlikely branch is used to make sure this case does not affect\n"
+ "performance in the usual slab_free path.\n"
+ "\n"
+ "The slab allocator has a time based reaper that would eventually get rid\n"
+ "of the objects, but we can also call it explicitly, since dead caches\n"
+ "are not a likely event.\n"
+ "\n"
+ "[ v2: also call verify_dead for the slab ]\n"
+ "\n"
+ "Signed-off-by: Glauber Costa <glommer@parallels.com>\n"
+ "CC: Christoph Lameter <cl@linux.com>\n"
+ "CC: Pekka Enberg <penberg@cs.helsinki.fi>\n"
+ "CC: Michal Hocko <mhocko@suse.cz>\n"
+ "CC: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>\n"
+ "CC: Johannes Weiner <hannes@cmpxchg.org>\n"
+ "CC: Suleiman Souhlal <suleiman@google.com>\n"
+ "---\n"
+ " include/linux/slab.h |  3 +++\n"
+ " mm/memcontrol.c      | 44 +++++++++++++++++++++++++++++++++++++++++++-\n"
+ " mm/slab.c            |  2 ++\n"
+ " mm/slab.h            | 10 ++++++++++\n"
+ " mm/slub.c            |  1 +\n"
+ " 5 files changed, 59 insertions(+), 1 deletion(-)\n"
+ "\n"
+ "diff --git a/include/linux/slab.h b/include/linux/slab.h\n"
+ "index 9badb8c..765e12c 100644\n"
+ "--- a/include/linux/slab.h\n"
+ "+++ b/include/linux/slab.h\n"
+ "@@ -182,6 +182,8 @@ unsigned int kmem_cache_size(struct kmem_cache *);\n"
+ " #endif\n"
+ " \n"
+ " #ifdef CONFIG_MEMCG_KMEM\n"
+ "+#include <linux/workqueue.h>\n"
+ "+\n"
+ " struct mem_cgroup_cache_params {\n"
+ " \tstruct mem_cgroup *memcg;\n"
+ " \tstruct kmem_cache *parent;\n"
+ "@@ -190,6 +192,7 @@ struct mem_cgroup_cache_params {\n"
+ " \tatomic_t nr_pages;\n"
+ " \tstruct list_head destroyed_list; /* Used when deleting memcg cache */\n"
+ " \tstruct list_head sibling_list;\n"
+ "+\tstruct work_struct cache_shrinker;\n"
+ " };\n"
+ " #endif\n"
+ " \n"
+ "diff --git a/mm/memcontrol.c b/mm/memcontrol.c\n"
+ "index da38652..c0cf564 100644\n"
+ "--- a/mm/memcontrol.c\n"
+ "+++ b/mm/memcontrol.c\n"
+ "@@ -578,7 +578,7 @@ static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *cache\n"
+ " \n"
+ " \tBUG_ON(dentry == NULL);\n"
+ " \n"
+ "-\tname = kasprintf(GFP_KERNEL, \"%s(%d:%s)\",\n"
+ "+\tname = kasprintf(GFP_KERNEL, \"%s(%d:%s)dead\",\n"
+ " \t    cachep->name, css_id(&memcg->css), dentry->d_name.name);\n"
+ " \n"
+ " \treturn name;\n"
+ "@@ -739,12 +739,25 @@ static void disarm_kmem_keys(struct mem_cgroup *memcg)\n"
+ " \tWARN_ON(res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0);\n"
+ " }\n"
+ " \n"
+ "+static void cache_shrinker_work_func(struct work_struct *work)\n"
+ "+{\n"
+ "+\tstruct mem_cgroup_cache_params *params;\n"
+ "+\tstruct kmem_cache *cachep;\n"
+ "+\n"
+ "+\tparams = container_of(work, struct mem_cgroup_cache_params,\n"
+ "+\t\t\t      cache_shrinker);\n"
+ "+\tcachep = container_of(params, struct kmem_cache, memcg_params);\n"
+ "+\n"
+ "+\tkmem_cache_shrink(cachep);\n"
+ "+}\n"
+ "+\n"
+ " static DEFINE_MUTEX(memcg_cache_mutex);\n"
+ " static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,\n"
+ " \t\t\t\t\t\t  struct kmem_cache *cachep)\n"
+ " {\n"
+ " \tstruct kmem_cache *new_cachep;\n"
+ " \tint idx;\n"
+ "+\tchar *name;\n"
+ " \n"
+ " \tBUG_ON(!memcg_can_account_kmem(memcg));\n"
+ " \n"
+ "@@ -764,10 +777,21 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,\n"
+ " \t\tgoto out;\n"
+ " \t}\n"
+ " \n"
+ "+\t/*\n"
+ "+\t * Because the cache is expected to duplicate the string,\n"
+ "+\t * we must make sure it has opportunity to copy its full\n"
+ "+\t * name. Only now we can remove the dead part from it\n"
+ "+\t */\n"
+ "+\tname = (char *)new_cachep->name;\n"
+ "+\tif (name)\n"
+ "+\t\tname[strlen(name) - 4] = '\\0';\n"
+ "+\n"
+ " \tmem_cgroup_get(memcg);\n"
+ " \tmemcg->slabs[idx] = new_cachep;\n"
+ " \tnew_cachep->memcg_params.memcg = memcg;\n"
+ " \tatomic_set(&new_cachep->memcg_params.nr_pages , 0);\n"
+ "+\tINIT_WORK(&new_cachep->memcg_params.cache_shrinker,\n"
+ "+\t\t  cache_shrinker_work_func);\n"
+ " out:\n"
+ " \tmutex_unlock(&memcg_cache_mutex);\n"
+ " \treturn new_cachep;\n"
+ "@@ -790,6 +814,21 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)\n"
+ " \tstruct mem_cgroup_cache_params *p, *tmp;\n"
+ " \tunsigned long flags;\n"
+ " \tLIST_HEAD(del_unlocked);\n"
+ "+\tLIST_HEAD(shrinkers);\n"
+ "+\n"
+ "+\tspin_lock_irqsave(&cache_queue_lock, flags);\n"
+ "+\tlist_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) {\n"
+ "+\t\tcachep = container_of(p, struct kmem_cache, memcg_params);\n"
+ "+\t\tif (atomic_read(&cachep->memcg_params.nr_pages) != 0)\n"
+ "+\t\t\tlist_move(&cachep->memcg_params.destroyed_list, &shrinkers);\n"
+ "+\t}\n"
+ "+\tspin_unlock_irqrestore(&cache_queue_lock, flags);\n"
+ "+\n"
+ "+\tlist_for_each_entry_safe(p, tmp, &shrinkers, destroyed_list) {\n"
+ "+\t\tcachep = container_of(p, struct kmem_cache, memcg_params);\n"
+ "+\t\tlist_del(&cachep->memcg_params.destroyed_list);\n"
+ "+\t\tkmem_cache_shrink(cachep);\n"
+ "+\t}\n"
+ " \n"
+ " \tspin_lock_irqsave(&cache_queue_lock, flags);\n"
+ " \tlist_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) {\n"
+ "@@ -867,11 +906,14 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)\n"
+ " \n"
+ " \tspin_lock_irqsave(&cache_queue_lock, flags);\n"
+ " \tfor (i = 0; i < MAX_KMEM_CACHE_TYPES; i++) {\n"
+ "+\t\tchar *name;\n"
+ " \t\tcachep = memcg->slabs[i];\n"
+ " \t\tif (!cachep)\n"
+ " \t\t\tcontinue;\n"
+ " \n"
+ " \t\tcachep->memcg_params.dead = true;\n"
+ "+\t\tname = (char *)cachep->name;\n"
+ "+\t\tname[strlen(name)] = 'd';\n"
+ " \t\t__mem_cgroup_destroy_cache(cachep);\n"
+ " \t}\n"
+ " \tspin_unlock_irqrestore(&cache_queue_lock, flags);\n"
+ "diff --git a/mm/slab.c b/mm/slab.c\n"
+ "index bd9928f..6cb4abf 100644\n"
+ "--- a/mm/slab.c\n"
+ "+++ b/mm/slab.c\n"
+ "@@ -3785,6 +3785,8 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,\n"
+ " \t}\n"
+ " \n"
+ " \tac_put_obj(cachep, ac, objp);\n"
+ "+\n"
+ "+\tkmem_cache_verify_dead(cachep);\n"
+ " }\n"
+ " \n"
+ " /**\n"
+ "diff --git a/mm/slab.h b/mm/slab.h\n"
+ "index 6024ad1..d21b982 100644\n"
+ "--- a/mm/slab.h\n"
+ "+++ b/mm/slab.h\n"
+ "@@ -80,6 +80,12 @@ static inline bool slab_equal_or_parent(struct kmem_cache *s,\n"
+ " {\n"
+ " \treturn (p == s) || (p == s->memcg_params.parent);\n"
+ " }\n"
+ "+\n"
+ "+static inline void kmem_cache_verify_dead(struct kmem_cache *s)\n"
+ "+{\n"
+ "+\tif (unlikely(s->memcg_params.dead))\n"
+ "+\t\tschedule_work(&s->memcg_params.cache_shrinker);\n"
+ "+}\n"
+ " #else\n"
+ " static inline bool cache_match_memcg(struct kmem_cache *cachep,\n"
+ " \t\t\t\t     struct mem_cgroup *memcg)\n"
+ "@@ -100,5 +106,9 @@ static inline bool slab_equal_or_parent(struct kmem_cache *s,\n"
+ " {\n"
+ " \treturn true;\n"
+ " }\n"
+ "+\n"
+ "+static inline void kmem_cache_verify_dead(struct kmem_cache *s)\n"
+ "+{\n"
+ "+}\n"
+ " #endif\n"
+ " #endif\n"
+ "diff --git a/mm/slub.c b/mm/slub.c\n"
+ "index 0b68d15..9d79216 100644\n"
+ "--- a/mm/slub.c\n"
+ "+++ b/mm/slub.c\n"
+ "@@ -2602,6 +2602,7 @@ redo:\n"
+ " \t} else\n"
+ " \t\t__slab_free(s, page, x, addr);\n"
+ " \n"
+ "+\tkmem_cache_verify_dead(s);\n"
+ " }\n"
+ " \n"
+ " void kmem_cache_free(struct kmem_cache *s, void *x)\n"
+ "-- \n"
+ 1.7.11.4
 
-0fe4f001146babce6f082fb3387a841b3e02651c0d7c6a2034d5f633a59c0cab
+65ce43b0ad53ee4ff8fe7b6b6b9d5f96e7028e3d4ce91da50173cc572b4073ce

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.