diff for duplicates of <509A2849.9090509@parallels.com> diff --git a/a/2.txt b/N1/2.txt index 8b13789..f1ca272 100644 --- a/a/2.txt +++ b/N1/2.txt @@ -1 +1,216 @@ +>From c99404a760fa69e8ccda0ff4b2636c6abd1ac990 Mon Sep 17 00:00:00 2001 +From: Glauber Costa <glommer@parallels.com> +Date: Thu, 3 May 2012 13:33:03 -0300 +Subject: [PATCH v3 15/16] memcg/sl[au]b: shrink dead caches +In the slub allocator, when the last object of a page goes away, we +don't necessarily free it - there is not necessarily a test for empty +page in any slab_free path. + +This means that when we destroy a memcg cache that happened to be empty, +those caches may take a lot of time to go away: removing the memcg +reference won't destroy them - because there are pending references, and +the empty pages will stay there, until a shrinker is called upon for any +reason. + +This patch marks all memcg caches as dead. kmem_cache_shrink is called +for the ones who are not yet dead - this will force internal cache +reorganization, and then all references to empty pages will be removed. + +An unlikely branch is used to make sure this case does not affect +performance in the usual slab_free path. + +The slab allocator has a time based reaper that would eventually get rid +of the objects, but we can also call it explicitly, since dead caches +are not a likely event. + +[ v2: also call verify_dead for the slab ] + +Signed-off-by: Glauber Costa <glommer@parallels.com> +CC: Christoph Lameter <cl@linux.com> +CC: Pekka Enberg <penberg@cs.helsinki.fi> +CC: Michal Hocko <mhocko@suse.cz> +CC: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> +CC: Johannes Weiner <hannes@cmpxchg.org> +CC: Suleiman Souhlal <suleiman@google.com> +--- + include/linux/slab.h | 3 +++ + mm/memcontrol.c | 44 +++++++++++++++++++++++++++++++++++++++++++- + mm/slab.c | 2 ++ + mm/slab.h | 10 ++++++++++ + mm/slub.c | 1 + + 5 files changed, 59 insertions(+), 1 deletion(-) + +diff --git a/include/linux/slab.h b/include/linux/slab.h +index 9badb8c..765e12c 100644 +--- a/include/linux/slab.h ++++ b/include/linux/slab.h +@@ -182,6 +182,8 @@ unsigned int kmem_cache_size(struct kmem_cache *); + #endif + + #ifdef CONFIG_MEMCG_KMEM ++#include <linux/workqueue.h> ++ + struct mem_cgroup_cache_params { + struct mem_cgroup *memcg; + struct kmem_cache *parent; +@@ -190,6 +192,7 @@ struct mem_cgroup_cache_params { + atomic_t nr_pages; + struct list_head destroyed_list; /* Used when deleting memcg cache */ + struct list_head sibling_list; ++ struct work_struct cache_shrinker; + }; + #endif + +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index da38652..c0cf564 100644 +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -578,7 +578,7 @@ static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *cache + + BUG_ON(dentry == NULL); + +- name = kasprintf(GFP_KERNEL, "%s(%d:%s)", ++ name = kasprintf(GFP_KERNEL, "%s(%d:%s)dead", + cachep->name, css_id(&memcg->css), dentry->d_name.name); + + return name; +@@ -739,12 +739,25 @@ static void disarm_kmem_keys(struct mem_cgroup *memcg) + WARN_ON(res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0); + } + ++static void cache_shrinker_work_func(struct work_struct *work) ++{ ++ struct mem_cgroup_cache_params *params; ++ struct kmem_cache *cachep; ++ ++ params = container_of(work, struct mem_cgroup_cache_params, ++ cache_shrinker); ++ cachep = container_of(params, struct kmem_cache, memcg_params); ++ ++ kmem_cache_shrink(cachep); ++} ++ + static DEFINE_MUTEX(memcg_cache_mutex); + static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, + struct kmem_cache *cachep) + { + struct kmem_cache *new_cachep; + int idx; ++ char *name; + + BUG_ON(!memcg_can_account_kmem(memcg)); + +@@ -764,10 +777,21 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, + goto out; + } + ++ /* ++ * Because the cache is expected to duplicate the string, ++ * we must make sure it has opportunity to copy its full ++ * name. Only now we can remove the dead part from it ++ */ ++ name = (char *)new_cachep->name; ++ if (name) ++ name[strlen(name) - 4] = '\0'; ++ + mem_cgroup_get(memcg); + memcg->slabs[idx] = new_cachep; + new_cachep->memcg_params.memcg = memcg; + atomic_set(&new_cachep->memcg_params.nr_pages , 0); ++ INIT_WORK(&new_cachep->memcg_params.cache_shrinker, ++ cache_shrinker_work_func); + out: + mutex_unlock(&memcg_cache_mutex); + return new_cachep; +@@ -790,6 +814,21 @@ static void kmem_cache_destroy_work_func(struct work_struct *w) + struct mem_cgroup_cache_params *p, *tmp; + unsigned long flags; + LIST_HEAD(del_unlocked); ++ LIST_HEAD(shrinkers); ++ ++ spin_lock_irqsave(&cache_queue_lock, flags); ++ list_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) { ++ cachep = container_of(p, struct kmem_cache, memcg_params); ++ if (atomic_read(&cachep->memcg_params.nr_pages) != 0) ++ list_move(&cachep->memcg_params.destroyed_list, &shrinkers); ++ } ++ spin_unlock_irqrestore(&cache_queue_lock, flags); ++ ++ list_for_each_entry_safe(p, tmp, &shrinkers, destroyed_list) { ++ cachep = container_of(p, struct kmem_cache, memcg_params); ++ list_del(&cachep->memcg_params.destroyed_list); ++ kmem_cache_shrink(cachep); ++ } + + spin_lock_irqsave(&cache_queue_lock, flags); + list_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) { +@@ -867,11 +906,14 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg) + + spin_lock_irqsave(&cache_queue_lock, flags); + for (i = 0; i < MAX_KMEM_CACHE_TYPES; i++) { ++ char *name; + cachep = memcg->slabs[i]; + if (!cachep) + continue; + + cachep->memcg_params.dead = true; ++ name = (char *)cachep->name; ++ name[strlen(name)] = 'd'; + __mem_cgroup_destroy_cache(cachep); + } + spin_unlock_irqrestore(&cache_queue_lock, flags); +diff --git a/mm/slab.c b/mm/slab.c +index bd9928f..6cb4abf 100644 +--- a/mm/slab.c ++++ b/mm/slab.c +@@ -3785,6 +3785,8 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp, + } + + ac_put_obj(cachep, ac, objp); ++ ++ kmem_cache_verify_dead(cachep); + } + + /** +diff --git a/mm/slab.h b/mm/slab.h +index 6024ad1..d21b982 100644 +--- a/mm/slab.h ++++ b/mm/slab.h +@@ -80,6 +80,12 @@ static inline bool slab_equal_or_parent(struct kmem_cache *s, + { + return (p == s) || (p == s->memcg_params.parent); + } ++ ++static inline void kmem_cache_verify_dead(struct kmem_cache *s) ++{ ++ if (unlikely(s->memcg_params.dead)) ++ schedule_work(&s->memcg_params.cache_shrinker); ++} + #else + static inline bool cache_match_memcg(struct kmem_cache *cachep, + struct mem_cgroup *memcg) +@@ -100,5 +106,9 @@ static inline bool slab_equal_or_parent(struct kmem_cache *s, + { + return true; + } ++ ++static inline void kmem_cache_verify_dead(struct kmem_cache *s) ++{ ++} + #endif + #endif +diff --git a/mm/slub.c b/mm/slub.c +index 0b68d15..9d79216 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2602,6 +2602,7 @@ redo: + } else + __slab_free(s, page, x, addr); + ++ kmem_cache_verify_dead(s); + } + + void kmem_cache_free(struct kmem_cache *s, void *x) +-- +1.7.11.4 diff --git a/a/content_digest b/N1/content_digest index 4b74ad0..b9749d2 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -7,9 +7,9 @@ "Subject\0Re: [PATCH v6 25/29] memcg/sl[au]b: shrink dead caches\0" "Date\0Wed, 7 Nov 2012 10:22:17 +0100\0" "To\0Andrew Morton <akpm@linux-foundation.org>\0" - "Cc\0linux-mm@kvack.org" - linux-kernel@vger.kernel.org - kamezawa.hiroyu@jp.fujitsu.com + "Cc\0<linux-mm@kvack.org>" + <linux-kernel@vger.kernel.org> + <kamezawa.hiroyu@jp.fujitsu.com> Johannes Weiner <hannes@cmpxchg.org> Tejun Heo <tj@kernel.org> Michal Hocko <mhocko@suse.cz> @@ -94,5 +94,221 @@ "\01:2\0" "fn\00015-memcg-sl-au-b-shrink-dead-caches.patch\0" "b\0" + ">From c99404a760fa69e8ccda0ff4b2636c6abd1ac990 Mon Sep 17 00:00:00 2001\n" + "From: Glauber Costa <glommer@parallels.com>\n" + "Date: Thu, 3 May 2012 13:33:03 -0300\n" + "Subject: [PATCH v3 15/16] memcg/sl[au]b: shrink dead caches\n" + "\n" + "In the slub allocator, when the last object of a page goes away, we\n" + "don't necessarily free it - there is not necessarily a test for empty\n" + "page in any slab_free path.\n" + "\n" + "This means that when we destroy a memcg cache that happened to be empty,\n" + "those caches may take a lot of time to go away: removing the memcg\n" + "reference won't destroy them - because there are pending references, and\n" + "the empty pages will stay there, until a shrinker is called upon for any\n" + "reason.\n" + "\n" + "This patch marks all memcg caches as dead. kmem_cache_shrink is called\n" + "for the ones who are not yet dead - this will force internal cache\n" + "reorganization, and then all references to empty pages will be removed.\n" + "\n" + "An unlikely branch is used to make sure this case does not affect\n" + "performance in the usual slab_free path.\n" + "\n" + "The slab allocator has a time based reaper that would eventually get rid\n" + "of the objects, but we can also call it explicitly, since dead caches\n" + "are not a likely event.\n" + "\n" + "[ v2: also call verify_dead for the slab ]\n" + "\n" + "Signed-off-by: Glauber Costa <glommer@parallels.com>\n" + "CC: Christoph Lameter <cl@linux.com>\n" + "CC: Pekka Enberg <penberg@cs.helsinki.fi>\n" + "CC: Michal Hocko <mhocko@suse.cz>\n" + "CC: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>\n" + "CC: Johannes Weiner <hannes@cmpxchg.org>\n" + "CC: Suleiman Souhlal <suleiman@google.com>\n" + "---\n" + " include/linux/slab.h | 3 +++\n" + " mm/memcontrol.c | 44 +++++++++++++++++++++++++++++++++++++++++++-\n" + " mm/slab.c | 2 ++\n" + " mm/slab.h | 10 ++++++++++\n" + " mm/slub.c | 1 +\n" + " 5 files changed, 59 insertions(+), 1 deletion(-)\n" + "\n" + "diff --git a/include/linux/slab.h b/include/linux/slab.h\n" + "index 9badb8c..765e12c 100644\n" + "--- a/include/linux/slab.h\n" + "+++ b/include/linux/slab.h\n" + "@@ -182,6 +182,8 @@ unsigned int kmem_cache_size(struct kmem_cache *);\n" + " #endif\n" + " \n" + " #ifdef CONFIG_MEMCG_KMEM\n" + "+#include <linux/workqueue.h>\n" + "+\n" + " struct mem_cgroup_cache_params {\n" + " \tstruct mem_cgroup *memcg;\n" + " \tstruct kmem_cache *parent;\n" + "@@ -190,6 +192,7 @@ struct mem_cgroup_cache_params {\n" + " \tatomic_t nr_pages;\n" + " \tstruct list_head destroyed_list; /* Used when deleting memcg cache */\n" + " \tstruct list_head sibling_list;\n" + "+\tstruct work_struct cache_shrinker;\n" + " };\n" + " #endif\n" + " \n" + "diff --git a/mm/memcontrol.c b/mm/memcontrol.c\n" + "index da38652..c0cf564 100644\n" + "--- a/mm/memcontrol.c\n" + "+++ b/mm/memcontrol.c\n" + "@@ -578,7 +578,7 @@ static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *cache\n" + " \n" + " \tBUG_ON(dentry == NULL);\n" + " \n" + "-\tname = kasprintf(GFP_KERNEL, \"%s(%d:%s)\",\n" + "+\tname = kasprintf(GFP_KERNEL, \"%s(%d:%s)dead\",\n" + " \t cachep->name, css_id(&memcg->css), dentry->d_name.name);\n" + " \n" + " \treturn name;\n" + "@@ -739,12 +739,25 @@ static void disarm_kmem_keys(struct mem_cgroup *memcg)\n" + " \tWARN_ON(res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0);\n" + " }\n" + " \n" + "+static void cache_shrinker_work_func(struct work_struct *work)\n" + "+{\n" + "+\tstruct mem_cgroup_cache_params *params;\n" + "+\tstruct kmem_cache *cachep;\n" + "+\n" + "+\tparams = container_of(work, struct mem_cgroup_cache_params,\n" + "+\t\t\t cache_shrinker);\n" + "+\tcachep = container_of(params, struct kmem_cache, memcg_params);\n" + "+\n" + "+\tkmem_cache_shrink(cachep);\n" + "+}\n" + "+\n" + " static DEFINE_MUTEX(memcg_cache_mutex);\n" + " static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,\n" + " \t\t\t\t\t\t struct kmem_cache *cachep)\n" + " {\n" + " \tstruct kmem_cache *new_cachep;\n" + " \tint idx;\n" + "+\tchar *name;\n" + " \n" + " \tBUG_ON(!memcg_can_account_kmem(memcg));\n" + " \n" + "@@ -764,10 +777,21 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,\n" + " \t\tgoto out;\n" + " \t}\n" + " \n" + "+\t/*\n" + "+\t * Because the cache is expected to duplicate the string,\n" + "+\t * we must make sure it has opportunity to copy its full\n" + "+\t * name. Only now we can remove the dead part from it\n" + "+\t */\n" + "+\tname = (char *)new_cachep->name;\n" + "+\tif (name)\n" + "+\t\tname[strlen(name) - 4] = '\\0';\n" + "+\n" + " \tmem_cgroup_get(memcg);\n" + " \tmemcg->slabs[idx] = new_cachep;\n" + " \tnew_cachep->memcg_params.memcg = memcg;\n" + " \tatomic_set(&new_cachep->memcg_params.nr_pages , 0);\n" + "+\tINIT_WORK(&new_cachep->memcg_params.cache_shrinker,\n" + "+\t\t cache_shrinker_work_func);\n" + " out:\n" + " \tmutex_unlock(&memcg_cache_mutex);\n" + " \treturn new_cachep;\n" + "@@ -790,6 +814,21 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)\n" + " \tstruct mem_cgroup_cache_params *p, *tmp;\n" + " \tunsigned long flags;\n" + " \tLIST_HEAD(del_unlocked);\n" + "+\tLIST_HEAD(shrinkers);\n" + "+\n" + "+\tspin_lock_irqsave(&cache_queue_lock, flags);\n" + "+\tlist_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) {\n" + "+\t\tcachep = container_of(p, struct kmem_cache, memcg_params);\n" + "+\t\tif (atomic_read(&cachep->memcg_params.nr_pages) != 0)\n" + "+\t\t\tlist_move(&cachep->memcg_params.destroyed_list, &shrinkers);\n" + "+\t}\n" + "+\tspin_unlock_irqrestore(&cache_queue_lock, flags);\n" + "+\n" + "+\tlist_for_each_entry_safe(p, tmp, &shrinkers, destroyed_list) {\n" + "+\t\tcachep = container_of(p, struct kmem_cache, memcg_params);\n" + "+\t\tlist_del(&cachep->memcg_params.destroyed_list);\n" + "+\t\tkmem_cache_shrink(cachep);\n" + "+\t}\n" + " \n" + " \tspin_lock_irqsave(&cache_queue_lock, flags);\n" + " \tlist_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) {\n" + "@@ -867,11 +906,14 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)\n" + " \n" + " \tspin_lock_irqsave(&cache_queue_lock, flags);\n" + " \tfor (i = 0; i < MAX_KMEM_CACHE_TYPES; i++) {\n" + "+\t\tchar *name;\n" + " \t\tcachep = memcg->slabs[i];\n" + " \t\tif (!cachep)\n" + " \t\t\tcontinue;\n" + " \n" + " \t\tcachep->memcg_params.dead = true;\n" + "+\t\tname = (char *)cachep->name;\n" + "+\t\tname[strlen(name)] = 'd';\n" + " \t\t__mem_cgroup_destroy_cache(cachep);\n" + " \t}\n" + " \tspin_unlock_irqrestore(&cache_queue_lock, flags);\n" + "diff --git a/mm/slab.c b/mm/slab.c\n" + "index bd9928f..6cb4abf 100644\n" + "--- a/mm/slab.c\n" + "+++ b/mm/slab.c\n" + "@@ -3785,6 +3785,8 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,\n" + " \t}\n" + " \n" + " \tac_put_obj(cachep, ac, objp);\n" + "+\n" + "+\tkmem_cache_verify_dead(cachep);\n" + " }\n" + " \n" + " /**\n" + "diff --git a/mm/slab.h b/mm/slab.h\n" + "index 6024ad1..d21b982 100644\n" + "--- a/mm/slab.h\n" + "+++ b/mm/slab.h\n" + "@@ -80,6 +80,12 @@ static inline bool slab_equal_or_parent(struct kmem_cache *s,\n" + " {\n" + " \treturn (p == s) || (p == s->memcg_params.parent);\n" + " }\n" + "+\n" + "+static inline void kmem_cache_verify_dead(struct kmem_cache *s)\n" + "+{\n" + "+\tif (unlikely(s->memcg_params.dead))\n" + "+\t\tschedule_work(&s->memcg_params.cache_shrinker);\n" + "+}\n" + " #else\n" + " static inline bool cache_match_memcg(struct kmem_cache *cachep,\n" + " \t\t\t\t struct mem_cgroup *memcg)\n" + "@@ -100,5 +106,9 @@ static inline bool slab_equal_or_parent(struct kmem_cache *s,\n" + " {\n" + " \treturn true;\n" + " }\n" + "+\n" + "+static inline void kmem_cache_verify_dead(struct kmem_cache *s)\n" + "+{\n" + "+}\n" + " #endif\n" + " #endif\n" + "diff --git a/mm/slub.c b/mm/slub.c\n" + "index 0b68d15..9d79216 100644\n" + "--- a/mm/slub.c\n" + "+++ b/mm/slub.c\n" + "@@ -2602,6 +2602,7 @@ redo:\n" + " \t} else\n" + " \t\t__slab_free(s, page, x, addr);\n" + " \n" + "+\tkmem_cache_verify_dead(s);\n" + " }\n" + " \n" + " void kmem_cache_free(struct kmem_cache *s, void *x)\n" + "-- \n" + 1.7.11.4 -0fe4f001146babce6f082fb3387a841b3e02651c0d7c6a2034d5f633a59c0cab +65ce43b0ad53ee4ff8fe7b6b6b9d5f96e7028e3d4ce91da50173cc572b4073ce
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.