Re: [PATCH 2/7] memcg,list_lru: duplicate LRUs upon kmemcg creation

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Kamezawa Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
To: Glauber Costa <glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
Cc: linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org,
	cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Andrew Morton
	<akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>,
	Michal Hocko <mhocko-AlSwsSmVLrQ@public.gmane.org>,
	Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>,
	Dave Shrinnker <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org>,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Dave Chinner <dchinner-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
	Mel Gorman <mgorman-l3A5Bk7waGM@public.gmane.org>,
	Rik van Riel <riel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
	Hugh Dickins <hughd-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Subject: Re: [PATCH 2/7] memcg,list_lru: duplicate LRUs upon kmemcg creation
Date: Fri, 15 Feb 2013 18:21:38 +0900	[thread overview]
Message-ID: <511DFE22.4000003@jp.fujitsu.com> (raw)
In-Reply-To: <1360328857-28070-3-git-send-email-glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>

(2013/02/08 22:07), Glauber Costa wrote:
> When a new memcg is created, we need to open up room for its descriptors
> in all of the list_lrus that are marked per-memcg. The process is quite
> similar to the one we are using for the kmem caches: we initialize the
> new structures in an array indexed by kmemcg_id, and grow the array if
> needed. Key data like the size of the array will be shared between the
> kmem cache code and the list_lru code (they basically describe the same
> thing)
> 
> Signed-off-by: Glauber Costa <glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
> Cc: Dave Chinner <dchinner-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
> Cc: Mel Gorman <mgorman-l3A5Bk7waGM@public.gmane.org>
> Cc: Rik van Riel <riel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
> Cc: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
> Cc: Michal Hocko <mhocko-AlSwsSmVLrQ@public.gmane.org>
> Cc: Hugh Dickins <hughd-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> Cc: Kamezawa Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
> Cc: Andrew Morton <akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
> ---
>   include/linux/list_lru.h   |  47 +++++++++++++++++
>   include/linux/memcontrol.h |   6 +++
>   lib/list_lru.c             | 115 +++++++++++++++++++++++++++++++++++++---
>   mm/memcontrol.c            | 128 ++++++++++++++++++++++++++++++++++++++++++---
>   mm/slab_common.c           |   1 -
>   5 files changed, 283 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
> index 02796da..370b989 100644
> --- a/include/linux/list_lru.h
> +++ b/include/linux/list_lru.h
> @@ -16,11 +16,58 @@ struct list_lru_node {
>   	long			nr_items;
>   } ____cacheline_aligned_in_smp;
>   
> +struct list_lru_array {
> +	struct list_lru_node node[1];
> +};

size is up to nr_node_ids ?

> +
>   struct list_lru {
> +	struct list_head	lrus;
>   	struct list_lru_node	node[MAX_NUMNODES];
>   	nodemask_t		active_nodes;
> +#ifdef CONFIG_MEMCG_KMEM
> +	struct list_lru_array	**memcg_lrus;
> +#endif
>   };
size is up to memcg_limited_groups_array_size ?


>   
> +struct mem_cgroup;
> +#ifdef CONFIG_MEMCG_KMEM
> +/*
> + * We will reuse the last bit of the pointer to tell the lru subsystem that
> + * this particular lru should be replicated when a memcg comes in.
> + */
> +static inline void lru_memcg_enable(struct list_lru *lru)
> +{
> +	lru->memcg_lrus = (void *)0x1ULL;
> +}
> +

This "enable" is not used in this patch itself, right ?

> +/*
> + * This will return true if we have already allocated and assignment a memcg
> + * pointer set to the LRU. Therefore, we need to mask the first bit out
> + */
> +static inline bool lru_memcg_is_assigned(struct list_lru *lru)
> +{
> +	return (unsigned long)lru->memcg_lrus & ~0x1ULL;
> +}
> +
> +struct list_lru_array *lru_alloc_array(void);
> +int memcg_update_all_lrus(unsigned long num);
> +void list_lru_destroy(struct list_lru *lru);
> +void list_lru_destroy_memcg(struct mem_cgroup *memcg);
> +#else
> +static inline void lru_memcg_enable(struct list_lru *lru)
> +{
> +}
> +
> +static inline bool lru_memcg_is_assigned(struct list_lru *lru)
> +{
> +	return false;
> +}
> +
> +static inline void list_lru_destroy(struct list_lru *lru)
> +{
> +}
> +#endif
> +
>   int list_lru_init(struct list_lru *lru);
>   int list_lru_add(struct list_lru *lru, struct list_head *item);
>   int list_lru_del(struct list_lru *lru, struct list_head *item);
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index b7de557..f9558d0 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -23,6 +23,7 @@
>   #include <linux/vm_event_item.h>
>   #include <linux/hardirq.h>
>   #include <linux/jump_label.h>
> +#include <linux/list_lru.h>
>   
>   struct mem_cgroup;
>   struct page_cgroup;
> @@ -475,6 +476,11 @@ void memcg_update_array_size(int num_groups);
>   struct kmem_cache *
>   __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
>   
> +int memcg_new_lru(struct list_lru *lru);
> +
> +int memcg_kmem_update_lru_size(struct list_lru *lru, int num_groups,
> +			       bool new_lru);
> +
>   void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
>   void kmem_cache_destroy_memcg_children(struct kmem_cache *s);
>   
> diff --git a/lib/list_lru.c b/lib/list_lru.c
> index 0f08ed6..3b0e89d 100644
> --- a/lib/list_lru.c
> +++ b/lib/list_lru.c
> @@ -8,6 +8,7 @@
>   #include <linux/module.h>
>   #include <linux/mm.h>
>   #include <linux/list_lru.h>
> +#include <linux/memcontrol.h>
>   
>   int
>   list_lru_add(
> @@ -184,18 +185,118 @@ list_lru_dispose_all(
>   	return total;
>   }
>   
> -int
> -list_lru_init(
> -	struct list_lru	*lru)
> +/*
> + * This protects the list of all LRU in the system. One only needs
> + * to take when registering an LRU, or when duplicating the list of lrus.
> + * Transversing an LRU can and should be done outside the lock
> + */
> +static DEFINE_MUTEX(all_lrus_mutex);
> +static LIST_HEAD(all_lrus);
> +
> +static void list_lru_init_one(struct list_lru_node *lru)
> +{
> +	spin_lock_init(&lru->lock);
> +	INIT_LIST_HEAD(&lru->list);
> +	lru->nr_items = 0;
> +}
> +
> +struct list_lru_array *lru_alloc_array(void)
> +{
> +	struct list_lru_array *lru_array;
> +	int i;
> +
> +	lru_array = kzalloc(nr_node_ids * sizeof(struct list_lru_node),
> +				GFP_KERNEL);
> +	if (!lru_array)
> +		return NULL;
> +
> +	for (i = 0; i < nr_node_ids ; i++)
> +		list_lru_init_one(&lru_array->node[i]);
> +
> +	return lru_array;
> +}
> +
> +int __list_lru_init(struct list_lru *lru)
>   {
>   	int i;
>   
>   	nodes_clear(lru->active_nodes);
> -	for (i = 0; i < MAX_NUMNODES; i++) {
> -		spin_lock_init(&lru->node[i].lock);
> -		INIT_LIST_HEAD(&lru->node[i].list);
> -		lru->node[i].nr_items = 0;
> +	for (i = 0; i < MAX_NUMNODES; i++)
> +		list_lru_init_one(&lru->node[i]);

Hmm. lru_list is up to MAX_NUMNODES, your new one is up to nr_node_ids...

> +
> +	return 0;
> +}
> +
> +#ifdef CONFIG_MEMCG_KMEM
> +static int memcg_init_lru(struct list_lru *lru)
> +{
> +	int ret;
> +
> +	if (!lru->memcg_lrus)
> +		return 0;
> +
> +	INIT_LIST_HEAD(&lru->lrus);
> +	mutex_lock(&all_lrus_mutex);
> +	list_add(&lru->lrus, &all_lrus);
> +	ret = memcg_new_lru(lru);
> +	mutex_unlock(&all_lrus_mutex);
> +	return ret;
> +}

 only writer takes this mutex ?

> +
> +int memcg_update_all_lrus(unsigned long num)
> +{
> +	int ret = 0;
> +	struct list_lru *lru;
> +
> +	mutex_lock(&all_lrus_mutex);
> +	list_for_each_entry(lru, &all_lrus, lrus) {
> +		if (!lru->memcg_lrus)
> +			continue;
> +
> +		ret = memcg_kmem_update_lru_size(lru, num, false);
> +		if (ret)
> +			goto out;
> +	}
> +out:
> +	mutex_unlock(&all_lrus_mutex);
> +	return ret;
> +}
> +
> +void list_lru_destroy(struct list_lru *lru)
> +{
> +	if (!lru->memcg_lrus)
> +		return;
> +
> +	mutex_lock(&all_lrus_mutex);
> +	list_del(&lru->lrus);
> +	mutex_unlock(&all_lrus_mutex);
> +}
> +
> +void list_lru_destroy_memcg(struct mem_cgroup *memcg)
> +{
> +	struct list_lru *lru;
> +	mutex_lock(&all_lrus_mutex);
> +	list_for_each_entry(lru, &all_lrus, lrus) {
> +		lru->memcg_lrus[memcg_cache_id(memcg)] = NULL;
> +		/* everybody must beaware that this memcg is no longer valid */

Hm, the object pointed by this array entry will be freed by some other func ?

> +		wmb();
>   	}
> +	mutex_unlock(&all_lrus_mutex);
> +}
> +#else
> +static int memcg_init_lru(struct list_lru *lru)
> +{
>   	return 0;
>   }
> +#endif
> +
> +int list_lru_init(struct list_lru *lru)
> +{
> +	int ret;
> +	ret = __list_lru_init(lru);
> +	if (ret)
> +		return ret;
> +
> +	return memcg_init_lru(lru);
> +}
>   EXPORT_SYMBOL_GPL(list_lru_init);
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b1d4dfa..b9e1941 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3032,16 +3032,30 @@ int memcg_update_cache_sizes(struct mem_cgroup *memcg)
>   	memcg_kmem_set_activated(memcg);
>   
>   	ret = memcg_update_all_caches(num+1);
> -	if (ret) {
> -		ida_simple_remove(&kmem_limited_groups, num);
> -		memcg_kmem_clear_activated(memcg);
> -		return ret;
> -	}
> +	if (ret)
> +		goto out;
> +
> +	/*
> +	 * We should make sure that the array size is not updated until we are
> +	 * done; otherwise we have no easy way to know whether or not we should
> +	 * grow the array.
> +	 */
> +	ret = memcg_update_all_lrus(num + 1);
> +	if (ret)
> +		goto out;
>   
>   	memcg->kmemcg_id = num;
> +
> +	memcg_update_array_size(num + 1);
> +
>   	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
>   	mutex_init(&memcg->slab_caches_mutex);
> +
>   	return 0;
> +out:
> +	ida_simple_remove(&kmem_limited_groups, num);
> +	memcg_kmem_clear_activated(memcg);
> +	return ret;
>   }
>   
>   static size_t memcg_caches_array_size(int num_groups)
> @@ -3121,6 +3135,106 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
>   	return 0;
>   }
>   
> +/*
> + * memcg_kmem_update_lru_size - fill in kmemcg info into a list_lru
> + *
> + * @lru: the lru we are operating with
> + * @num_groups: how many kmem-limited cgroups we have
> + * @new_lru: true if this is a new_lru being created, false if this
> + * was triggered from the memcg side
> + *
> + * Returns 0 on success, and an error code otherwise.
> + *
> + * This function can be called either when a new kmem-limited memcg appears,
> + * or when a new list_lru is created. The work is roughly the same in two cases,
> + * but in the later we never have to expand the array size.
> + *
> + * This is always protected by the all_lrus_mutex from the list_lru side.
> + */
> +int memcg_kmem_update_lru_size(struct list_lru *lru, int num_groups,
> +			       bool new_lru)
> +{
> +	struct list_lru_array **new_lru_array;
> +	struct list_lru_array *lru_array;
> +

Both are named as array ...confusing ;)

> +	lru_array = lru_alloc_array();
> +	if (!lru_array)
> +		return -ENOMEM;
> +
> +	/* need some fucked up locking around the list acquisition */
> +	if ((num_groups > memcg_limited_groups_array_size) || new_lru) {
> +		int i;
> +		struct list_lru_array **old_array;
> +		size_t size = memcg_caches_array_size(num_groups);
> +
> +		new_lru_array = kzalloc(size * sizeof(void *), GFP_KERNEL);
> +		if (!new_lru_array) {
> +			kfree(lru_array);
> +			return -ENOMEM;
> +		}
> +
> +		for (i = 0; i < memcg_limited_groups_array_size; i++) {
> +			if (!lru_memcg_is_assigned(lru) || lru->memcg_lrus[i])
> +				continue;
> +			new_lru_array[i] =  lru->memcg_lrus[i];
> +		}
> +
> +		old_array = lru->memcg_lrus;
> +		lru->memcg_lrus = new_lru_array;
> +		/*
> +		 * We don't need a barrier here because we are just copying
> +		 * information over. Anybody operating in memcg_lrus will
> +		 * either follow the new array or the old one and they contain
> +		 * exactly the same information. The new space in the end is
> +		 * always empty anyway.
> +		 *
> +		 * We do have to make sure that no more users of the old
> +		 * memcg_lrus array exist before we free, and this is achieved
> +		 * by the synchronize_lru below.
> +		 */
> +		if (lru_memcg_is_assigned(lru)) {
> +			synchronize_rcu();
> +			kfree(old_array);
> +		}
> +
> +	}
> +
> +	if (lru_memcg_is_assigned(lru)) {
> +		lru->memcg_lrus[num_groups - 1] = lru_array;

Can't this pointer already set ?

> +		/*
> +		 * Here we do need the barrier, because of the state transition
> +		 * implied by the assignment of the array. All users should be
> +		 * able to see it
> +		 */
> +		wmb();
> +	}
> +
> +	return 0;
> +
> +}
> +
> +int memcg_new_lru(struct list_lru *lru)
> +{
> +	struct mem_cgroup *iter;
> +
> +	if (!memcg_kmem_enabled())
> +		return 0;
> +
> +	for_each_mem_cgroup(iter) {
> +		int ret;
> +		int memcg_id = memcg_cache_id(iter);
> +		if (memcg_id < 0)
> +			continue;
> +
> +		ret = memcg_kmem_update_lru_size(lru, memcg_id + 1, true);
> +		if (ret) {
> +			mem_cgroup_iter_break(root_mem_cgroup, iter);
> +			return ret;
> +		}
> +	}
> +	return 0;
> +}
> +
>   int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
>   			 struct kmem_cache *root_cache)
>   {
> @@ -5914,8 +6028,10 @@ static void kmem_cgroup_destroy(struct mem_cgroup *memcg)
>   	 * possible that the charges went down to 0 between mark_dead and the
>   	 * res_counter read, so in that case, we don't need the put
>   	 */
> -	if (memcg_kmem_test_and_clear_dead(memcg))
> +	if (memcg_kmem_test_and_clear_dead(memcg)) {
> +		list_lru_destroy_memcg(memcg);
>   		mem_cgroup_put(memcg);
> +	}
>   }
>   #else
>   static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> indek
x 3f3cd97..2470d11 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -102,7 +102,6 @@ int memcg_update_all_caches(int num_memcgs)
>   			goto out;
>   	}
>   
> -	memcg_update_array_size(num_memcgs);
>   out:
>   	mutex_unlock(&slab_mutex);
>   	return ret;
> 


Thanks,
-Kame

WARNING: multiple messages have this Message-ID (diff)

From: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: Glauber Costa <glommer@parallels.com>
Cc: linux-mm@kvack.org, cgroups@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	Michal Hocko <mhocko@suse.cz>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Dave Shrinnker <david@fromorbit.com>,
	linux-fsdevel@vger.kernel.org, Dave Chinner <dchinner@redhat.com>,
	Mel Gorman <mgorman@suse.de>, Rik van Riel <riel@redhat.com>,
	Hugh Dickins <hughd@google.com>
Subject: Re: [PATCH 2/7] memcg,list_lru: duplicate LRUs upon kmemcg creation
Date: Fri, 15 Feb 2013 18:21:38 +0900	[thread overview]
Message-ID: <511DFE22.4000003@jp.fujitsu.com> (raw)
In-Reply-To: <1360328857-28070-3-git-send-email-glommer@parallels.com>

(2013/02/08 22:07), Glauber Costa wrote:
> When a new memcg is created, we need to open up room for its descriptors
> in all of the list_lrus that are marked per-memcg. The process is quite
> similar to the one we are using for the kmem caches: we initialize the
> new structures in an array indexed by kmemcg_id, and grow the array if
> needed. Key data like the size of the array will be shared between the
> kmem cache code and the list_lru code (they basically describe the same
> thing)
> 
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> Cc: Dave Chinner <dchinner@redhat.com>
> Cc: Mel Gorman <mgorman@suse.de>
> Cc: Rik van Riel <riel@redhat.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Michal Hocko <mhocko@suse.cz>
> Cc: Hugh Dickins <hughd@google.com>
> Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> ---
>   include/linux/list_lru.h   |  47 +++++++++++++++++
>   include/linux/memcontrol.h |   6 +++
>   lib/list_lru.c             | 115 +++++++++++++++++++++++++++++++++++++---
>   mm/memcontrol.c            | 128 ++++++++++++++++++++++++++++++++++++++++++---
>   mm/slab_common.c           |   1 -
>   5 files changed, 283 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
> index 02796da..370b989 100644
> --- a/include/linux/list_lru.h
> +++ b/include/linux/list_lru.h
> @@ -16,11 +16,58 @@ struct list_lru_node {
>   	long			nr_items;
>   } ____cacheline_aligned_in_smp;
>   
> +struct list_lru_array {
> +	struct list_lru_node node[1];
> +};

size is up to nr_node_ids ?

> +
>   struct list_lru {
> +	struct list_head	lrus;
>   	struct list_lru_node	node[MAX_NUMNODES];
>   	nodemask_t		active_nodes;
> +#ifdef CONFIG_MEMCG_KMEM
> +	struct list_lru_array	**memcg_lrus;
> +#endif
>   };
size is up to memcg_limited_groups_array_size ?


>   
> +struct mem_cgroup;
> +#ifdef CONFIG_MEMCG_KMEM
> +/*
> + * We will reuse the last bit of the pointer to tell the lru subsystem that
> + * this particular lru should be replicated when a memcg comes in.
> + */
> +static inline void lru_memcg_enable(struct list_lru *lru)
> +{
> +	lru->memcg_lrus = (void *)0x1ULL;
> +}
> +

This "enable" is not used in this patch itself, right ?

> +/*
> + * This will return true if we have already allocated and assignment a memcg
> + * pointer set to the LRU. Therefore, we need to mask the first bit out
> + */
> +static inline bool lru_memcg_is_assigned(struct list_lru *lru)
> +{
> +	return (unsigned long)lru->memcg_lrus & ~0x1ULL;
> +}
> +
> +struct list_lru_array *lru_alloc_array(void);
> +int memcg_update_all_lrus(unsigned long num);
> +void list_lru_destroy(struct list_lru *lru);
> +void list_lru_destroy_memcg(struct mem_cgroup *memcg);
> +#else
> +static inline void lru_memcg_enable(struct list_lru *lru)
> +{
> +}
> +
> +static inline bool lru_memcg_is_assigned(struct list_lru *lru)
> +{
> +	return false;
> +}
> +
> +static inline void list_lru_destroy(struct list_lru *lru)
> +{
> +}
> +#endif
> +
>   int list_lru_init(struct list_lru *lru);
>   int list_lru_add(struct list_lru *lru, struct list_head *item);
>   int list_lru_del(struct list_lru *lru, struct list_head *item);
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index b7de557..f9558d0 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -23,6 +23,7 @@
>   #include <linux/vm_event_item.h>
>   #include <linux/hardirq.h>
>   #include <linux/jump_label.h>
> +#include <linux/list_lru.h>
>   
>   struct mem_cgroup;
>   struct page_cgroup;
> @@ -475,6 +476,11 @@ void memcg_update_array_size(int num_groups);
>   struct kmem_cache *
>   __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
>   
> +int memcg_new_lru(struct list_lru *lru);
> +
> +int memcg_kmem_update_lru_size(struct list_lru *lru, int num_groups,
> +			       bool new_lru);
> +
>   void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
>   void kmem_cache_destroy_memcg_children(struct kmem_cache *s);
>   
> diff --git a/lib/list_lru.c b/lib/list_lru.c
> index 0f08ed6..3b0e89d 100644
> --- a/lib/list_lru.c
> +++ b/lib/list_lru.c
> @@ -8,6 +8,7 @@
>   #include <linux/module.h>
>   #include <linux/mm.h>
>   #include <linux/list_lru.h>
> +#include <linux/memcontrol.h>
>   
>   int
>   list_lru_add(
> @@ -184,18 +185,118 @@ list_lru_dispose_all(
>   	return total;
>   }
>   
> -int
> -list_lru_init(
> -	struct list_lru	*lru)
> +/*
> + * This protects the list of all LRU in the system. One only needs
> + * to take when registering an LRU, or when duplicating the list of lrus.
> + * Transversing an LRU can and should be done outside the lock
> + */
> +static DEFINE_MUTEX(all_lrus_mutex);
> +static LIST_HEAD(all_lrus);
> +
> +static void list_lru_init_one(struct list_lru_node *lru)
> +{
> +	spin_lock_init(&lru->lock);
> +	INIT_LIST_HEAD(&lru->list);
> +	lru->nr_items = 0;
> +}
> +
> +struct list_lru_array *lru_alloc_array(void)
> +{
> +	struct list_lru_array *lru_array;
> +	int i;
> +
> +	lru_array = kzalloc(nr_node_ids * sizeof(struct list_lru_node),
> +				GFP_KERNEL);
> +	if (!lru_array)
> +		return NULL;
> +
> +	for (i = 0; i < nr_node_ids ; i++)
> +		list_lru_init_one(&lru_array->node[i]);
> +
> +	return lru_array;
> +}
> +
> +int __list_lru_init(struct list_lru *lru)
>   {
>   	int i;
>   
>   	nodes_clear(lru->active_nodes);
> -	for (i = 0; i < MAX_NUMNODES; i++) {
> -		spin_lock_init(&lru->node[i].lock);
> -		INIT_LIST_HEAD(&lru->node[i].list);
> -		lru->node[i].nr_items = 0;
> +	for (i = 0; i < MAX_NUMNODES; i++)
> +		list_lru_init_one(&lru->node[i]);

Hmm. lru_list is up to MAX_NUMNODES, your new one is up to nr_node_ids...

> +
> +	return 0;
> +}
> +
> +#ifdef CONFIG_MEMCG_KMEM
> +static int memcg_init_lru(struct list_lru *lru)
> +{
> +	int ret;
> +
> +	if (!lru->memcg_lrus)
> +		return 0;
> +
> +	INIT_LIST_HEAD(&lru->lrus);
> +	mutex_lock(&all_lrus_mutex);
> +	list_add(&lru->lrus, &all_lrus);
> +	ret = memcg_new_lru(lru);
> +	mutex_unlock(&all_lrus_mutex);
> +	return ret;
> +}

 only writer takes this mutex ?

> +
> +int memcg_update_all_lrus(unsigned long num)
> +{
> +	int ret = 0;
> +	struct list_lru *lru;
> +
> +	mutex_lock(&all_lrus_mutex);
> +	list_for_each_entry(lru, &all_lrus, lrus) {
> +		if (!lru->memcg_lrus)
> +			continue;
> +
> +		ret = memcg_kmem_update_lru_size(lru, num, false);
> +		if (ret)
> +			goto out;
> +	}
> +out:
> +	mutex_unlock(&all_lrus_mutex);
> +	return ret;
> +}
> +
> +void list_lru_destroy(struct list_lru *lru)
> +{
> +	if (!lru->memcg_lrus)
> +		return;
> +
> +	mutex_lock(&all_lrus_mutex);
> +	list_del(&lru->lrus);
> +	mutex_unlock(&all_lrus_mutex);
> +}
> +
> +void list_lru_destroy_memcg(struct mem_cgroup *memcg)
> +{
> +	struct list_lru *lru;
> +	mutex_lock(&all_lrus_mutex);
> +	list_for_each_entry(lru, &all_lrus, lrus) {
> +		lru->memcg_lrus[memcg_cache_id(memcg)] = NULL;
> +		/* everybody must beaware that this memcg is no longer valid */

Hm, the object pointed by this array entry will be freed by some other func ?

> +		wmb();
>   	}
> +	mutex_unlock(&all_lrus_mutex);
> +}
> +#else
> +static int memcg_init_lru(struct list_lru *lru)
> +{
>   	return 0;
>   }
> +#endif
> +
> +int list_lru_init(struct list_lru *lru)
> +{
> +	int ret;
> +	ret = __list_lru_init(lru);
> +	if (ret)
> +		return ret;
> +
> +	return memcg_init_lru(lru);
> +}
>   EXPORT_SYMBOL_GPL(list_lru_init);
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b1d4dfa..b9e1941 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3032,16 +3032,30 @@ int memcg_update_cache_sizes(struct mem_cgroup *memcg)
>   	memcg_kmem_set_activated(memcg);
>   
>   	ret = memcg_update_all_caches(num+1);
> -	if (ret) {
> -		ida_simple_remove(&kmem_limited_groups, num);
> -		memcg_kmem_clear_activated(memcg);
> -		return ret;
> -	}
> +	if (ret)
> +		goto out;
> +
> +	/*
> +	 * We should make sure that the array size is not updated until we are
> +	 * done; otherwise we have no easy way to know whether or not we should
> +	 * grow the array.
> +	 */
> +	ret = memcg_update_all_lrus(num + 1);
> +	if (ret)
> +		goto out;
>   
>   	memcg->kmemcg_id = num;
> +
> +	memcg_update_array_size(num + 1);
> +
>   	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
>   	mutex_init(&memcg->slab_caches_mutex);
> +
>   	return 0;
> +out:
> +	ida_simple_remove(&kmem_limited_groups, num);
> +	memcg_kmem_clear_activated(memcg);
> +	return ret;
>   }
>   
>   static size_t memcg_caches_array_size(int num_groups)
> @@ -3121,6 +3135,106 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
>   	return 0;
>   }
>   
> +/*
> + * memcg_kmem_update_lru_size - fill in kmemcg info into a list_lru
> + *
> + * @lru: the lru we are operating with
> + * @num_groups: how many kmem-limited cgroups we have
> + * @new_lru: true if this is a new_lru being created, false if this
> + * was triggered from the memcg side
> + *
> + * Returns 0 on success, and an error code otherwise.
> + *
> + * This function can be called either when a new kmem-limited memcg appears,
> + * or when a new list_lru is created. The work is roughly the same in two cases,
> + * but in the later we never have to expand the array size.
> + *
> + * This is always protected by the all_lrus_mutex from the list_lru side.
> + */
> +int memcg_kmem_update_lru_size(struct list_lru *lru, int num_groups,
> +			       bool new_lru)
> +{
> +	struct list_lru_array **new_lru_array;
> +	struct list_lru_array *lru_array;
> +

Both are named as array ...confusing ;)

> +	lru_array = lru_alloc_array();
> +	if (!lru_array)
> +		return -ENOMEM;
> +
> +	/* need some fucked up locking around the list acquisition */
> +	if ((num_groups > memcg_limited_groups_array_size) || new_lru) {
> +		int i;
> +		struct list_lru_array **old_array;
> +		size_t size = memcg_caches_array_size(num_groups);
> +
> +		new_lru_array = kzalloc(size * sizeof(void *), GFP_KERNEL);
> +		if (!new_lru_array) {
> +			kfree(lru_array);
> +			return -ENOMEM;
> +		}
> +
> +		for (i = 0; i < memcg_limited_groups_array_size; i++) {
> +			if (!lru_memcg_is_assigned(lru) || lru->memcg_lrus[i])
> +				continue;
> +			new_lru_array[i] =  lru->memcg_lrus[i];
> +		}
> +
> +		old_array = lru->memcg_lrus;
> +		lru->memcg_lrus = new_lru_array;
> +		/*
> +		 * We don't need a barrier here because we are just copying
> +		 * information over. Anybody operating in memcg_lrus will
> +		 * either follow the new array or the old one and they contain
> +		 * exactly the same information. The new space in the end is
> +		 * always empty anyway.
> +		 *
> +		 * We do have to make sure that no more users of the old
> +		 * memcg_lrus array exist before we free, and this is achieved
> +		 * by the synchronize_lru below.
> +		 */
> +		if (lru_memcg_is_assigned(lru)) {
> +			synchronize_rcu();
> +			kfree(old_array);
> +		}
> +
> +	}
> +
> +	if (lru_memcg_is_assigned(lru)) {
> +		lru->memcg_lrus[num_groups - 1] = lru_array;

Can't this pointer already set ?

> +		/*
> +		 * Here we do need the barrier, because of the state transition
> +		 * implied by the assignment of the array. All users should be
> +		 * able to see it
> +		 */
> +		wmb();
> +	}
> +
> +	return 0;
> +
> +}
> +
> +int memcg_new_lru(struct list_lru *lru)
> +{
> +	struct mem_cgroup *iter;
> +
> +	if (!memcg_kmem_enabled())
> +		return 0;
> +
> +	for_each_mem_cgroup(iter) {
> +		int ret;
> +		int memcg_id = memcg_cache_id(iter);
> +		if (memcg_id < 0)
> +			continue;
> +
> +		ret = memcg_kmem_update_lru_size(lru, memcg_id + 1, true);
> +		if (ret) {
> +			mem_cgroup_iter_break(root_mem_cgroup, iter);
> +			return ret;
> +		}
> +	}
> +	return 0;
> +}
> +
>   int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
>   			 struct kmem_cache *root_cache)
>   {
> @@ -5914,8 +6028,10 @@ static void kmem_cgroup_destroy(struct mem_cgroup *memcg)
>   	 * possible that the charges went down to 0 between mark_dead and the
>   	 * res_counter read, so in that case, we don't need the put
>   	 */
> -	if (memcg_kmem_test_and_clear_dead(memcg))
> +	if (memcg_kmem_test_and_clear_dead(memcg)) {
> +		list_lru_destroy_memcg(memcg);
>   		mem_cgroup_put(memcg);
> +	}
>   }
>   #else
>   static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> indek
x 3f3cd97..2470d11 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -102,7 +102,6 @@ int memcg_update_all_caches(int num_memcgs)
>   			goto out;
>   	}
>   
> -	memcg_update_array_size(num_memcgs);
>   out:
>   	mutex_unlock(&slab_mutex);
>   	return ret;
> 


Thanks,
-Kame

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

next prev parent reply	other threads:[~2013-02-15  9:21 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-02-08 13:07 [PATCH 0/7] memcg targeted shrinking Glauber Costa
2013-02-08 13:07 ` Glauber Costa
2013-02-08 13:07 ` Glauber Costa
2013-02-08 13:07 ` [PATCH 1/7] vmscan: also shrink slab in memcg pressure Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-15  1:27   ` Greg Thelen
2013-02-15  1:27     ` Greg Thelen
2013-02-15  1:27     ` Greg Thelen
2013-02-15 10:46     ` Glauber Costa
2013-02-15 10:46       ` Glauber Costa
2013-02-15 10:46       ` Glauber Costa
2013-02-15  8:37   ` Kamezawa Hiroyuki
2013-02-15  8:37     ` Kamezawa Hiroyuki
     [not found]     ` <511DF3CB.7020206-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2013-02-15 10:30       ` Glauber Costa
2013-02-15 10:30         ` Glauber Costa
2013-02-15 10:30         ` Glauber Costa
2013-02-08 13:07 ` [PATCH 2/7] memcg,list_lru: duplicate LRUs upon kmemcg creation Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-15  1:31   ` Greg Thelen
2013-02-15  1:31     ` Greg Thelen
2013-02-15  1:31     ` Greg Thelen
     [not found]     ` <xr934nhenz18.fsf-aSPv4SP+Du0KgorLzL7FmE7CuiCeIGUxQQ4Iyu8u01E@public.gmane.org>
2013-02-15 10:54       ` Glauber Costa
2013-02-15 10:54         ` Glauber Costa
2013-02-15 10:54         ` Glauber Costa
2013-02-20  7:46         ` Greg Thelen
2013-02-20  7:46           ` Greg Thelen
2013-02-20  7:46           ` Greg Thelen
     [not found]   ` <1360328857-28070-3-git-send-email-glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2013-02-15  9:21     ` Kamezawa Hiroyuki [this message]
2013-02-15  9:21       ` Kamezawa Hiroyuki
2013-02-15 10:36       ` Glauber Costa
2013-02-15 10:36         ` Glauber Costa
2013-02-15 10:36         ` Glauber Costa
2013-02-08 13:07 ` [PATCH 3/7] lru: add an element to a memcg list Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-15  1:32   ` Greg Thelen
2013-02-15  1:32     ` Greg Thelen
     [not found]     ` <xr93txpemkeo.fsf-aSPv4SP+Du0KgorLzL7FmE7CuiCeIGUxQQ4Iyu8u01E@public.gmane.org>
2013-02-15 10:57       ` Glauber Costa
2013-02-15 10:57         ` Glauber Costa
2013-02-15 10:57         ` Glauber Costa
2013-02-08 13:07 ` [PATCH 4/7] list_lru: also include memcg lists in counts and scans Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-08 13:07 ` [PATCH 5/7] list_lru: per-memcg walks Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-08 13:07 ` [PATCH 6/7] super: targeted memcg reclaim Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-08 13:07 ` [PATCH 7/7] memcg: per-memcg kmem shrinking Glauber Costa
2013-02-08 13:07   ` Glauber Costa
2013-02-08 13:07   ` Glauber Costa
     [not found] ` <1360328857-28070-1-git-send-email-glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2013-02-15  1:28   ` [PATCH 0/7] memcg targeted shrinking Greg Thelen
2013-02-15  1:28     ` Greg Thelen
2013-02-15  1:28     ` Greg Thelen
     [not found]     ` <xr93ip5unz52.fsf-aSPv4SP+Du0KgorLzL7FmE7CuiCeIGUxQQ4Iyu8u01E@public.gmane.org>
2013-02-15 10:42       ` Glauber Costa
2013-02-15 10:42         ` Glauber Costa
2013-02-15 10:42         ` Glauber Costa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=511DFE22.4000003@jp.fujitsu.com \
    --to=kamezawa.hiroyu-+cum20s59erqfuhtdcdx3a@public.gmane.org \
    --cc=akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org \
    --cc=cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org \
    --cc=dchinner-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org \
    --cc=hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org \
    --cc=hughd-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org \
    --cc=linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org \
    --cc=mgorman-l3A5Bk7waGM@public.gmane.org \
    --cc=mhocko-AlSwsSmVLrQ@public.gmane.org \
    --cc=riel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.