* [PATCH] mm/list_lru: replace spinlock with RCU in __list_lru_count_one
@ 2018-03-27 7:59 Li RongQing
2018-03-27 8:15 ` Michal Hocko
2018-03-28 7:59 ` kbuild test robot
0 siblings, 2 replies; 7+ messages in thread
From: Li RongQing @ 2018-03-27 7:59 UTC (permalink / raw)
To: linux-kernel, linux-mm; +Cc: Andrew Morton, Michal Hocko, Johannes Weiner
when reclaiming memory, shrink_slab() takes a lot of time even if
no memory is reclaimed, because list_lru_count_one(), which it calls,
needs to take a spinlock

try to optimize this by replacing the spinlock with RCU in
__list_lru_count_one
$dd if=aaa of=bbb bs=1k count=3886080
$rm -f bbb
$time echo 100000000 >/cgroup/memory/test/memory.limit_in_bytes
Before: 0m0.415s ===> after: 0m0.395s
Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
include/linux/list_lru.h | 2 ++
mm/list_lru.c | 69 ++++++++++++++++++++++++++++++++++--------------
2 files changed, 51 insertions(+), 20 deletions(-)
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index bb8129a3474d..ae472538038e 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -29,6 +29,7 @@ struct list_lru_one {
struct list_head list;
/* may become negative during memcg reparenting */
long nr_items;
+ struct rcu_head rcu;
};
struct list_lru_memcg {
@@ -46,6 +47,7 @@ struct list_lru_node {
struct list_lru_memcg *memcg_lrus;
#endif
long nr_items;
+ struct rcu_head rcu;
} ____cacheline_aligned_in_smp;
struct list_lru {
diff --git a/mm/list_lru.c b/mm/list_lru.c
index fd41e969ede5..4c58ed861729 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -52,13 +52,13 @@ static inline bool list_lru_memcg_aware(struct list_lru *lru)
static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
- /*
- * The lock protects the array of per cgroup lists from relocation
- * (see memcg_update_list_lru_node).
- */
- lockdep_assert_held(&nlru->lock);
- if (nlru->memcg_lrus && idx >= 0)
- return nlru->memcg_lrus->lru[idx];
+ struct list_lru_memcg *tmp;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ tmp = rcu_dereference(nlru->memcg_lrus);
+ if (tmp && idx >= 0)
+ return rcu_dereference(tmp->lru[idx]);
return &nlru->lru;
}
@@ -113,14 +113,17 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
struct list_lru_one *l;
spin_lock(&nlru->lock);
+ rcu_read_lock();
if (list_empty(item)) {
l = list_lru_from_kmem(nlru, item);
list_add_tail(item, &l->list);
l->nr_items++;
nlru->nr_items++;
+ rcu_read_unlock();
spin_unlock(&nlru->lock);
return true;
}
+ rcu_read_unlock();
spin_unlock(&nlru->lock);
return false;
}
@@ -133,14 +136,17 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
struct list_lru_one *l;
spin_lock(&nlru->lock);
+ rcu_read_lock();
if (!list_empty(item)) {
l = list_lru_from_kmem(nlru, item);
list_del_init(item);
l->nr_items--;
nlru->nr_items--;
+ rcu_read_unlock();
spin_unlock(&nlru->lock);
return true;
}
+ rcu_read_unlock();
spin_unlock(&nlru->lock);
return false;
}
@@ -166,12 +172,13 @@ static unsigned long __list_lru_count_one(struct list_lru *lru,
{
struct list_lru_node *nlru = &lru->node[nid];
struct list_lru_one *l;
- unsigned long count;
+ unsigned long count = 0;
- spin_lock(&nlru->lock);
+ rcu_read_lock();
l = list_lru_from_memcg_idx(nlru, memcg_idx);
- count = l->nr_items;
- spin_unlock(&nlru->lock);
+ if (l)
+ count = l->nr_items;
+ rcu_read_unlock();
return count;
}
@@ -204,6 +211,7 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
unsigned long isolated = 0;
spin_lock(&nlru->lock);
+ rcu_read_lock();
l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
list_for_each_safe(item, n, &l->list) {
@@ -250,6 +258,7 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
}
}
+ rcu_read_unlock();
spin_unlock(&nlru->lock);
return isolated;
}
@@ -296,9 +305,14 @@ static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
int begin, int end)
{
int i;
+ struct list_lru_one *tmp;
- for (i = begin; i < end; i++)
- kfree(memcg_lrus->lru[i]);
+ for (i = begin; i < end; i++) {
+ tmp = memcg_lrus->lru[i];
+ rcu_assign_pointer(memcg_lrus->lru[i], NULL);
+ if (tmp)
+ kfree_rcu(tmp, rcu);
+ }
}
static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
@@ -314,7 +328,7 @@ static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
goto fail;
init_one_lru(l);
- memcg_lrus->lru[i] = l;
+ rcu_assign_pointer(memcg_lrus->lru[i], l);
}
return 0;
fail:
@@ -325,25 +339,37 @@ static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
int size = memcg_nr_cache_ids;
+ struct list_lru_memcg *tmp;
- nlru->memcg_lrus = kvmalloc(size * sizeof(void *), GFP_KERNEL);
- if (!nlru->memcg_lrus)
+ tmp = kvmalloc(size * sizeof(void *), GFP_KERNEL);
+ if (!tmp)
return -ENOMEM;
- if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
- kvfree(nlru->memcg_lrus);
+ if (__memcg_init_list_lru_node(tmp, 0, size)) {
+ kvfree(tmp);
return -ENOMEM;
}
+ rcu_assign_pointer(nlru->memcg_lrus, tmp);
+
return 0;
}
-static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
+static void memcg_destroy_list_lru_node_rcu(struct rcu_head *rcu)
{
+ struct list_lru_node *nlru;
+
+ nlru = container_of(rcu, struct list_lru_node, rcu);
+
__memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
kvfree(nlru->memcg_lrus);
}
+static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
+{
+ call_rcu(&nlru->rcu, memcg_destroy_list_lru_node_rcu);
+}
+
static int memcg_update_list_lru_node(struct list_lru_node *nlru,
int old_size, int new_size)
{
@@ -371,9 +397,10 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
* we have to use IRQ-safe primitives here to avoid deadlock.
*/
spin_lock_irq(&nlru->lock);
- nlru->memcg_lrus = new;
+ rcu_assign_pointer(nlru->memcg_lrus, new);
spin_unlock_irq(&nlru->lock);
+ synchronize_rcu();
kvfree(old);
return 0;
}
@@ -487,6 +514,7 @@ static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
* we have to use IRQ-safe primitives here to avoid deadlock.
*/
spin_lock_irq(&nlru->lock);
+ rcu_read_lock();
src = list_lru_from_memcg_idx(nlru, src_idx);
dst = list_lru_from_memcg_idx(nlru, dst_idx);
@@ -495,6 +523,7 @@ static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
dst->nr_items += src->nr_items;
src->nr_items = 0;
+ rcu_read_unlock();
spin_unlock_irq(&nlru->lock);
}
--
2.11.0
^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm/list_lru: replace spinlock with RCU in __list_lru_count_one
2018-03-27 7:59 [PATCH] mm/list_lru: replace spinlock with RCU in __list_lru_count_one Li RongQing
@ 2018-03-27 8:15 ` Michal Hocko
2018-03-27 9:08 ` Vladimir Davydov
2018-03-28 7:59 ` kbuild test robot
1 sibling, 1 reply; 7+ messages in thread
From: Michal Hocko @ 2018-03-27 8:15 UTC (permalink / raw)
To: Li RongQing
Cc: linux-kernel, linux-mm, Andrew Morton, Johannes Weiner, Dave Chinner

[CC Dave]

On Tue 27-03-18 15:59:04, Li RongQing wrote:
> when reclaiming memory, shrink_slab() takes a lot of time even if
> no memory is reclaimed, because list_lru_count_one(), which it calls,
> needs to take a spinlock
>
> try to optimize this by replacing the spinlock with RCU in
> __list_lru_count_one

Isn't the RCU overkill here? Why cannot we simply do an optimistic
lockless check for nr_items? It would be racy but does it actually
matter? We should be able to tolerate occasional 0 to non-zero and vice
versa transitions AFAICS.

[quoted patch trimmed]

--
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 7+ messages in thread
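(For illustration only: a minimal sketch of the optimistic lockless check Michal
describes might look like the hypothetical helper below. It is not part of the
posted patch; it assumes the per-node nr_items counter is all the count path
needs and that an occasionally stale result is acceptable for shrinker counting.)

	/* Hypothetical lockless count: read nr_items without taking nlru->lock. */
	static unsigned long list_lru_count_node_fast(struct list_lru *lru, int nid)
	{
		struct list_lru_node *nlru = &lru->node[nid];

		/*
		 * Racy by design: a reader may see a momentarily stale
		 * 0 / non-zero value, which the shrinker can tolerate.
		 */
		return READ_ONCE(nlru->nr_items);
	}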
* Re: [PATCH] mm/list_lru: replace spinlock with RCU in __list_lru_count_one
2018-03-27 8:15 ` Michal Hocko
@ 2018-03-27 9:08 ` Vladimir Davydov
2018-03-27 9:30 ` Re: " Li,Rongqing
0 siblings, 1 reply; 7+ messages in thread
From: Vladimir Davydov @ 2018-03-27 9:08 UTC (permalink / raw)
To: Michal Hocko
Cc: Li RongQing, linux-kernel, linux-mm, Andrew Morton, Johannes Weiner,
  Dave Chinner, Kirill Tkhai

[Cc Kirill]

AFAIU this has already been fixed in exactly the same fashion by Kirill
(mmotm commit 8e7d1201ec71 "mm: make counting of list_lru_one::nr_items
lockless"). Kirill is working on further optimizations right now, see

https://lkml.kernel.org/r/152163840790.21546.980703278415599202.stgit@localhost.localdomain

On Tue, Mar 27, 2018 at 10:15:46AM +0200, Michal Hocko wrote:
[quoted message and patch trimmed]

^ permalink raw reply	[flat|nested] 7+ messages in thread
* Re: [PATCH] mm/list_lru: replace spinlock with RCU in __list_lru_count_one
2018-03-27 9:08 ` Vladimir Davydov
@ 2018-03-27 9:30 ` Li,Rongqing
2018-03-27 9:41 ` Kirill Tkhai
0 siblings, 1 reply; 7+ messages in thread
From: Li,Rongqing @ 2018-03-27 9:30 UTC (permalink / raw)
To: Vladimir Davydov, Michal Hocko
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org, Andrew Morton,
  Johannes Weiner, Dave Chinner, Kirill Tkhai

> -----Original Message-----
> From: Vladimir Davydov [mailto:vdavydov.dev@gmail.com]
> Sent: 2018-03-27 17:09
> Subject: Re: [PATCH] mm/list_lru: replace spinlock with RCU in
> __list_lru_count_one
>
> [Cc Kirill]
>
> AFAIU this has already been fixed in exactly the same fashion by Kirill
> (mmotm commit 8e7d1201ec71 "mm: make counting of
> list_lru_one::nr_items lockless"). Kirill is working on further
> optimizations right now, see
>
> https://lkml.kernel.org/r/152163840790.21546.980703278415599202.stgit@localhost.localdomain

Ok, thanks

-Rong

[quoted message and patch trimmed]

^ permalink raw reply	[flat|nested] 7+ messages in thread
* Re: [PATCH] mm/list_lru: replace spinlock with RCU in __list_lru_count_one
2018-03-27 9:30 ` Re: " Li,Rongqing
@ 2018-03-27 9:41 ` Kirill Tkhai
2018-03-27 9:47 ` Re: " Li,Rongqing
0 siblings, 1 reply; 7+ messages in thread
From: Kirill Tkhai @ 2018-03-27 9:41 UTC (permalink / raw)
To: Li,Rongqing, Vladimir Davydov, Michal Hocko
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org, Andrew Morton,
  Johannes Weiner, Dave Chinner

On 27.03.2018 12:30, Li,Rongqing wrote:
>> [Cc Kirill]
>>
>> AFAIU this has already been fixed in exactly the same fashion by Kirill
>> (mmotm commit 8e7d1201ec71 "mm: make counting of
>> list_lru_one::nr_items lockless"). Kirill is working on further
>> optimizations right now, see
>>
>> https://lkml.kernel.org/r/152163840790.21546.980703278415599202.stgit@localhost.localdomain
>
> Ok, thanks

Thanks Vladimir, for CCing me.

Rong, if you are interested, I may start adding you to CC on further
iterations of

https://marc.info/?i=152163840790.21546.980703278415599202.stgit%40localhost.localdomain

since many people are running into this problem.

Kirill

[quoted message and patch trimmed]

^ permalink raw reply	[flat|nested] 7+ messages in thread
* Re: [PATCH] mm/list_lru: replace spinlock with RCU in __list_lru_count_one
2018-03-27 9:41 ` Kirill Tkhai
@ 2018-03-27 9:47 ` Li,Rongqing
0 siblings, 0 replies; 7+ messages in thread
From: Li,Rongqing @ 2018-03-27 9:47 UTC (permalink / raw)
To: Kirill Tkhai, Vladimir Davydov, Michal Hocko
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org, Andrew Morton,
  Johannes Weiner, Dave Chinner

> -----Original Message-----
> From: linux-kernel-owner@vger.kernel.org
> [mailto:linux-kernel-owner@vger.kernel.org] On Behalf Of Kirill Tkhai
> Sent: 2018-03-27 17:41
> Subject: Re: [PATCH] mm/list_lru: replace spinlock with RCU in
> __list_lru_count_one
>
> Thanks Vladimir, for CCing me.
>
> Rong, if you are interested, I may start adding you to CC on further
> iterations of
> https://marc.info/?i=152163840790.21546.980703278415599202.stgit%40localhost.localdomain
> since many people are running into this problem.
>
> Kirill

Ok, please add me

thank you

-RongQing

[quoted message and patch trimmed]

^ permalink raw reply	[flat|nested] 7+ messages in thread
* Re: [PATCH] mm/list_lru: replace spinlock with RCU in __list_lru_count_one
2018-03-27 7:59 [PATCH] mm/list_lru: replace spinlock with RCU in __list_lru_count_one Li RongQing
2018-03-27 8:15 ` Michal Hocko
@ 2018-03-28 7:59 ` kbuild test robot
1 sibling, 0 replies; 7+ messages in thread
From: kbuild test robot @ 2018-03-28 7:59 UTC (permalink / raw)
To: Li RongQing
Cc: kbuild-all, linux-kernel, linux-mm, Andrew Morton, Michal Hocko,
  Johannes Weiner

Hi Li,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v4.16-rc7]
[cannot apply to next-20180327]
[if your patch is applied to the wrong git tree, please drop us a note to
help improve the system]

url: https://github.com/0day-ci/linux/commits/Li-RongQing/mm-list_lru-replace-spinlock-with-RCU-in-__list_lru_count_one/20180328-042620

reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__

sparse warnings: (new ones prefixed by >>)

>> mm/list_lru.c:59:15: sparse: incompatible types in comparison expression (different address spaces)
   mm/list_lru.c:61:24: sparse: incompatible types in comparison expression (different address spaces)
   [the same two warnings are repeated for several build targets]

vim +59 mm/list_lru.c

    51
    52	static inline struct list_lru_one *
    53	list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
    54	{
    55		struct list_lru_memcg *tmp;
    56
    57		WARN_ON_ONCE(!rcu_read_lock_held());
    58
  > 59		tmp = rcu_dereference(nlru->memcg_lrus);
    60		if (tmp && idx >= 0)
    61			return rcu_dereference(tmp->lru[idx]);
    62
    63		return &nlru->lru;
    64	}
    65

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply	[flat|nested] 7+ messages in thread
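(Those sparse complaints are the usual result of calling rcu_dereference() /
rcu_assign_pointer() on pointers that are not annotated with __rcu. A minimal
sketch of the annotation sparse is asking for — assumed here, not taken from
any posted patch, with unrelated fields abbreviated:)

	struct list_lru_memcg {
		/* array of per-cgroup lists, indexed by memcg_cache_id */
		struct list_lru_one __rcu *lru[0];
	};

	struct list_lru_node {
		/* ... */
		struct list_lru_memcg __rcu *memcg_lrus;
		/* ... */
	};

(With the __rcu markers in place, sparse would then also expect the remaining
plain loads and stores of these fields to go through RCU accessors, e.g.
rcu_dereference_protected() for accesses done under nlru->lock.)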
end of thread, other threads:[~2018-03-28 8:01 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
2018-03-27 7:59 [PATCH] mm/list_lru: replace spinlock with RCU in __list_lru_count_one Li RongQing
2018-03-27 8:15 ` Michal Hocko
2018-03-27 9:08 ` Vladimir Davydov
2018-03-27 9:30 ` Re: " Li,Rongqing
2018-03-27 9:41 ` Kirill Tkhai
2018-03-27 9:47 ` Re: " Li,Rongqing
2018-03-28 7:59 ` kbuild test robot