* [PATCH tip/core/urgent 01/10] rcu: v2: optionally leave lockdep enabled after RCU lockdep splat
2010-05-03 18:52 [PATCH tip/core/urgent 0/10] v3: Fix RCU lockdep splats Paul E. McKenney
@ 2010-05-03 18:53 ` Paul E. McKenney
2010-05-03 18:53 ` [PATCH tip/core/urgent 02/10] KEYS: Fix an RCU warning Paul E. McKenney
` (8 subsequent siblings)
9 siblings, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2010-05-03 18:53 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
eric.dumazet, Paul E. McKenney
From: Lai Jiangshan <laijs@cn.fujitsu.com>
There is no need to disable lockdep after an RCU lockdep splat, so
remove the debug_locks_off() from lockdep_rcu_dereference().
To avoid repeated lockdep splats, use a static variable in the
inlined rcu_dereference_check() and rcu_dereference_protected()
macros so that a given instance splats only once, but so that
multiple instances can be detected per boot.
This is controlled by a new config variable CONFIG_PROVE_RCU_REPEATEDLY,
which is disabled by default.
Requested-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Tested-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
include/linux/rcupdate.h | 15 +++++++++++----
kernel/lockdep.c | 2 ++
lib/Kconfig.debug | 12 ++++++++++++
3 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 07db2fe..ec9ab49 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -190,6 +190,15 @@ static inline int rcu_read_lock_sched_held(void)
#ifdef CONFIG_PROVE_RCU
+#define __do_rcu_dereference_check(c) \
+ do { \
+ static bool __warned; \
+ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
+ __warned = true; \
+ lockdep_rcu_dereference(__FILE__, __LINE__); \
+ } \
+ } while (0)
+
/**
* rcu_dereference_check - rcu_dereference with debug checking
* @p: The pointer to read, prior to dereferencing
@@ -219,8 +228,7 @@ static inline int rcu_read_lock_sched_held(void)
*/
#define rcu_dereference_check(p, c) \
({ \
- if (debug_lockdep_rcu_enabled() && !(c)) \
- lockdep_rcu_dereference(__FILE__, __LINE__); \
+ __do_rcu_dereference_check(c); \
rcu_dereference_raw(p); \
})
@@ -237,8 +245,7 @@ static inline int rcu_read_lock_sched_held(void)
*/
#define rcu_dereference_protected(p, c) \
({ \
- if (debug_lockdep_rcu_enabled() && !(c)) \
- lockdep_rcu_dereference(__FILE__, __LINE__); \
+ __do_rcu_dereference_check(c); \
(p); \
})
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 2594e1c..73747b7 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3801,8 +3801,10 @@ void lockdep_rcu_dereference(const char *file, const int line)
{
struct task_struct *curr = current;
+#ifndef CONFIG_PROVE_RCU_REPEATEDLY
if (!debug_locks_off())
return;
+#endif /* #ifndef CONFIG_PROVE_RCU_REPEATEDLY */
printk("\n===================================================\n");
printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n");
printk( "---------------------------------------------------\n");
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 935248b..94090b4 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -512,6 +512,18 @@ config PROVE_RCU
Say N if you are unsure.
+config PROVE_RCU_REPEATEDLY
+ bool "RCU debugging: don't disable PROVE_RCU on first splat"
+ depends on PROVE_RCU
+ default n
+ help
+ By itself, PROVE_RCU will disable checking upon issuing the
+ first warning (or "splat"). This feature prevents such
+ disabling, allowing multiple RCU-lockdep warnings to be printed
+ on a single reboot.
+
+ Say N if you are unsure.
+
config LOCKDEP
bool
depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
--
1.7.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH tip/core/urgent 02/10] KEYS: Fix an RCU warning
2010-05-03 18:52 [PATCH tip/core/urgent 0/10] v3: Fix RCU lockdep splats Paul E. McKenney
2010-05-03 18:53 ` [PATCH tip/core/urgent 01/10] rcu: v2: optionally leave lockdep enabled after RCU lockdep splat Paul E. McKenney
@ 2010-05-03 18:53 ` Paul E. McKenney
2010-05-03 18:53 ` [PATCH tip/core/urgent 03/10] KEYS: Fix an RCU warning in the reading of user keys Paul E. McKenney
` (7 subsequent siblings)
9 siblings, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2010-05-03 18:53 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
eric.dumazet, Paul E. McKenney
From: David Howells <dhowells@redhat.com>
Fix the following RCU warning:
===================================================
[ INFO: suspicious rcu_dereference_check() usage. ]
---------------------------------------------------
security/keys/request_key.c:116 invoked rcu_dereference_check() without protection!
other info that might help us debug this:
rcu_scheduler_active = 1, debug_locks = 0
1 lock held by keyctl/5372:
#0: (key_types_sem){.+.+.+}, at: [<ffffffff811a4e3d>] key_type_lookup+0x1c/0x70
stack backtrace:
Pid: 5372, comm: keyctl Not tainted 2.6.34-rc3-cachefs #150
Call Trace:
[<ffffffff810515f8>] lockdep_rcu_dereference+0xaa/0xb2
[<ffffffff811a9220>] call_sbin_request_key+0x156/0x2b6
[<ffffffff811a4c66>] ? __key_instantiate_and_link+0xb1/0xdc
[<ffffffff811a4cd3>] ? key_instantiate_and_link+0x42/0x5f
[<ffffffff811a96b8>] ? request_key_auth_new+0x17b/0x1f3
[<ffffffff811a8e00>] ? request_key_and_link+0x271/0x400
[<ffffffff810aba6f>] ? kmem_cache_alloc+0xe1/0x118
[<ffffffff811a8f1a>] request_key_and_link+0x38b/0x400
[<ffffffff811a7b72>] sys_request_key+0xf7/0x14a
[<ffffffff81052227>] ? trace_hardirqs_on_caller+0x10c/0x130
[<ffffffff81393f5c>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[<ffffffff81001eeb>] system_call_fastpath+0x16/0x1b
This was caused by doing:
[root@andromeda ~]# keyctl newring fred @s
539196288
[root@andromeda ~]# keyctl request2 user a a 539196288
request_key: Required key not available
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
security/keys/request_key.c | 13 ++++++++-----
1 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 03fe63e..ea97c31 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -68,7 +68,8 @@ static int call_sbin_request_key(struct key_construction *cons,
{
const struct cred *cred = current_cred();
key_serial_t prkey, sskey;
- struct key *key = cons->key, *authkey = cons->authkey, *keyring;
+ struct key *key = cons->key, *authkey = cons->authkey, *keyring,
+ *session;
char *argv[9], *envp[3], uid_str[12], gid_str[12];
char key_str[12], keyring_str[3][12];
char desc[20];
@@ -112,10 +113,12 @@ static int call_sbin_request_key(struct key_construction *cons,
if (cred->tgcred->process_keyring)
prkey = cred->tgcred->process_keyring->serial;
- if (cred->tgcred->session_keyring)
- sskey = rcu_dereference(cred->tgcred->session_keyring)->serial;
- else
- sskey = cred->user->session_keyring->serial;
+ rcu_read_lock();
+ session = rcu_dereference(cred->tgcred->session_keyring);
+ if (!session)
+ session = cred->user->session_keyring;
+ sskey = session->serial;
+ rcu_read_unlock();
sprintf(keyring_str[2], "%d", sskey);
--
1.7.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH tip/core/urgent 03/10] KEYS: Fix an RCU warning in the reading of user keys
2010-05-03 18:52 [PATCH tip/core/urgent 0/10] v3: Fix RCU lockdep splats Paul E. McKenney
2010-05-03 18:53 ` [PATCH tip/core/urgent 01/10] rcu: v2: optionally leave lockdep enabled after RCU lockdep splat Paul E. McKenney
2010-05-03 18:53 ` [PATCH tip/core/urgent 02/10] KEYS: Fix an RCU warning Paul E. McKenney
@ 2010-05-03 18:53 ` Paul E. McKenney
2010-05-03 18:53 ` [PATCH tip/core/urgent 04/10] cgroup: Fix an RCU warning in cgroup_path() Paul E. McKenney
` (6 subsequent siblings)
9 siblings, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2010-05-03 18:53 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
eric.dumazet, Paul E. McKenney
From: David Howells <dhowells@redhat.com>
Fix an RCU warning in the reading of user keys:
===================================================
[ INFO: suspicious rcu_dereference_check() usage. ]
---------------------------------------------------
security/keys/user_defined.c:202 invoked rcu_dereference_check() without protection!
other info that might help us debug this:
rcu_scheduler_active = 1, debug_locks = 0
1 lock held by keyctl/3637:
#0: (&key->sem){+++++.}, at: [<ffffffff811a80ae>] keyctl_read_key+0x9c/0xcf
stack backtrace:
Pid: 3637, comm: keyctl Not tainted 2.6.34-rc5-cachefs #18
Call Trace:
[<ffffffff81051f6c>] lockdep_rcu_dereference+0xaa/0xb2
[<ffffffff811aa55f>] user_read+0x47/0x91
[<ffffffff811a80be>] keyctl_read_key+0xac/0xcf
[<ffffffff811a8a06>] sys_keyctl+0x75/0xb7
[<ffffffff81001eeb>] system_call_fastpath+0x16/0x1b
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
security/keys/user_defined.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index 7c687d5..e9aa079 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -199,7 +199,8 @@ long user_read(const struct key *key, char __user *buffer, size_t buflen)
struct user_key_payload *upayload;
long ret;
- upayload = rcu_dereference(key->payload.data);
+ upayload = rcu_dereference_protected(
+ key->payload.data, rwsem_is_locked(&((struct key *)key)->sem));
ret = upayload->datalen;
/* we can return the data as is */
--
1.7.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH tip/core/urgent 04/10] cgroup: Fix an RCU warning in cgroup_path()
2010-05-03 18:52 [PATCH tip/core/urgent 0/10] v3: Fix RCU lockdep splats Paul E. McKenney
` (2 preceding siblings ...)
2010-05-03 18:53 ` [PATCH tip/core/urgent 03/10] KEYS: Fix an RCU warning in the reading of user keys Paul E. McKenney
@ 2010-05-03 18:53 ` Paul E. McKenney
2010-05-03 18:53 ` [PATCH tip/core/urgent 05/10] cgroup: Fix an RCU warning in alloc_css_id() Paul E. McKenney
` (5 subsequent siblings)
9 siblings, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2010-05-03 18:53 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
eric.dumazet, Li Zefan, Paul E. McKenney
From: Li Zefan <lizf@cn.fujitsu.com>
With CONFIG_PROVE_RCU=y, a warning can be triggered:
# mount -t cgroup -o debug xxx /mnt
# cat /proc/$$/cgroup
...
kernel/cgroup.c:1649 invoked rcu_dereference_check() without protection!
...
This is a false-positive, because cgroup_path() can be called
with either rcu_read_lock() held or cgroup_mutex held.
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
kernel/cgroup.c | 12 +++++++++---
1 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e2769e1..4ca928d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1646,7 +1646,9 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
char *start;
- struct dentry *dentry = rcu_dereference(cgrp->dentry);
+ struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
+ rcu_read_lock_held() ||
+ cgroup_lock_is_held());
if (!dentry || cgrp == dummytop) {
/*
@@ -1662,13 +1664,17 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
*--start = '\0';
for (;;) {
int len = dentry->d_name.len;
+
if ((start -= len) < buf)
return -ENAMETOOLONG;
- memcpy(start, cgrp->dentry->d_name.name, len);
+ memcpy(start, dentry->d_name.name, len);
cgrp = cgrp->parent;
if (!cgrp)
break;
- dentry = rcu_dereference(cgrp->dentry);
+
+ dentry = rcu_dereference_check(cgrp->dentry,
+ rcu_read_lock_held() ||
+ cgroup_lock_is_held());
if (!cgrp->parent)
continue;
if (--start < buf)
--
1.7.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH tip/core/urgent 05/10] cgroup: Fix an RCU warning in alloc_css_id()
2010-05-03 18:52 [PATCH tip/core/urgent 0/10] v3: Fix RCU lockdep splats Paul E. McKenney
` (3 preceding siblings ...)
2010-05-03 18:53 ` [PATCH tip/core/urgent 04/10] cgroup: Fix an RCU warning in cgroup_path() Paul E. McKenney
@ 2010-05-03 18:53 ` Paul E. McKenney
2010-05-03 18:53 ` [PATCH tip/core/urgent 06/10] sched: Fix an RCU warning in print_task() Paul E. McKenney
` (4 subsequent siblings)
9 siblings, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2010-05-03 18:53 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
eric.dumazet, Li Zefan, Paul E. McKenney
From: Li Zefan <lizf@cn.fujitsu.com>
With CONFIG_PROVE_RCU=y, a warning can be triggered:
# mount -t cgroup -o memory xxx /mnt
# mkdir /mnt/0
...
kernel/cgroup.c:4442 invoked rcu_dereference_check() without protection!
...
This is a false-positive. It's safe to directly access parent_css->id.
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
kernel/cgroup.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4ca928d..3a53c77 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4561,13 +4561,13 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
{
int subsys_id, i, depth = 0;
struct cgroup_subsys_state *parent_css, *child_css;
- struct css_id *child_id, *parent_id = NULL;
+ struct css_id *child_id, *parent_id;
subsys_id = ss->subsys_id;
parent_css = parent->subsys[subsys_id];
child_css = child->subsys[subsys_id];
- depth = css_depth(parent_css) + 1;
parent_id = parent_css->id;
+ depth = parent_id->depth + 1;
child_id = get_new_cssid(ss, depth);
if (IS_ERR(child_id))
--
1.7.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH tip/core/urgent 06/10] sched: Fix an RCU warning in print_task()
2010-05-03 18:52 [PATCH tip/core/urgent 0/10] v3: Fix RCU lockdep splats Paul E. McKenney
` (4 preceding siblings ...)
2010-05-03 18:53 ` [PATCH tip/core/urgent 05/10] cgroup: Fix an RCU warning in alloc_css_id() Paul E. McKenney
@ 2010-05-03 18:53 ` Paul E. McKenney
2010-05-03 18:53 ` [PATCH tip/core/urgent 07/10] cgroup: Check task_lock in task_subsys_state() Paul E. McKenney
` (3 subsequent siblings)
9 siblings, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2010-05-03 18:53 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
eric.dumazet, Li Zefan, Paul E. McKenney
From: Li Zefan <lizf@cn.fujitsu.com>
With CONFIG_PROVE_RCU=y, a warning can be triggered:
$ cat /proc/sched_debug
...
kernel/cgroup.c:1649 invoked rcu_dereference_check() without protection!
...
Both cgroup_path() and task_group() should be called with either
rcu_read_lock or cgroup_mutex held.
The rcu_dereference_check() does include cgroup_lock_is_held(), so we
know that this lock is not held. Therefore, in a CONFIG_PREEMPT kernel,
to say nothing of a CONFIG_PREEMPT_RT kernel, the original code could
have ended up copying a string out of the freelist.
This patch inserts RCU read-side primitives needed to prevent this
scenario.
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
kernel/sched_debug.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 9b49db1..19be00b 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -114,7 +114,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
{
char path[64];
+ rcu_read_lock();
cgroup_path(task_group(p)->css.cgroup, path, sizeof(path));
+ rcu_read_unlock();
SEQ_printf(m, " %s", path);
}
#endif
--
1.7.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH tip/core/urgent 07/10] cgroup: Check task_lock in task_subsys_state()
2010-05-03 18:52 [PATCH tip/core/urgent 0/10] v3: Fix RCU lockdep splats Paul E. McKenney
` (5 preceding siblings ...)
2010-05-03 18:53 ` [PATCH tip/core/urgent 06/10] sched: Fix an RCU warning in print_task() Paul E. McKenney
@ 2010-05-03 18:53 ` Paul E. McKenney
2010-05-03 18:53 ` [PATCH tip/core/urgent 08/10] memcg: css_id() must be called under rcu_read_lock() Paul E. McKenney
` (2 subsequent siblings)
9 siblings, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2010-05-03 18:53 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
eric.dumazet, Li Zefan, Paul E. McKenney
From: Li Zefan <lizf@cn.fujitsu.com>
Expand task_subsys_state()'s rcu_dereference_check() to include the full
locking rule as documented in Documentation/cgroups/cgroups.txt by adding
a check for task->alloc_lock being held.
This fixes an RCU false positive when resuming from suspend. The warning
comes from freezer cgroup in cgroup_freezing_or_frozen().
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
include/linux/cgroup.h | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b8ad1ea..8f78073 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -530,6 +530,7 @@ static inline struct cgroup_subsys_state *task_subsys_state(
{
return rcu_dereference_check(task->cgroups->subsys[subsys_id],
rcu_read_lock_held() ||
+ lockdep_is_held(&task->alloc_lock) ||
cgroup_lock_is_held());
}
--
1.7.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH tip/core/urgent 08/10] memcg: css_id() must be called under rcu_read_lock()
2010-05-03 18:52 [PATCH tip/core/urgent 0/10] v3: Fix RCU lockdep splats Paul E. McKenney
` (6 preceding siblings ...)
2010-05-03 18:53 ` [PATCH tip/core/urgent 07/10] cgroup: Check task_lock in task_subsys_state() Paul E. McKenney
@ 2010-05-03 18:53 ` Paul E. McKenney
2010-05-07 19:11 ` Andrew Morton
2010-05-03 18:53 ` [PATCH tip/core/urgent 09/10] blk-cgroup: Fix RCU correctness warning in cfq_init_queue() Paul E. McKenney
2010-05-03 18:53 ` [PATCH tip/core/urgent 10/10] vfs: fix RCU-lockdep false positive due to /proc access Paul E. McKenney
9 siblings, 1 reply; 16+ messages in thread
From: Paul E. McKenney @ 2010-05-03 18:53 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
eric.dumazet, Paul E. McKenney, Daisuke Nishimura, Balbir Singh,
KAMEZAWA Hiroyuki
This patch fixes task_in_mem_cgroup(), mem_cgroup_uncharge_swapcache(),
mem_cgroup_move_swap_account(), and is_target_pte_for_mc() to protect
calls to css_id(). An additional RCU lockdep splat was reported for
memcg_oom_wake_function(), however, this function is not yet in
mainline as of 2.6.34-rc5.
Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
mm/memcontrol.c | 21 ++++++++++++++++-----
1 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f4ede99..e06490d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -811,10 +811,12 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
* enabled in "curr" and "curr" is a child of "mem" in *cgroup*
* hierarchy(even if use_hierarchy is disabled in "mem").
*/
+ rcu_read_lock();
if (mem->use_hierarchy)
ret = css_is_ancestor(&curr->css, &mem->css);
else
ret = (curr == mem);
+ rcu_read_unlock();
css_put(&curr->css);
return ret;
}
@@ -2312,7 +2314,9 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
/* record memcg information */
if (do_swap_account && swapout && memcg) {
+ rcu_read_lock();
swap_cgroup_record(ent, css_id(&memcg->css));
+ rcu_read_unlock();
mem_cgroup_get(memcg);
}
if (swapout && memcg)
@@ -2369,8 +2373,10 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
{
unsigned short old_id, new_id;
+ rcu_read_lock();
old_id = css_id(&from->css);
new_id = css_id(&to->css);
+ rcu_read_unlock();
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
mem_cgroup_swap_statistics(from, false);
@@ -4038,11 +4044,16 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma,
put_page(page);
}
/* throught */
- if (ent.val && do_swap_account && !ret &&
- css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
- ret = MC_TARGET_SWAP;
- if (target)
- target->ent = ent;
+ if (ent.val && do_swap_account && !ret) {
+ unsigned short id;
+ rcu_read_lock();
+ id = css_id(&mc.from->css);
+ rcu_read_unlock();
+ if (id == lookup_swap_cgroup(ent)) {
+ ret = MC_TARGET_SWAP;
+ if (target)
+ target->ent = ent;
+ }
}
return ret;
}
--
1.7.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH tip/core/urgent 08/10] memcg: css_id() must be called under rcu_read_lock()
2010-05-03 18:53 ` [PATCH tip/core/urgent 08/10] memcg: css_id() must be called under rcu_read_lock() Paul E. McKenney
@ 2010-05-07 19:11 ` Andrew Morton
2010-05-10 0:17 ` KAMEZAWA Hiroyuki
0 siblings, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2010-05-07 19:11 UTC (permalink / raw)
To: Paul E. McKenney
Cc: linux-kernel, mingo, laijs, dipankar, mathieu.desnoyers, josh,
dvhltc, niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
eric.dumazet, Daisuke Nishimura, Balbir Singh, KAMEZAWA Hiroyuki
On Mon, 3 May 2010 11:53:17 -0700
"Paul E. McKenney" <paulmck@linux.vnet.ibm.com> wrote:
> This patch fixes task_in_mem_cgroup(), mem_cgroup_uncharge_swapcache(),
> mem_cgroup_move_swap_account(), and is_target_pte_for_mc() to protect
> calls to css_id(). An additional RCU lockdep splat was reported for
> memcg_oom_wake_function(), however, this function is not yet in
> mainline as of 2.6.34-rc5.
>
> ...
>
> index f4ede99..e06490d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -811,10 +811,12 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
> * enabled in "curr" and "curr" is a child of "mem" in *cgroup*
> * hierarchy(even if use_hierarchy is disabled in "mem").
> */
> + rcu_read_lock();
> if (mem->use_hierarchy)
> ret = css_is_ancestor(&curr->css, &mem->css);
> else
> ret = (curr == mem);
> + rcu_read_unlock();
> css_put(&curr->css);
> return ret;
> }
The above hunk seems to be locking around css_is_ancestor(), not
css_id() as the changelog states.
> @@ -2312,7 +2314,9 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
>
> /* record memcg information */
> if (do_swap_account && swapout && memcg) {
> + rcu_read_lock();
> swap_cgroup_record(ent, css_id(&memcg->css));
> + rcu_read_unlock();
> mem_cgroup_get(memcg);
> }
> if (swapout && memcg)
That makes some sense - the lock is held across the call and the use of
the result of the call.
> @@ -2369,8 +2373,10 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
> {
> unsigned short old_id, new_id;
>
> + rcu_read_lock();
> old_id = css_id(&from->css);
> new_id = css_id(&to->css);
> + rcu_read_unlock();
>
> if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
> mem_cgroup_swap_statistics(from, false);
This doesn't make sense. We take the lock, read the values, drop the
lock and then use the now-possibly-wrong values.
> @@ -4038,11 +4044,16 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma,
> put_page(page);
> }
> /* throught */
> - if (ent.val && do_swap_account && !ret &&
> - css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
> - ret = MC_TARGET_SWAP;
> - if (target)
> - target->ent = ent;
> + if (ent.val && do_swap_account && !ret) {
> + unsigned short id;
Please put a newline between end-of-locals and start-of-code.
> + rcu_read_lock();
> + id = css_id(&mc.from->css);
> + rcu_read_unlock();
> + if (id == lookup_swap_cgroup(ent)) {
> + ret = MC_TARGET_SWAP;
> + if (target)
> + target->ent = ent;
> + }
Again, when we use `id', the lock has been dropped. The value which
css_id() returned might no longer be correct.
The merge of this patch caused rejections in -mm's
memcg-clean-up-move-charge.patch (at least). It may have caused more,
I haven't checked yet. The code I have here now does:
if (ent.val && !ret) {
unsigned short id;
rcu_read_lock();
id = css_id(&mc.from->css);
rcu_read_unlock();
if (id == lookup_swap_cgroup(ent)) {
ret = MC_TARGET_SWAP;
if (target)
target->ent = ent;
}
}
however I suspect it would be saner to do
if (ent.val && !ret) {
rcu_read_lock();
if (css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
ret = MC_TARGET_SWAP;
if (target)
target->ent = ent;
}
rcu_read_unlock();
}
However this still doesn't make a lot of sense because three nanoseconds
after we've done rcu_read_unlock(), the value of
css_id(&mc.from->css) == lookup_swap_cgroup(ent)
might have changed. So I'd ask the memcg guys to have a more serious
think about all of this please. I get the feeling that the original
patch just splattered rcu_read_lock() around the place to silence a
runtime warning without digging into what the code is really supposed
to be doing.
The mem_cgroup_move_swap_account() would benefit from some attention
also please.
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH tip/core/urgent 08/10] memcg: css_id() must be called under rcu_read_lock()
2010-05-07 19:11 ` Andrew Morton
@ 2010-05-10 0:17 ` KAMEZAWA Hiroyuki
2010-05-10 5:46 ` [BUGFIX][PATCH 1/2] cgroup/cssid/memcg rcu fixes. (Was " KAMEZAWA Hiroyuki
0 siblings, 1 reply; 16+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-05-10 0:17 UTC (permalink / raw)
To: Andrew Morton
Cc: Paul E. McKenney, linux-kernel, mingo, laijs, dipankar,
mathieu.desnoyers, josh, dvhltc, niv, tglx, peterz, rostedt,
Valdis.Kletnieks, dhowells, eric.dumazet, Daisuke Nishimura,
Balbir Singh
On Fri, 7 May 2010 12:11:38 -0700
Andrew Morton <akpm@linux-foundation.org> wrote:
> On Mon, 3 May 2010 11:53:17 -0700
> "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> wrote:
>
> > This patch fixes task_in_mem_cgroup(), mem_cgroup_uncharge_swapcache(),
> > mem_cgroup_move_swap_account(), and is_target_pte_for_mc() to protect
> > calls to css_id(). An additional RCU lockdep splat was reported for
> > memcg_oom_wake_function(), however, this function is not yet in
> > mainline as of 2.6.34-rc5.
> >
> > ...
> >
> > index f4ede99..e06490d 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -811,10 +811,12 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
> > * enabled in "curr" and "curr" is a child of "mem" in *cgroup*
> > * hierarchy(even if use_hierarchy is disabled in "mem").
> > */
> > + rcu_read_lock();
> > if (mem->use_hierarchy)
> > ret = css_is_ancestor(&curr->css, &mem->css);
> > else
> > ret = (curr == mem);
> > + rcu_read_unlock();
> > css_put(&curr->css);
> > return ret;
> > }
>
> The above hunk seems to be locking around css_is_ancestor(), not
> css_id() as the changelog states.
>
Hmm. I'll move rcu_xxx to cgroup.c::css_is_ancestor().
(But... because we hold a reference count on the css, rcu_read_lock isn't
necessary... the lock check reports it as a bug, but this was intentional.)
> > @@ -2312,7 +2314,9 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
> >
> > /* record memcg information */
> > if (do_swap_account && swapout && memcg) {
> > + rcu_read_lock();
> > swap_cgroup_record(ent, css_id(&memcg->css));
> > + rcu_read_unlock();
> > mem_cgroup_get(memcg);
> > }
> > if (swapout && memcg)
>
> That makes some sense - the lock is held across the call and the use of
> the result of the call.
>
>
> > @@ -2369,8 +2373,10 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
> > {
> > unsigned short old_id, new_id;
> >
> > + rcu_read_lock();
> > old_id = css_id(&from->css);
> > new_id = css_id(&to->css);
> > + rcu_read_unlock();
> >
> > if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
> > mem_cgroup_swap_statistics(from, false);
>
> This doesn't make sense. We take the lock, read the values, drop the
> lock and then use the now-possibly-wrong values.
>
will fix.
> > @@ -4038,11 +4044,16 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma,
> > put_page(page);
> > }
> > /* throught */
> > - if (ent.val && do_swap_account && !ret &&
> > - css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
> > - ret = MC_TARGET_SWAP;
> > - if (target)
> > - target->ent = ent;
> > + if (ent.val && do_swap_account && !ret) {
> > + unsigned short id;
>
> Please put a newline between end-of-locals and start-of-code.
>
will fix.
> > + rcu_read_lock();
> > + id = css_id(&mc.from->css);
> > + rcu_read_unlock();
> > + if (id == lookup_swap_cgroup(ent)) {
> > + ret = MC_TARGET_SWAP;
> > + if (target)
> > + target->ent = ent;
> > + }
>
> Again, when we use `id', the lock has been dropped. The value which
> css_id() returned might no longer be correct.
>
>
will fix.
>
> The merge of this patch caused rejections in -mm's
> memcg-clean-up-move-charge.patch (at least). It may have caused more,
> I haven't checked yet. The code I have here now does:
>
> if (ent.val && !ret) {
> unsigned short id;
>
> rcu_read_lock();
> id = css_id(&mc.from->css);
> rcu_read_unlock();
> if (id == lookup_swap_cgroup(ent)) {
> ret = MC_TARGET_SWAP;
> if (target)
> target->ent = ent;
> }
> }
>
> however I suspect it would be saner to do
>
> if (ent.val && !ret) {
> rcu_read_lock();
> if (css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
> ret = MC_TARGET_SWAP;
> if (target)
> target->ent = ent;
> }
> rcu_read_unlock();
> }
>
I'll prepare for -rc6 patch and for -mm patch.
> However this still doesn't make a lot of sense because three nanoseconds
> after we've done rcu_read_unlock(), the value of
>
> css_id(&mc.from->css) == lookup_swap_cgroup(ent)
>
> might have changed. So I'd ask the memcg guys to have a more serious
> think about all of this please. I get the feeling that the original
> patch just splattered rcu_read_lock() around the place to silence a
> runtime warning without digging into what the code is really supposed
> to be doing.
>
In most cases, they are intentional and we hold a reference count on the css.
I can think of
- css_id_rcu() .... use rcu_dereference().
- css_id() ... don't use rcu_dereference().
But this seems crazy.
> The mem_cgroup_move_swap_account() would benefit from some attention
> also please.
OK, I'll rewrite. If I find that I can't avoid rejections in -mm, I'll make
a patch for -rc6 with minimal fixes, and add a patch fixing the remaining
things to -mm.
Thanks,
-Kame
^ permalink raw reply [flat|nested] 16+ messages in thread* [BUGFIX][PATCH 1/2] cgroup/cssid/memcg rcu fixes. (Was Re: [PATCH tip/core/urgent 08/10] memcg: css_id() must be called under rcu_read_lock()
2010-05-10 0:17 ` KAMEZAWA Hiroyuki
@ 2010-05-10 5:46 ` KAMEZAWA Hiroyuki
2010-05-10 5:48 ` [BUGFIX][PATCH 2/2] " KAMEZAWA Hiroyuki
0 siblings, 1 reply; 16+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-05-10 5:46 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: Andrew Morton, Paul E. McKenney, linux-kernel, mingo, laijs,
dipankar, mathieu.desnoyers, josh, dvhltc, niv, tglx, peterz,
rostedt, Valdis.Kletnieks, dhowells, eric.dumazet,
Daisuke Nishimura, Balbir Singh
Hi, Sorry for my lack of study...maybe this is an acceptable one.
This patch fixes css_id's RCU check and reverts a patch already merged.
This is based on linux-2.6.34-rc7.
-Kame
==
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Commit ad4ba375373937817404fd92239ef4cadbded23b modifies memcontrol.c
for fixing RCU check message. But Andrew Morton pointed out that
the fix doesn't seem sane and it was just for hiding lockdep
messages.
This is a patch to do the proper thing. Checking again, all the places
that commit fixes, which access without rcu_read_lock, were intentional....
all callers of css_id() have a reference count on it.
So, it's not necessary to be under rcu_read_lock().
Considering again, we can use rcu_dereference_check for css_id().
We know css->id is valid if css->refcnt > 0.
(css->id never changes and is freed only after css->refcnt goes to 0.)
This patch makes use of rcu_dereference_check() in css_id/depth and
removes the unnecessary rcu_read_lock() calls added by the commit.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
kernel/cgroup.c | 15 +++++++++++++--
mm/memcontrol.c | 19 +++++--------------
2 files changed, 18 insertions(+), 16 deletions(-)
Index: linux-2.6.34-rc7/kernel/cgroup.c
===================================================================
--- linux-2.6.34-rc7.orig/kernel/cgroup.c
+++ linux-2.6.34-rc7/kernel/cgroup.c
@@ -4435,7 +4435,15 @@ __setup("cgroup_disable=", cgroup_disabl
*/
unsigned short css_id(struct cgroup_subsys_state *css)
{
- struct css_id *cssid = rcu_dereference(css->id);
+ struct css_id *cssid;
+
+ /*
+ * This css_id() can return correct value when someone has refcnt
+ * on this or this is under rcu_read_lock(). Once css->id is allocated,
+ * it's unchanged until freed.
+ */
+ cssid = rcu_dereference_check(css->id,
+ rcu_read_lock_held() || atomic_read(&css->refcnt));
if (cssid)
return cssid->id;
@@ -4445,7 +4453,10 @@ EXPORT_SYMBOL_GPL(css_id);
unsigned short css_depth(struct cgroup_subsys_state *css)
{
- struct css_id *cssid = rcu_dereference(css->id);
+ struct css_id *cssid;
+
+ cssid = rcu_dereference_check(css->id,
+ rcu_read_lock_held() || atomic_read(&css->refcnt));
if (cssid)
return cssid->depth;
Index: linux-2.6.34-rc7/mm/memcontrol.c
===================================================================
--- linux-2.6.34-rc7.orig/mm/memcontrol.c
+++ linux-2.6.34-rc7/mm/memcontrol.c
@@ -2314,9 +2314,7 @@ mem_cgroup_uncharge_swapcache(struct pag
/* record memcg information */
if (do_swap_account && swapout && memcg) {
- rcu_read_lock();
swap_cgroup_record(ent, css_id(&memcg->css));
- rcu_read_unlock();
mem_cgroup_get(memcg);
}
if (swapout && memcg)
@@ -2373,10 +2371,8 @@ static int mem_cgroup_move_swap_account(
{
unsigned short old_id, new_id;
- rcu_read_lock();
old_id = css_id(&from->css);
new_id = css_id(&to->css);
- rcu_read_unlock();
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
mem_cgroup_swap_statistics(from, false);
@@ -4044,16 +4040,11 @@ static int is_target_pte_for_mc(struct v
put_page(page);
}
/* throught */
- if (ent.val && do_swap_account && !ret) {
- unsigned short id;
- rcu_read_lock();
- id = css_id(&mc.from->css);
- rcu_read_unlock();
- if (id == lookup_swap_cgroup(ent)) {
- ret = MC_TARGET_SWAP;
- if (target)
- target->ent = ent;
- }
+ if (ent.val && do_swap_account && !ret &&
+ css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
+ ret = MC_TARGET_SWAP;
+ if (target)
+ target->ent = ent;
}
return ret;
}
^ permalink raw reply [flat|nested] 16+ messages in thread* [BUGFIX][PATCH 2/2] cgroup/cssid/memcg rcu fixes. (Was Re: [PATCH tip/core/urgent 08/10] memcg: css_id() must be called under rcu_read_lock()
2010-05-10 5:46 ` [BUGFIX][PATCH 1/2] cgroup/cssid/memcg rcu fixes. (Was " KAMEZAWA Hiroyuki
@ 2010-05-10 5:48 ` KAMEZAWA Hiroyuki
0 siblings, 0 replies; 16+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-05-10 5:48 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: Andrew Morton, Paul E. McKenney, linux-kernel, mingo, laijs,
dipankar, mathieu.desnoyers, josh, dvhltc, niv, tglx, peterz,
rostedt, Valdis.Kletnieks, dhowells, eric.dumazet,
Daisuke Nishimura, Balbir Singh
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Some callers (in memcontrol.c) call css_is_ancestor() without
rcu_read_lock. Because css_is_ancestor() has to access RCU protected
data, it should be under rcu_read_lock().
This makes css_is_ancestor() itself do safe access to RCU protected
area. (At least, "root" can have refcnt==0 if it's not an ancestor of
"child". So, we need rcu_read_lock().)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
kernel/cgroup.c | 31 ++++++++++++++++++++++++++-----
mm/memcontrol.c | 4 ----
2 files changed, 26 insertions(+), 9 deletions(-)
Index: linux-2.6.34-rc7/kernel/cgroup.c
===================================================================
--- linux-2.6.34-rc7.orig/kernel/cgroup.c
+++ linux-2.6.34-rc7/kernel/cgroup.c
@@ -4464,15 +4464,36 @@ unsigned short css_depth(struct cgroup_s
}
EXPORT_SYMBOL_GPL(css_depth);
+/**
+ * css_is_ancestor - test "root" css is an ancestor of "child"
+ * @child: the css to be tested.
+ * @root: the css supposed to be an ancestor of the child.
+ *
+ * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
+ * this function reads css->id, this use rcu_dereference() and rcu_read_lock().
+ * But, considering usual usage, the csses should be valid objects after test.
+ * Assuming that the caller will do some action to the child if this returns
+ * true, the caller must take "child"'s reference count.
+ * If "child" is valid object and this returns true, "root" is valid, too.
+ */
+
bool css_is_ancestor(struct cgroup_subsys_state *child,
const struct cgroup_subsys_state *root)
{
- struct css_id *child_id = rcu_dereference(child->id);
- struct css_id *root_id = rcu_dereference(root->id);
+ struct css_id *child_id;
+ struct css_id *root_id;
+ bool ret = true;
- if (!child_id || !root_id || (child_id->depth < root_id->depth))
- return false;
- return child_id->stack[root_id->depth] == root_id->id;
+ rcu_read_lock();
+ child_id = rcu_dereference(child->id);
+ root_id = rcu_dereference(root->id);
+ if (!child_id
+ || !root_id
+ || (child_id->depth < root_id->depth)
+ || (child_id->stack[root_id->depth] != root_id->id))
+ ret = false;
+ rcu_read_unlock();
+ return ret;
}
static void __free_css_id_cb(struct rcu_head *head)
Index: linux-2.6.34-rc7/mm/memcontrol.c
===================================================================
--- linux-2.6.34-rc7.orig/mm/memcontrol.c
+++ linux-2.6.34-rc7/mm/memcontrol.c
@@ -811,12 +811,10 @@ int task_in_mem_cgroup(struct task_struc
* enabled in "curr" and "curr" is a child of "mem" in *cgroup*
* hierarchy(even if use_hierarchy is disabled in "mem").
*/
- rcu_read_lock();
if (mem->use_hierarchy)
ret = css_is_ancestor(&curr->css, &mem->css);
else
ret = (curr == mem);
- rcu_read_unlock();
css_put(&curr->css);
return ret;
}
@@ -1603,7 +1601,6 @@ static int __mem_cgroup_try_charge(struc
* There is a small race that "from" or "to" can be
* freed by rmdir, so we use css_tryget().
*/
- rcu_read_lock();
from = mc.from;
to = mc.to;
if (from && css_tryget(&from->css)) {
@@ -1624,7 +1621,6 @@ static int __mem_cgroup_try_charge(struc
do_continue = (to == mem_over_limit);
css_put(&to->css);
}
- rcu_read_unlock();
if (do_continue) {
DEFINE_WAIT(wait);
prepare_to_wait(&mc.waitq, &wait,
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH tip/core/urgent 09/10] blk-cgroup: Fix RCU correctness warning in cfq_init_queue()
2010-05-03 18:52 [PATCH tip/core/urgent 0/10] v3: Fix RCU lockdep splats Paul E. McKenney
` (7 preceding siblings ...)
2010-05-03 18:53 ` [PATCH tip/core/urgent 08/10] memcg: css_id() must be called under rcu_read_lock() Paul E. McKenney
@ 2010-05-03 18:53 ` Paul E. McKenney
2010-05-03 18:53 ` [PATCH tip/core/urgent 10/10] vfs: fix RCU-lockdep false positive due to /proc access Paul E. McKenney
9 siblings, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2010-05-03 18:53 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
eric.dumazet, Vivek Goyal, Paul E. McKenney, Jens Axboe
From: Vivek Goyal <vgoyal@redhat.com>
It is necessary to be in an RCU read-side critical section when invoking
css_id(), so this patch adds one to blkiocg_add_blkio_group(). This is
actually a false positive, because this is called at initialization time
and hence always refers to the root cgroup, which cannot go away.
[ 103.790505] ===================================================
[ 103.790509] [ INFO: suspicious rcu_dereference_check() usage. ]
[ 103.790511] ---------------------------------------------------
[ 103.790514] kernel/cgroup.c:4432 invoked rcu_dereference_check() without protection!
[ 103.790517]
[ 103.790517] other info that might help us debug this:
[ 103.790519]
[ 103.790521]
[ 103.790521] rcu_scheduler_active = 1, debug_locks = 1
[ 103.790524] 4 locks held by bash/4422:
[ 103.790526] #0: (&buffer->mutex){+.+.+.}, at: [<ffffffff8114befa>] sysfs_write_file+0x3c/0x144
[ 103.790537] #1: (s_active#102){.+.+.+}, at: [<ffffffff8114bfa5>] sysfs_write_file+0xe7/0x144
[ 103.790544] #2: (&q->sysfs_lock){+.+.+.}, at: [<ffffffff812263b1>] queue_attr_store+0x49/0x8f
[ 103.790552] #3: (&(&blkcg->lock)->rlock){......}, at: [<ffffffff8122e4db>] blkiocg_add_blkio_group+0x2b/0xad
[ 103.790560]
[ 103.790561] stack backtrace:
[ 103.790564] Pid: 4422, comm: bash Not tainted 2.6.34-rc4-blkio-second-crash #81
[ 103.790567] Call Trace:
[ 103.790572] [<ffffffff81068f57>] lockdep_rcu_dereference+0x9d/0xa5
[ 103.790577] [<ffffffff8107fac1>] css_id+0x44/0x57
[ 103.790581] [<ffffffff8122e503>] blkiocg_add_blkio_group+0x53/0xad
[ 103.790586] [<ffffffff81231936>] cfq_init_queue+0x139/0x32c
[ 103.790591] [<ffffffff8121f2d0>] elv_iosched_store+0xbf/0x1bf
[ 103.790595] [<ffffffff812263d8>] queue_attr_store+0x70/0x8f
[ 103.790599] [<ffffffff8114bfa5>] ? sysfs_write_file+0xe7/0x144
[ 103.790603] [<ffffffff8114bfc6>] sysfs_write_file+0x108/0x144
[ 103.790609] [<ffffffff810f527f>] vfs_write+0xae/0x10b
[ 103.790612] [<ffffffff81069863>] ? trace_hardirqs_on_caller+0x10c/0x130
[ 103.790616] [<ffffffff810f539c>] sys_write+0x4a/0x6e
[ 103.790622] [<ffffffff81002b5b>] system_call_fastpath+0x16/0x1b
[ 103.790625]
Located-by: Miles Lane <miles.lane@gmail.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
---
block/cfq-iosched.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 838834b..5f127cf 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3694,8 +3694,10 @@ static void *cfq_init_queue(struct request_queue *q)
* to make sure that cfq_put_cfqg() does not try to kfree root group
*/
atomic_set(&cfqg->ref, 1);
+ rcu_read_lock();
blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, (void *)cfqd,
0);
+ rcu_read_unlock();
#endif
/*
* Not strictly needed (since RB_ROOT just clears the node and we
--
1.7.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH tip/core/urgent 10/10] vfs: fix RCU-lockdep false positive due to /proc access
2010-05-03 18:52 [PATCH tip/core/urgent 0/10] v3: Fix RCU lockdep splats Paul E. McKenney
` (8 preceding siblings ...)
2010-05-03 18:53 ` [PATCH tip/core/urgent 09/10] blk-cgroup: Fix RCU correctness warning in cfq_init_queue() Paul E. McKenney
@ 2010-05-03 18:53 ` Paul E. McKenney
9 siblings, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2010-05-03 18:53 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
eric.dumazet, Paul E. McKenney, Al Viro
If a single-threaded process does a file-descriptor operation, and
some other process accesses that same file descriptor via /proc,
the current rcu_dereference_check_fdtable() can give a false-positive
RCU-lockdep splat due to the reference count being increased by the
/proc access after the reference-count check in fget_light() but before
the check in rcu_dereference_check_fdtable().
This commit prevents this false positive by checking for a single-threaded
process. To avoid #include hell, this commit also introduces an
rcu_my_thread_group_empty() as a wrapper for thread_group_empty(current).
Located-by: Miles Lane <miles.lane@gmail.com>
Located-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
---
include/linux/fdtable.h | 3 ++-
include/linux/rcupdate.h | 2 ++
kernel/rcupdate.c | 11 +++++++++++
3 files changed, 15 insertions(+), 1 deletions(-)
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 013dc52..d147461 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -61,7 +61,8 @@ struct files_struct {
(rcu_dereference_check((fdtfd), \
rcu_read_lock_held() || \
lockdep_is_held(&(files)->file_lock) || \
- atomic_read(&(files)->count) == 1))
+ atomic_read(&(files)->count) == 1 || \
+ rcu_my_thread_group_empty()))
#define files_fdtable(files) \
(rcu_dereference_check_fdtable((files), (files)->fdt))
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index ec9ab49..4dca275 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -190,6 +190,8 @@ static inline int rcu_read_lock_sched_held(void)
#ifdef CONFIG_PROVE_RCU
+extern int rcu_my_thread_group_empty(void);
+
#define __do_rcu_dereference_check(c) \
do { \
static bool __warned; \
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 03a7ea1..49d808e 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -122,3 +122,14 @@ void wakeme_after_rcu(struct rcu_head *head)
rcu = container_of(head, struct rcu_synchronize, head);
complete(&rcu->completion);
}
+
+#ifdef CONFIG_PROVE_RCU
+/*
+ * wrapper function to avoid #include problems.
+ */
+int rcu_my_thread_group_empty(void)
+{
+ return thread_group_empty(current);
+}
+EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty);
+#endif /* #ifdef CONFIG_PROVE_RCU */
--
1.7.0
^ permalink raw reply related [flat|nested] 16+ messages in thread