mutex warning in cpufreq + RFC patch

linux-pm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* mutex warning in cpufreq + RFC patch
@ 2013-08-28  2:57 Stephen Boyd
  2013-08-28  6:58 ` Viresh Kumar
  0 siblings, 1 reply; 12+ messages in thread
From: Stephen Boyd @ 2013-08-28  2:57 UTC (permalink / raw)
  To: Viresh Kumar, Rafael J . Wysocki; +Cc: linux-kernel, cpufreq, linux-pm

I'm running this simple test code in a shell on my 3.10 kernel and running
into this warning rather quickly.

	cd /sys/devices/system/cpu/cpu1
	while true
	do
	echo 0 > online
	echo 1 > online
	done &
	while true
	do
	echo 300000 > cpufreq/scaling_min_freq
	echo 1000000 > cpufreq/scaling_min_freq
	done

(Note you should place valid values for min/max freq in the example
above.)

WARNING: at kernel/mutex.c:341 __mutex_lock_slowpath+0x14c/0x410()
DEBUG_LOCKS_WARN_ON(l->magic != l)
Modules linked in:
CPU: 0 PID: 1960 Comm: sh Tainted: G        W    3.10.0 #32
[<c010c178>] (unwind_backtrace+0x0/0x11c) from [<c0109dec>] (show_stack+0x10/0x14)
[<c0109dec>] (show_stack+0x10/0x14) from [<c01904cc>] (warn_slowpath_common+0x4c/0x6c)
[<c01904cc>] (warn_slowpath_common+0x4c/0x6c) from [<c019056c>] (warn_slowpath_fmt+0x2c/0x3c)
[<c019056c>] (warn_slowpath_fmt+0x2c/0x3c) from [<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410)
[<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410) from [<c08a0618>] (mutex_lock+0x20/0x3c)
[<c08a0618>] (mutex_lock+0x20/0x3c) from [<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8)
[<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8) from [<c06325b0>] (__cpufreq_governor+0xdc/0x1a4)
[<c06325b0>] (__cpufreq_governor+0xdc/0x1a4) from [<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0)
[<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0) from [<c0632ea0>] (store_scaling_min_freq+0x80/0x9c)
[<c0632ea0>] (store_scaling_min_freq+0x80/0x9c) from [<c0633ae4>] (store+0x58/0x90)
[<c0633ae4>] (store+0x58/0x90) from [<c02a69d4>] (sysfs_write_file+0x100/0x148)
[<c02a69d4>] (sysfs_write_file+0x100/0x148) from [<c0255c18>] (vfs_write+0xcc/0x174)
[<c0255c18>] (vfs_write+0xcc/0x174) from [<c0255f70>] (SyS_write+0x38/0x64)
[<c0255f70>] (SyS_write+0x38/0x64) from [<c0106120>] (ret_fast_syscall+0x0/0x30)

This is happening because the governor is stopped via hotplug
while we're in the middle of touching the scaling_min_freq file.
When the governor is stopped we destroy the timer_mutex that the
scaling_min_freq thread is just about to acquire. From what I can
tell, we shouldn't be stopping the governor until after the
kobjects go away or we should start and stop the governor while
holding the policy semaphore otherwise userspace can come in and
use uninitialized things. I have this hack which seems to mostly
work. Thoughts?

----8<----
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index cbfe3c1..134004b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -823,11 +823,11 @@ static int cpufreq_add_policy_cpu(unsigned int cpu, unsigned int sibling,
 	policy = cpufreq_cpu_get(sibling);
 	WARN_ON(!policy);
 
+	lock_policy_rwsem_write(sibling);
+
 	if (has_target)
 		__cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 
-	lock_policy_rwsem_write(sibling);
-
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 
 	cpumask_set_cpu(cpu, policy->cpus);
@@ -835,12 +835,11 @@ static int cpufreq_add_policy_cpu(unsigned int cpu, unsigned int sibling,
 	per_cpu(cpufreq_cpu_data, cpu) = policy;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-	unlock_policy_rwsem_write(sibling);
-
 	if (has_target) {
 		__cpufreq_governor(policy, CPUFREQ_GOV_START);
 		__cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
 	}
+	unlock_policy_rwsem_write(sibling);
 
 	ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
 	if (ret) {
@@ -1037,9 +1036,6 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif
 		return -EINVAL;
 	}
 
-	if (cpufreq_driver->target)
-		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
-
 #ifdef CONFIG_HOTPLUG_CPU
 	if (!cpufreq_driver->setpolicy)
 		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
@@ -1048,9 +1044,6 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif
 
 	WARN_ON(lock_policy_rwsem_write(cpu));
 	cpus = cpumask_weight(data->cpus);
-
-	if (cpus > 1)
-		cpumask_clear_cpu(cpu, data->cpus);
 	unlock_policy_rwsem_write(cpu);
 
 	if (cpu != data->cpu) {
@@ -1086,9 +1079,6 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif
 
 	/* If cpu is last user of policy, free policy */
 	if (cpus == 1) {
-		if (cpufreq_driver->target)
-			__cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);
-
 		lock_policy_rwsem_read(cpu);
 		kobj = &data->kobj;
 		cmp = &data->kobj_unregister;
@@ -1103,6 +1093,11 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif
 		wait_for_completion(cmp);
 		pr_debug("wait complete\n");
 
+		if (cpufreq_driver->target) {
+			__cpufreq_governor(data, CPUFREQ_GOV_STOP);
+			__cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);
+		}
+
 		if (cpufreq_driver->exit)
 			cpufreq_driver->exit(data);
 
@@ -1113,8 +1108,13 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif
 		pr_debug("%s: removing link, cpu: %d\n", __func__, cpu);
 		cpufreq_cpu_put(data);
 		if (cpufreq_driver->target) {
+			WARN_ON(lock_policy_rwsem_write(cpu));
+			__cpufreq_governor(data, CPUFREQ_GOV_STOP);
+			if (cpus > 1)
+				cpumask_clear_cpu(cpu, data->cpus);
 			__cpufreq_governor(data, CPUFREQ_GOV_START);
 			__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
+			unlock_policy_rwsem_write(cpu);
 		}
 	}
 

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: mutex warning in cpufreq + RFC patch
  2013-08-28  2:57 mutex warning in cpufreq + RFC patch Stephen Boyd
@ 2013-08-28  6:58 ` Viresh Kumar
  2013-08-28 16:52   ` Stephen Boyd
  0 siblings, 1 reply; 12+ messages in thread
From: Viresh Kumar @ 2013-08-28  6:58 UTC (permalink / raw)
  To: Stephen Boyd
  Cc: Rafael J . Wysocki, Linux Kernel Mailing List,
	cpufreq@vger.kernel.org, linux-pm@vger.kernel.org

Hi Stephen,

On 28 August 2013 08:27, Stephen Boyd <sboyd@codeaurora.org> wrote:
> I'm running this simple test code in a shell on my 3.10 kernel and running
> into this warning rather quickly.
>
>         cd /sys/devices/system/cpu/cpu1
>         while true
>         do
>         echo 0 > online
>         echo 1 > online
>         done &
>         while true
>         do
>         echo 300000 > cpufreq/scaling_min_freq
>         echo 1000000 > cpufreq/scaling_min_freq
>         done
>
> (Note you should place valid values for min/max freq in the example
> above.)
>
> WARNING: at kernel/mutex.c:341 __mutex_lock_slowpath+0x14c/0x410()
> DEBUG_LOCKS_WARN_ON(l->magic != l)
> Modules linked in:
> CPU: 0 PID: 1960 Comm: sh Tainted: G        W    3.10.0 #32
> [<c010c178>] (unwind_backtrace+0x0/0x11c) from [<c0109dec>] (show_stack+0x10/0x14)
> [<c0109dec>] (show_stack+0x10/0x14) from [<c01904cc>] (warn_slowpath_common+0x4c/0x6c)
> [<c01904cc>] (warn_slowpath_common+0x4c/0x6c) from [<c019056c>] (warn_slowpath_fmt+0x2c/0x3c)
> [<c019056c>] (warn_slowpath_fmt+0x2c/0x3c) from [<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410)
> [<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410) from [<c08a0618>] (mutex_lock+0x20/0x3c)
> [<c08a0618>] (mutex_lock+0x20/0x3c) from [<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8)
> [<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8) from [<c06325b0>] (__cpufreq_governor+0xdc/0x1a4)
> [<c06325b0>] (__cpufreq_governor+0xdc/0x1a4) from [<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0)
> [<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0) from [<c0632ea0>] (store_scaling_min_freq+0x80/0x9c)
> [<c0632ea0>] (store_scaling_min_freq+0x80/0x9c) from [<c0633ae4>] (store+0x58/0x90)
> [<c0633ae4>] (store+0x58/0x90) from [<c02a69d4>] (sysfs_write_file+0x100/0x148)
> [<c02a69d4>] (sysfs_write_file+0x100/0x148) from [<c0255c18>] (vfs_write+0xcc/0x174)
> [<c0255c18>] (vfs_write+0xcc/0x174) from [<c0255f70>] (SyS_write+0x38/0x64)
> [<c0255f70>] (SyS_write+0x38/0x64) from [<c0106120>] (ret_fast_syscall+0x0/0x30)
>
> This is happening because the governor is stopped via hotplug and
> while we're in the middle of touching the scaling_min_freq file.
> When the governor is stopped we destroy the timer_mutex that the
> scaling_min_freq thread is just about to acquire. From what I can
> tell, we shouldn't be stopping the governor until after the
> kobjects go away or we should start and stop the governor while
> holding the policy semaphore otherwise userspace can come in and
> use uninitialized things. I have this hack which seems to mostly
> work. Thoughts?

I haven't gone through the hack yet, but I am trying to understand the
problem first.. There has been some work in the past around this
kind of scenario..

commit 95731ebb114c5f0c028459388560fc2a72fe5049
Author: Xiaoguang Chen <chenxg@marvell.com>
Date:   Wed Jun 19 15:00:07 2013 +0800

    cpufreq: Fix governor start/stop race condition


The problem probably is poor error checking which is still present at
few places, in __cpufreq_set_policy() routine..

Can you try after fixing them? Something similar has to be done..

commit 3de9bdeb28638e164d1f0eb38dd68e3f5d2ac95c
Author: Viresh Kumar <viresh.kumar@linaro.org>
Date:   Tue Aug 6 22:53:13 2013 +0530

    cpufreq: improve error checking on return values of __cpufreq_governor()

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: mutex warning in cpufreq + RFC patch
  2013-08-28  6:58 ` Viresh Kumar
@ 2013-08-28 16:52   ` Stephen Boyd
  2013-08-29  8:37     ` Viresh Kumar
  0 siblings, 1 reply; 12+ messages in thread
From: Stephen Boyd @ 2013-08-28 16:52 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Linux Kernel Mailing List,
	cpufreq@vger.kernel.org, linux-pm@vger.kernel.org

On 08/27/13 23:58, Viresh Kumar wrote:
> I haven't gone through the hack yet, but I am trying to understand the
> problem first.. There had been some work in the past around this
> kind of scenarios..
>
> commit 95731ebb114c5f0c028459388560fc2a72fe5049
> Author: Xiaoguang Chen <chenxg@marvell.com>
> Date:   Wed Jun 19 15:00:07 2013 +0800
>
>     cpufreq: Fix governor start/stop race condition
>
>
> The problem probably is poor error checking which is still present at
> few places, in __cpufreq_set_policy() routine..
>
> Can you try after fixing them? Something similar has to be done..
>
> commit 3de9bdeb28638e164d1f0eb38dd68e3f5d2ac95c
> Author: Viresh Kumar <viresh.kumar@linaro.org>
> Date:   Tue Aug 6 22:53:13 2013 +0530
>
>     cpufreq: improve error checking on return values of __cpufreq_governor()

No the problem isn't poor error checking. The problem is between
gov_stop and gov_start userspace can come in and write scaling_min_freq
which will try to acquire the mutex (sorry the copy paste of the error
got messed up so I've repasted it).

WARNING: at kernel/mutex.c:341 __mutex_lock_slowpath+0x14c/0x410()              
DEBUG_LOCKS_WARN_ON(l->magic != l)
Modules linked in:
CPU: 0 PID: 1960 Comm: sh Tainted: G        W    3.10.0 #32
[<c010c178>] (unwind_backtrace+0x0/0x11c) from [<c0109dec>] (show_stack+0x10/0x14)
[<c0109dec>] (show_stack+0x10/0x14) from [<c01904cc>] (warn_slowpath_common+0x4c/0x6c)
[<c01904cc>] (warn_slowpath_common+0x4c/0x6c) from [<c019056c>] (warn_slowpath_fmt+0x2c/0x3c)
[<c019056c>] (warn_slowpath_fmt+0x2c/0x3c) from [<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410)
[<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410) from [<c08a0618>] (mutex_lock+0x20/0x3c)
[<c08a0618>] (mutex_lock+0x20/0x3c) from [<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8)
[<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8) from [<c06325b0>] (__cpufreq_governor+0xdc/0x1a4)
[<c06325b0>] (__cpufreq_governor+0xdc/0x1a4) from [<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0)
[<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0) from [<c0632ea0>] (store_scaling_min_freq+0x80/0x9c)
[<c0632ea0>] (store_scaling_min_freq+0x80/0x9c) from [<c0633ae4>] (store+0x58/0x90)
[<c0633ae4>] (store+0x58/0x90) from [<c02a69d4>] (sysfs_write_file+0x100/0x148)
[<c02a69d4>] (sysfs_write_file+0x100/0x148) from [<c0255c18>] (vfs_write+0xcc/0x174)
[<c0255c18>] (vfs_write+0xcc/0x174) from [<c0255f70>] (SyS_write+0x38/0x64)
[<c0255f70>] (SyS_write+0x38/0x64) from [<c0106120>] (ret_fast_syscall+0x0/0x30)


I've applied these patches on top of v3.10

f51e1eb63d9c28cec188337ee656a13be6980cfd (cpufreq: Fix cpufreq regression after suspend/resume)
aae760ed21cd690fe8a6db9f3a177ad55d7e12ab (cpufreq: Revert commit a66b2e to fix suspend/resume regression)
e8d05276f236ee6435e78411f62be9714e0b9377 (cpufreq: Revert commit 2f7021a8 to fix CPU hotplug regression) 
2a99859932281ed6c2ecdd988855f8f6838f6743 (cpufreq: Fix cpufreq driver module refcount balance after suspend/resume)
419e172145cf6c51d436a8bf4afcd17511f0ff79 (cpufreq: don't leave stale policy pointer in cdbs->cur_policy)
95731ebb114c5f0c028459388560fc2a72fe5049 (cpufreq: Fix governor start/stop race condition)

That second to last one causes a NULL pointer exception after the mutex
warning above because the limits case does

    if (policy->max < cpu_cdbs->cur_policy->cur)

and that dereferences a NULL cur_policy pointer.

Are there any fixes that I'm missing? I see that some things are
changing in linux-next but they don't look like fixes, more like
optimizations.

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: mutex warning in cpufreq + RFC patch
  2013-08-28 16:52   ` Stephen Boyd
@ 2013-08-29  8:37     ` Viresh Kumar
  2013-08-29  8:39       ` Viresh Kumar
  2013-08-31  0:36       ` Stephen Boyd
  0 siblings, 2 replies; 12+ messages in thread
From: Viresh Kumar @ 2013-08-29  8:37 UTC (permalink / raw)
  To: Stephen Boyd
  Cc: Rafael J . Wysocki, Linux Kernel Mailing List,
	cpufreq@vger.kernel.org, linux-pm@vger.kernel.org, Kukjin Kim

On 28 August 2013 22:22, Stephen Boyd <sboyd@codeaurora.org> wrote:
> On 08/27/13 23:58, Viresh Kumar wrote:
>> I haven't gone through the hack yet, but I am trying to understand the
>> problem first.. There had been some work in the past around this
>> kind of scenarios..
>>
>> commit 95731ebb114c5f0c028459388560fc2a72fe5049
>> Author: Xiaoguang Chen <chenxg@marvell.com>
>> Date:   Wed Jun 19 15:00:07 2013 +0800
>>
>>     cpufreq: Fix governor start/stop race condition
>>
>>
>> The problem probably is poor error checking which is still present at
>> few places, in __cpufreq_set_policy() routine..
>>
>> Can you try after fixing them? Something similar has to be done..
>>
>> commit 3de9bdeb28638e164d1f0eb38dd68e3f5d2ac95c
>> Author: Viresh Kumar <viresh.kumar@linaro.org>
>> Date:   Tue Aug 6 22:53:13 2013 +0530
>>
>>     cpufreq: improve error checking on return values of __cpufreq_governor()
>
> No the problem isn't poor error checking. The problem is between
> gov_stop and gov_start userspace can come in and write scaling_min_freq
> which will try to acquire the mutex (sorry the copy paste of the error
> got messed up so I've repasted it).
>
> WARNING: at kernel/mutex.c:341 __mutex_lock_slowpath+0x14c/0x410()
> DEBUG_LOCKS_WARN_ON(l->magic != l)
> Modules linked in:                                                              CPU: 0 PID: 1960 Comm: sh Tainted: G        W    3.10.0 #32
> [<c010c178>] (unwind_backtrace+0x0/0x11c) from [<c0109dec>] (show_stack+0x10/0x14)
> [<c0109dec>] (show_stack+0x10/0x14) from [<c01904cc>] (warn_slowpath_common+0x4c/0x6c)
> [<c01904cc>] (warn_slowpath_common+0x4c/0x6c) from [<c019056c>] (warn_slowpath_fmt+0x2c/0x3c)
> [<c019056c>] (warn_slowpath_fmt+0x2c/0x3c) from [<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410)
> [<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410) from [<c08a0618>] (mutex_lock+0x20/0x3c)
> [<c08a0618>] (mutex_lock+0x20/0x3c) from [<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8)
> [<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8) from [<c06325b0>] (__cpufreq_governor+0xdc/0x1a4)
> [<c06325b0>] (__cpufreq_governor+0xdc/0x1a4) from [<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0)
> [<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0) from [<c0632ea0>] (store_scaling_min_freq+0x80/0x9c)
> [<c0632ea0>] (store_scaling_min_freq+0x80/0x9c) from [<c0633ae4>] (store+0x58/0x90)
> [<c0633ae4>] (store+0x58/0x90) from [<c02a69d4>] (sysfs_write_file+0x100/0x148)
> [<c02a69d4>] (sysfs_write_file+0x100/0x148) from [<c0255c18>] (vfs_write+0xcc/0x174)
> [<c0255c18>] (vfs_write+0xcc/0x174) from [<c0255f70>] (SyS_write+0x38/0x64)
> [<c0255f70>] (SyS_write+0x38/0x64) from [<c0106120>] (ret_fast_syscall+0x0/0x30)
>
>
> I've applied these patches on top of v3.10
>
> f51e1eb63d9c28cec188337ee656a13be6980cfd (cpufreq: Fix cpufreq regression after suspend/resume
> aae760ed21cd690fe8a6db9f3a177ad55d7e12ab (cpufreq: Revert commit a66b2e to fix suspend/resume regression)
> e8d05276f236ee6435e78411f62be9714e0b9377 (cpufreq: Revert commit 2f7021a8 to fix CPU hotplug regression)
> 2a99859932281ed6c2ecdd988855f8f6838f6743 (cpufreq: Fix cpufreq driver module refcount balance after suspend/resume)
> 419e172145cf6c51d436a8bf4afcd17511f0ff79 (cpufreq: don't leave stale policy pointer in cdbs->cur_policy)
> 95731ebb114c5f0c028459388560fc2a72fe5049 (cpufreq: Fix governor start/stop race condition)
>
> That second to last one causes a NULL pointer exception after the mutex
> warning above because the limits case does
>
>     if (policy->max < cpu_cdbs->cur_policy->cur)
>
> and that dereferences a NULL cur_policy pointer.

I have seen something similar and the error checking patch that
I mentioned earlier came as solution to that only..

> Are there any fixes that I'm missing? I see that some things are
> changing in linux-next but they don't look like fixes, more like
> optimizations.

Getting patches over 3.10 would be tricky.. You are two kernel
versions back and that's not going to help much.. There are too many
patches in between linux-next and 3.10..

I really can't tell you which specific ones to include, as I am lost in them :)

probably try to get all of them in ? i.e. All patches touching drivers/cpufreq
and include/linux/cpufreq.h..

I have got an Arndale (Samsung Exynos) board where offlining CPUs is broken.
@Kukjin: Can you please try to get it fixed?? It leads to crashes..

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: mutex warning in cpufreq + RFC patch
  2013-08-29  8:37     ` Viresh Kumar
@ 2013-08-29  8:39       ` Viresh Kumar
  2013-08-31  0:36       ` Stephen Boyd
  1 sibling, 0 replies; 12+ messages in thread
From: Viresh Kumar @ 2013-08-29  8:39 UTC (permalink / raw)
  To: Stephen Boyd
  Cc: Rafael J . Wysocki, Linux Kernel Mailing List,
	cpufreq@vger.kernel.org, linux-pm@vger.kernel.org, Kukjin Kim

On 29 August 2013 14:07, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 28 August 2013 22:22, Stephen Boyd <sboyd@codeaurora.org> wrote:
>> On 08/27/13 23:58, Viresh Kumar wrote:
>>> I haven't gone through the hack yet, but I am trying to understand the
>>> problem first.. There had been some work in the past around this
>>> kind of scenarios..
>>>
>>> commit 95731ebb114c5f0c028459388560fc2a72fe5049
>>> Author: Xiaoguang Chen <chenxg@marvell.com>
>>> Date:   Wed Jun 19 15:00:07 2013 +0800
>>>
>>>     cpufreq: Fix governor start/stop race condition
>>>
>>>
>>> The problem probably is poor error checking which is still present at
>>> few places, in __cpufreq_set_policy() routine..
>>>
>>> Can you try after fixing them? Something similar has to be done..
>>>
>>> commit 3de9bdeb28638e164d1f0eb38dd68e3f5d2ac95c
>>> Author: Viresh Kumar <viresh.kumar@linaro.org>
>>> Date:   Tue Aug 6 22:53:13 2013 +0530
>>>
>>>     cpufreq: improve error checking on return values of __cpufreq_governor()
>>
>> No the problem isn't poor error checking. The problem is between
>> gov_stop and gov_start userspace can come in and write scaling_min_freq
>> which will try to acquire the mutex (sorry the copy paste of the error
>> got messed up so I've repasted it).

I forgot to answer here :(

I would still say this could be a problem.. Suppose one thread tried to STOP
governor and stopped it.. Now other one came and tried to STOP it, it failed
but due to poor error checking, went ahead to next step.. and then they
got into issues..

I still see this as a potential issue in this case..

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: mutex warning in cpufreq + RFC patch
  2013-08-29  8:37     ` Viresh Kumar
  2013-08-29  8:39       ` Viresh Kumar
@ 2013-08-31  0:36       ` Stephen Boyd
  2013-08-31  0:55         ` Rafael J. Wysocki
  2013-09-01  6:24         ` Viresh Kumar
  1 sibling, 2 replies; 12+ messages in thread
From: Stephen Boyd @ 2013-08-31  0:36 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Linux Kernel Mailing List,
	cpufreq@vger.kernel.org, linux-pm@vger.kernel.org, Kukjin Kim

On 08/29, Viresh Kumar wrote:
> On 28 August 2013 22:22, Stephen Boyd <sboyd@codeaurora.org> wrote:
> >
> > I've applied these patches on top of v3.10
> >
> > f51e1eb63d9c28cec188337ee656a13be6980cfd (cpufreq: Fix cpufreq regression after suspend/resume
> > aae760ed21cd690fe8a6db9f3a177ad55d7e12ab (cpufreq: Revert commit a66b2e to fix suspend/resume regression)
> > e8d05276f236ee6435e78411f62be9714e0b9377 (cpufreq: Revert commit 2f7021a8 to fix CPU hotplug regression)
> > 2a99859932281ed6c2ecdd988855f8f6838f6743 (cpufreq: Fix cpufreq driver module refcount balance after suspend/resume)
> > 419e172145cf6c51d436a8bf4afcd17511f0ff79 (cpufreq: don't leave stale policy pointer in cdbs->cur_policy)
> > 95731ebb114c5f0c028459388560fc2a72fe5049 (cpufreq: Fix governor start/stop race condition)
> >
> > That second to last one causes a NULL pointer exception after the mutex
> > warning above because the limits case does
> >
> >     if (policy->max < cpu_cdbs->cur_policy->cur)
> >
> > and that dereferences a NULL cur_policy pointer.
> 
> I have seen something similar and the error checking patch that
> I mentioned earlier came as solution to that only..

Yes that patch may reduce the chance of the race condition but I
don't believe it removes it entirely. I believe this bug still
exists in linux-next. Consider the scenario where CPU1 is going
down.

__cpufreq_remove_dev()
 ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
  __cpufreq_governor()
   policy->governor->governor(policy, CPUFREQ_GOV_STOP);
    cpufreq_governor_dbs()
     case CPUFREQ_GOV_STOP:
      mutex_destroy(&cpu_cdbs->timer_mutex)
      cpu_cdbs->cur_policy = NULL;
  <PREEMPT>
store()
 __cpufreq_set_policy()
  ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
   __cpufreq_governor()
    policy->governor->governor(policy, CPUFREQ_GOV_LIMITS);
     case CPUFREQ_GOV_LIMITS:
      mutex_lock(&cpu_cdbs->timer_mutex); <-- Warning (destroyed mutex)
       if (policy->max < cpu_cdbs->cur_policy->cur) <- cur_policy == NULL

Once we stop the governor I don't see how another thread can't
race in and get all the way down into the GOV_LIMITS case. Even
if we wanted to lock out that thread with some mutex or semaphore
it will have to continue running eventually and so we really need
to wait until all the sysfs files are gone before we stop the
governor (in the case of the last cpu for the policy) or we need
to stop and start the governor while holding the policy semaphore
to prevent a race.

> 
> > Are there any fixes that I'm missing? I see that some things are
> > changing in linux-next but they don't look like fixes, more like
> > optimizations.
> 
> Getting patches over 3.10 would be tricky.. You are two kernel
> version back and that's not going to help much.. There are too many
> patches in between linux-next and 3.10..
>
> 
> I really can't tell you which specific ones to include, as I am lost in them :)

That's a problem. 3.10 is the next long term stable kernel and so we need to
backport any fixes to 3.10 for the next two years. Hopefully these bugs I'm
finding in the 3.10 stable kernel's cpufreq code aren't known issues on
3.11/next.

> 
> probably try to get all of them in ? i.e. All patches touching drivers/cpufreq
> and include/linux/cpufreq.h..

I may have to try that. I got another crash below. This time the
governor was assigned to NULL in cpufreq_add_dev_interface() and
then userspace came in and wrote to scaling_min_freq which tries
to use the governor pointer in __cpufreq_governor() but it's
NULL. It looks like a change silently fixed this problem by
wrapping all this code in a rwsem (6eed940 cpufreq: Use rwsem for
protecting critical sections). Should we backport that change to
3.10.x trees? Alternatively, we can reorder the creation of the
sysfs files with the policy setup during CPU up (patch below) so
that userspace can't possibly be in the kernel at this time.

I think there is also another race between the cpufreq stats and
hotplug. I'm getting a sysfs warning about creating duplicate
cpufreq/stats files and I think that's because the thread that
created the sysfs file is preempted before it can assign the
cpufreq_stats_table and then cpufreq_add_dev_interface() comes in
and tries to create the table a second time. I need to keep
looking at that race to better understand it.

> 
> I have got Arndale (Samsung-exnos) board where offlining CPUs is broken
> @Kukjin: Can you please try to get it fixed?? It leads to crashes..

Maybe you can reproduce this on an x86 machine? This is all
generic code.

Unable to handle kernel NULL pointer dereference at virtual address 00000020
pgd = ea46c000
[00000020] *pgd=00000000
Internal error: Oops: 5 [#1] PREEMPT SMP ARM
Modules linked in: 
CPU: 1 PID: 3665 Comm: sh Tainted: G        W    3.10.0 #37
task: ea5b2300 ti: eaa6e000 task.ti: eaa6e000
PC is at __cpufreq_governor+0x10/0x1a4
LR is at __cpufreq_set_policy+0x278/0x2c0
pc : [<c0677218>]    lr : [<c067765c>]    psr: 60000013
sp : eaa6fe40  ip : 00000000  fp : 00000000
r10: ea437ddc  r9 : c0a6d86c  r8 : eaa6ff80
r7 : 00000000  r6 : 00000000  r5 : 00000003  r4 : ea437d80
r3 : 00000000  r2 : 000493e0  r1 : 00000000  r0 : ea437d80
Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
Control: 10c5787d  Table: 2a46c06a  DAC: 00000015
Process sh (pid: 3665, stack limit = 0xeaa6e238)
Stack: (0xeaa6fe40 to 0xeaa70000)
fe40: eaa6fe74 ea437d80 00000000 c067765c ea437d80 eaa6fe74 000493e0 ea437d80
fe60: 00000007 ea8d5000 c0ffb028 c0677c38 01437300 00000002 00000002 00000000
fe80: 00000001 00000000 00229200 000493e0 00000000 000493e0 00229200 000493e0
fea0: 00000000 00000000 00000000 00000000 ffffffe0 ea437dc0 ea437dc0 c0678ef4
fec0: 000493e0 00229200 00000000 00000000 ebe297c0 ea437de0 ea437de0 c32d3050
fee0: 00000000 c0ffaf84 ebd6ce40 00000002 00000003 00000000 00000000 dead4ead
ff00: ffffffff ffffffff ea437e14 ea437e14 00000007 ea437d80 ea8d5000 c06788fc
ff20: 00000007 eab7ecc0 ebe26e00 00000007 ebe26e18 c02ae634 ea437300 00000007
ff40: b85ae36c eaa6ff80 b85ae36c eaa6e000 00000007 c025d438 ea437300 b85ae36c
ff60: 00000007 00000000 00000000 ea437300 00000000 b85ae36c 00000007 c025d790
ff80: 00000000 00000000 00000007 00000003 00000007 00000001 00000004 c0106304
ffa0: 00000000 c0106180 00000003 00000007 00000001 b85ae36c 00000007 ffffffff
ffc0: 00000003 00000007 00000001 00000004 b85ae36c 00000000 00000000 00000000
ffe0: 00000000 bed834c8 b6f67d75 b6f02208 20000010 00000001 00000000 00000000
[<c0677218>] (__cpufreq_governor+0x10/0x1a4) from [<c067765c>] (__cpufreq_set_policy+0x278/0x2c0)
[<c067765c>] (__cpufreq_set_policy+0x278/0x2c0) from [<c0677c38>] (store_scaling_min_freq+0x80/0x9c)
[<c0677c38>] (store_scaling_min_freq+0x80/0x9c) from [<c06788fc>] (store+0x58/0x90)
[<c06788fc>] (store+0x58/0x90) from [<c02ae634>] (sysfs_write_file+0x100/0x148)
[<c02ae634>] (sysfs_write_file+0x100/0x148) from [<c025d438>] (vfs_write+0xcc/0x174)
[<c025d438>] (vfs_write+0xcc/0x174) from [<c025d790>] (SyS_write+0x38/0x64)
[<c025d790>] (SyS_write+0x38/0x64) from [<c0106180>] (ret_fast_syscall+0x0/0x30)
Code: e92d4070 e1a05001 e5901030 e1a04000 (e5913020)

Patch is based on 3.10 plus all the patches I mentioned above.

---8<----

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index cbfe3c1..ae4b59c 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -755,6 +755,29 @@ static int cpufreq_add_dev_interface(unsigned int cpu,
 	if (ret)
 		return ret;
 
+	write_lock_irqsave(&cpufreq_driver_lock, flags);
+	for_each_cpu(j, policy->cpus) {
+		per_cpu(cpufreq_cpu_data, j) = policy;
+		per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
+	}
+	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
+	/* assure that the starting sequence is run in __cpufreq_set_policy */
+	policy->governor = NULL;
+
+	/* set default policy */
+	ret = __cpufreq_set_policy(policy, &new_policy);
+	policy->user_policy.policy = policy->policy;
+	policy->user_policy.governor = policy->governor;
+
+	if (ret) {
+		pr_debug("setting policy failed\n");
+		if (cpufreq_driver->exit)
+			cpufreq_driver->exit(policy);
+		return ret;
+	}
+
 	/* set up files for this cpu device */
 	drv_attr = cpufreq_driver->attr;
 	while ((drv_attr) && (*drv_attr)) {
@@ -779,31 +802,10 @@ static int cpufreq_add_dev_interface(unsigned int cpu,
 			goto err_out_kobj_put;
 	}
 
-	write_lock_irqsave(&cpufreq_driver_lock, flags);
-	for_each_cpu(j, policy->cpus) {
-		per_cpu(cpufreq_cpu_data, j) = policy;
-		per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
-	}
-	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
 	ret = cpufreq_add_dev_symlink(cpu, policy);
 	if (ret)
 		goto err_out_kobj_put;
 
-	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
-	/* assure that the starting sequence is run in __cpufreq_set_policy */
-	policy->governor = NULL;
-
-	/* set default policy */
-	ret = __cpufreq_set_policy(policy, &new_policy);
-	policy->user_policy.policy = policy->policy;
-	policy->user_policy.governor = policy->governor;
-
-	if (ret) {
-		pr_debug("setting policy failed\n");
-		if (cpufreq_driver->exit)
-			cpufreq_driver->exit(policy);
-	}
 	return ret;
 
 err_out_kobj_put:

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: mutex warning in cpufreq + RFC patch
  2013-08-31  0:36       ` Stephen Boyd
@ 2013-08-31  0:55         ` Rafael J. Wysocki
  2013-08-31  0:59           ` Rafael J. Wysocki
  2013-09-01  6:24         ` Viresh Kumar
  1 sibling, 1 reply; 12+ messages in thread
From: Rafael J. Wysocki @ 2013-08-31  0:55 UTC (permalink / raw)
  To: Stephen Boyd
  Cc: Viresh Kumar, Linux Kernel Mailing List, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Kukjin Kim

On Friday, August 30, 2013 05:36:41 PM Stephen Boyd wrote:
> On 08/29, Viresh Kumar wrote:
> > On 28 August 2013 22:22, Stephen Boyd <sboyd@codeaurora.org> wrote:
> > >
> > > I've applied these patches on top of v3.10
> > >
> > > f51e1eb63d9c28cec188337ee656a13be6980cfd (cpufreq: Fix cpufreq regression after suspend/resume
> > > aae760ed21cd690fe8a6db9f3a177ad55d7e12ab (cpufreq: Revert commit a66b2e to fix suspend/resume regression)
> > > e8d05276f236ee6435e78411f62be9714e0b9377 (cpufreq: Revert commit 2f7021a8 to fix CPU hotplug regression)
> > > 2a99859932281ed6c2ecdd988855f8f6838f6743 (cpufreq: Fix cpufreq driver module refcount balance after suspend/resume)
> > > 419e172145cf6c51d436a8bf4afcd17511f0ff79 (cpufreq: don't leave stale policy pointer in cdbs->cur_policy)
> > > 95731ebb114c5f0c028459388560fc2a72fe5049 (cpufreq: Fix governor start/stop race condition)
> > >
> > > That second to last one causes a NULL pointer exception after the mutex
> > > warning above because the limits case does
> > >
> > >     if (policy->max < cpu_cdbs->cur_policy->cur)
> > >
> > > and that dereferences a NULL cur_policy pointer.
> > 
> > I have seen something similar and the error checking patch that
> > I mentioned earlier came as solution to that only..
> 
> Yes that patch may reduce the chance of the race condition but I
> don't believe it removes it entirely. I believe this bug still
> exists in linux-next. Consider the scenario where CPU1 is going
> down.
> 
> __cpufreq_remove_dev()
>  ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>   __cpufreq_governor()
>    policy->governor->governor(policy, CPUFREQ_GOV_STOP);
>     cpufreq_governor_dbs()
>      case CPUFREQ_GOV_STOP:
>       mutex_destroy(&cpu_cdbs->timer_mutex)
>       cpu_cdbs->cur_policy = NULL;
>   <PREEMPT>
> store()
>  __cpufreq_set_policy()
>   ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
>    __cpufreq_governor()
>     policy->governor->governor(policy, CPUFREQ_GOV_LIMITS);
>      case CPUFREQ_GOV_LIMITS:
>       mutex_lock(&cpu_cdbs->timer_mutex); <-- Warning (destroyed mutex)
>        if (policy->max < cpu_cdbs->cur_policy->cur) <- cur_policy == NULL
> 
> Once we stop the governor I don't see how another thread can't
> race in and get all the way down into the GOV_LIMITS case. Even
> if we wanted to lock out that thread with some mutex or semaphore
> it will have to continue running eventually and so we really need
> to wait until all the sysfs files are gone before we stop the
> governor (in the case of the last cpu for the policy) or we need
> to stop and start the governor while holding the policy semaphore
> to prevent a race.
> 
> > 
> > > Are there any fixes that I'm missing? I see that some things are
> > > changing in linux-next but they don't look like fixes, more like
> > > optimizations.
> > 
> > Getting patches over 3.10 would be tricky.. You are two kernel
> > version back and that's not going to help much.. There are too many
> > patches in between linux-next and 3.10..
> >
> > 
> > I really can't tell you which specific ones to include, as I am lost in them :)
> 
> That's a problem. 3.10 is the next long term stable kernel and so we need to
> backport any fixes to 3.10 for the next two years. Hopefully these bugs I'm
> finding in the 3.10 stable kernel's cpufreq code aren't known issues on
> 3.11/next.

No, they aren't.

Well, that's the main reason why I've been pushing back against more churn in
the cpuidle subsystem recently.  I think we went too far with changes that
were not entirely understood and now we're seeing the fallout.

It would be great if you could identify the 3.11 changes that fix problems
you're seeing in 3.10.y (doing a reverse bisect of drivers/cpufreq/ changes
might help, since you have reproducers it seems).

Thanks,
Rafael


-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: mutex warning in cpufreq + RFC patch
  2013-08-31  0:55         ` Rafael J. Wysocki
@ 2013-08-31  0:59           ` Rafael J. Wysocki
  0 siblings, 0 replies; 12+ messages in thread
From: Rafael J. Wysocki @ 2013-08-31  0:59 UTC (permalink / raw)
  To: Stephen Boyd
  Cc: Viresh Kumar, Linux Kernel Mailing List, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Kukjin Kim

On Saturday, August 31, 2013 02:55:57 AM Rafael J. Wysocki wrote:
> On Friday, August 30, 2013 05:36:41 PM Stephen Boyd wrote:
> > On 08/29, Viresh Kumar wrote:
> > > On 28 August 2013 22:22, Stephen Boyd <sboyd@codeaurora.org> wrote:
> > > >
> > > > I've applied these patches on top of v3.10
> > > >
> > > > f51e1eb63d9c28cec188337ee656a13be6980cfd (cpufreq: Fix cpufreq regression after suspend/resume
> > > > aae760ed21cd690fe8a6db9f3a177ad55d7e12ab (cpufreq: Revert commit a66b2e to fix suspend/resume regression)
> > > > e8d05276f236ee6435e78411f62be9714e0b9377 (cpufreq: Revert commit 2f7021a8 to fix CPU hotplug regression)
> > > > 2a99859932281ed6c2ecdd988855f8f6838f6743 (cpufreq: Fix cpufreq driver module refcount balance after suspend/resume)
> > > > 419e172145cf6c51d436a8bf4afcd17511f0ff79 (cpufreq: don't leave stale policy pointer in cdbs->cur_policy)
> > > > 95731ebb114c5f0c028459388560fc2a72fe5049 (cpufreq: Fix governor start/stop race condition)
> > > >
> > > > That second to last one causes a NULL pointer exception after the mutex
> > > > warning above because the limits case does
> > > >
> > > >     if (policy->max < cpu_cdbs->cur_policy->cur)
> > > >
> > > > and that dereferences a NULL cur_policy pointer.
> > > 
> > > I have seen something similar and the error checking patch that
> > > I mentioned earlier came as solution to that only..
> > 
> > Yes that patch may reduce the chance of the race condition but I
> > don't believe it removes it entirely. I believe this bug still
> > exists in linux-next. Consider the scenario where CPU1 is going
> > down.
> > 
> > __cpufreq_remove_dev()
> >  ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
> >   __cpufreq_governor()
> >    policy->governor->governor(policy, CPUFREQ_GOV_STOP);
> >     cpufreq_governor_dbs()
> >      case CPUFREQ_GOV_STOP:
> >       mutex_destroy(&cpu_cdbs->timer_mutex)
> >       cpu_cdbs->cur_policy = NULL;
> >   <PREEMPT>
> > store()
> >  __cpufreq_set_policy()
> >   ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
> >    __cpufreq_governor()
> >     policy->governor->governor(policy, CPUFREQ_GOV_LIMITS);
> >      case CPUFREQ_GOV_LIMITS:
> >       mutex_lock(&cpu_cdbs->timer_mutex); <-- Warning (destroyed mutex)
> >        if (policy->max < cpu_cdbs->cur_policy->cur) <- cur_policy == NULL
> > 
> > Once we stop the governor I don't see how another thread can't
> > race in and get all the way down into the GOV_LIMITS case. Even
> > if we wanted to lock out that thread with some mutex or semaphore
> > it will have to continue running eventually and so we really need
> > to wait until all the sysfs files are gone before we stop the
> > governor (in the case of the last cpu for the policy) or we need
> > to stop and start the governor while holding the policy semaphore
> > to prevent a race.
> > 
> > > 
> > > > Are there any fixes that I'm missing? I see that some things are
> > > > changing in linux-next but they don't look like fixes, more like
> > > > optimizations.
> > > 
> > > Getting patches over 3.10 would be tricky.. You are two kernel
> > > version back and that's not going to help much.. There are too many
> > > patches in between linux-next and 3.10..
> > >
> > > 
> > > I really can't tell you which specific ones to include, as I am lost in them :)
> > 
> > That's a problem. 3.10 is the next long term stable kernel and so we need to
> > backport any fixes to 3.10 for the next two years. Hopefully these bugs I'm
> > finding in the 3.10 stable kernel's cpufreq code aren't known issues on
> > 3.11/next.
> 
> No, they aren't.
> 
> Well, that's the main reason why I've been pushing back against more churn in
> the cpuidle subsystem recently.  I think we went too far with changes that
> were not entirely understood and now we're seeing the fallout.

s/cpuidle/cpufreq/

Apparently, I'm already too tired.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: mutex warning in cpufreq + RFC patch
  2013-08-31  0:36       ` Stephen Boyd
  2013-08-31  0:55         ` Rafael J. Wysocki
@ 2013-09-01  6:24         ` Viresh Kumar
  2013-09-01 13:22           ` Rafael J. Wysocki
  1 sibling, 1 reply; 12+ messages in thread
From: Viresh Kumar @ 2013-09-01  6:24 UTC (permalink / raw)
  To: Stephen Boyd
  Cc: Rafael J . Wysocki, Linux Kernel Mailing List,
	cpufreq@vger.kernel.org, linux-pm@vger.kernel.org, Kukjin Kim

On 31 August 2013 06:06, Stephen Boyd <sboyd@codeaurora.org> wrote:
> Yes that patch may reduce the chance of the race condition but I
> don't believe it removes it entirely. I believe this bug still
> exists in linux-next. Consider the scenario where CPU1 is going
> down.
>
> __cpufreq_remove_dev()
>  ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>   __cpufreq_governor()
>    policy->governor->governor(policy, CPUFREQ_GOV_STOP);
>     cpufreq_governor_dbs()
>      case CPUFREQ_GOV_STOP:
>       mutex_destroy(&cpu_cdbs->timer_mutex)
>       cpu_cdbs->cur_policy = NULL;
>   <PREEMPT>
> store()
>  __cpufreq_set_policy()
>   ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
>    __cpufreq_governor()
>     policy->governor->governor(policy, CPUFREQ_GOV_LIMITS);
>      case CPUFREQ_GOV_LIMITS:
>       mutex_lock(&cpu_cdbs->timer_mutex); <-- Warning (destroyed mutex)
>        if (policy->max < cpu_cdbs->cur_policy->cur) <- cur_policy == NULL

Some of the crashes you reported would be fixed by the patches I sent
this morning.

Let me know if anything else is left for the latest linux-next...

Btw, I am facing another crash which I am not sure how to fix.. It
came with your script:

[  190.850481] ------------[ cut here ]------------
[  190.850489] WARNING: CPU: 3 PID: 14140 at
/home/arm/work/kernel/mywork/linux.git/include/linux/kref.h:47
kobject_get+0x42/0x50()
[  190.850490] Modules linked in: nfsd nfs fscache lockd arc4 iwldvm
mac80211 i915 iwlwifi drm_kms_helper nfs_acl auth_rpcgss cfg80211
sunrpc drm joyd
ev thinkpad_acpi snd_hda_codec_hdmi snd_seq_midi
snd_hda_codec_conexant oid_registry btusb snd_rawmidi snd_hda_intel
snd_hda_codec i2c_algo_bit rfcomm
 snd_seq_midi_event bnep psmouse snd_seq snd_hwdep snd_pcm bluetooth
snd_timer snd_seq_device parport_pc snd_page_alloc ppdev tpm_tis snd
soundcore lp
c_ich lp parport video serio_raw mac_hid wmi nvram binfmt_misc btrfs
raid6_pq e1000e ptp pps_core xor sdhci_pci sdhci zlib_deflate
libcrc32c
[  190.850563] CPU: 3 PID: 14140 Comm: sh Not tainted 3.11.0-rc7-custom #39
[  190.850567] Hardware name: LENOVO 4236G50/4236G50, BIOS 83ET70WW
(1.40 ) 06/12/2012
[  190.850571]  000000000000002f ffff8800c8bdfc38 ffffffff816746c3
0000000000000007
[  190.850580]  0000000000000000 ffff8800c8bdfc78 ffffffff8104cf8c
ffff88011e5f9b18
[  190.850587]  ffff880118eaf000 0000000000000001 0000000000000202
0000000000000008
[  190.850593] Call Trace:
[  190.850607]  [<ffffffff816746c3>] dump_stack+0x46/0x58
[  190.850615]  [<ffffffff8104cf8c>] warn_slowpath_common+0x8c/0xc0
[  190.850622]  [<ffffffff8104cfda>] warn_slowpath_null+0x1a/0x20
[  190.850629]  [<ffffffff81324e02>] kobject_get+0x42/0x50
[  190.850638]  [<ffffffff81533ab0>] cpufreq_cpu_get+0x80/0xc0
[  190.850647]  [<ffffffff81533c11>] cpufreq_get_policy+0x21/0x120
[  190.850655]  [<ffffffff81533fdf>] store_scaling_min_freq+0x3f/0xa0
[  190.850666]  [<ffffffff816785b6>] ? down_write+0x16/0x40
[  190.850674]  [<ffffffff81533000>] store+0x70/0xb0
[  190.850683]  [<ffffffff811f2582>] sysfs_write_file+0xe2/0x170
[  190.850693]  [<ffffffff81181e8e>] vfs_write+0xce/0x200
[  190.850700]  [<ffffffff81182392>] SyS_write+0x52/0xa0
[  190.850707]  [<ffffffff81683882>] system_call_fastpath+0x16/0x1b

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: mutex warning in cpufreq + RFC patch
  2013-09-01  6:24         ` Viresh Kumar
@ 2013-09-01 13:22           ` Rafael J. Wysocki
  2013-09-01 16:21             ` Viresh Kumar
  0 siblings, 1 reply; 12+ messages in thread
From: Rafael J. Wysocki @ 2013-09-01 13:22 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Stephen Boyd, Linux Kernel Mailing List, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Kukjin Kim

On Sunday, September 01, 2013 11:54:10 AM Viresh Kumar wrote:
> On 31 August 2013 06:06, Stephen Boyd <sboyd@codeaurora.org> wrote:
> > Yes that patch may reduce the chance of the race condition but I
> > don't believe it removes it entirely. I believe this bug still
> > exists in linux-next. Consider the scenario where CPU1 is going
> > down.
> >
> > __cpufreq_remove_dev()
> >  ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
> >   __cpufreq_governor()
> >    policy->governor->governor(policy, CPUFREQ_GOV_STOP);
> >     cpufreq_governor_dbs()
> >      case CPUFREQ_GOV_STOP:
> >       mutex_destroy(&cpu_cdbs->timer_mutex)
> >       cpu_cdbs->cur_policy = NULL;
> >   <PREEMPT>
> > store()
> >  __cpufreq_set_policy()
> >   ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
> >    __cpufreq_governor()
> >     policy->governor->governor(policy, CPUFREQ_GOV_LIMITS);
> >      case CPUFREQ_GOV_LIMITS:
> >       mutex_lock(&cpu_cdbs->timer_mutex); <-- Warning (destroyed mutex)
> >        if (policy->max < cpu_cdbs->cur_policy->cur) <- cur_policy == NULL
> 
> Some of the crashes you reported would be fixed by the patches I sent
> today morning.
> 
> Let me know if anything else is left for latest linux-next...
> 
> Btw, I am facing another crash which I am not sure how to fix.. It
> came with your script:

This isn't a crash, but a WARN_ON_ONCE() triggering.  The comment in kref_get()
explains when that occurs, so we seem to have a race between
store_scaling_min_freq() and CPU removal.

> [  190.850481] ------------[ cut here ]------------
> [  190.850489] WARNING: CPU: 3 PID: 14140 at
> /home/arm/work/kernel/mywork/linux.git/include/linux/kref.h:47
> kobject_get+0x42/0x50()
> [  190.850490] Modules linked in: nfsd nfs fscache lockd arc4 iwldvm
> mac80211 i915 iwlwifi drm_kms_helper nfs_acl auth_rpcgss cfg80211
> sunrpc drm joyd
> ev thinkpad_acpi snd_hda_codec_hdmi snd_seq_midi
> snd_hda_codec_conexant oid_registry btusb snd_rawmidi snd_hda_intel
> snd_hda_codec i2c_algo_bit rfcomm
>  snd_seq_midi_event bnep psmouse snd_seq snd_hwdep snd_pcm bluetooth
> snd_timer snd_seq_device parport_pc snd_page_alloc ppdev tpm_tis snd
> soundcore lp
> c_ich lp parport video serio_raw mac_hid wmi nvram binfmt_misc btrfs
> raid6_pq e1000e ptp pps_core xor sdhci_pci sdhci zlib_deflate
> libcrc32c
> [  190.850563] CPU: 3 PID: 14140 Comm: sh Not tainted 3.11.0-rc7-custom #39
> [  190.850567] Hardware name: LENOVO 4236G50/4236G50, BIOS 83ET70WW
> (1.40 ) 06/12/2012
> [  190.850571]  000000000000002f ffff8800c8bdfc38 ffffffff816746c3
> 0000000000000007
> [  190.850580]  0000000000000000 ffff8800c8bdfc78 ffffffff8104cf8c
> ffff88011e5f9b18
> [  190.850587]  ffff880118eaf000 0000000000000001 0000000000000202
> 0000000000000008
> [  190.850593] Call Trace:
> [  190.850607]  [<ffffffff816746c3>] dump_stack+0x46/0x58
> [  190.850615]  [<ffffffff8104cf8c>] warn_slowpath_common+0x8c/0xc0
> [  190.850622]  [<ffffffff8104cfda>] warn_slowpath_null+0x1a/0x20
> [  190.850629]  [<ffffffff81324e02>] kobject_get+0x42/0x50
> [  190.850638]  [<ffffffff81533ab0>] cpufreq_cpu_get+0x80/0xc0
> [  190.850647]  [<ffffffff81533c11>] cpufreq_get_policy+0x21/0x120
> [  190.850655]  [<ffffffff81533fdf>] store_scaling_min_freq+0x3f/0xa0
> [  190.850666]  [<ffffffff816785b6>] ? down_write+0x16/0x40
> [  190.850674]  [<ffffffff81533000>] store+0x70/0xb0
> [  190.850683]  [<ffffffff811f2582>] sysfs_write_file+0xe2/0x170
> [  190.850693]  [<ffffffff81181e8e>] vfs_write+0xce/0x200
> [  190.850700]  [<ffffffff81182392>] SyS_write+0x52/0xa0
> [  190.850707]  [<ffffffff81683882>] system_call_fastpath+0x16/0x1b
-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: mutex warning in cpufreq + RFC patch
  2013-09-01 13:22           ` Rafael J. Wysocki
@ 2013-09-01 16:21             ` Viresh Kumar
  2013-09-03 13:18               ` Srivatsa S. Bhat
  0 siblings, 1 reply; 12+ messages in thread
From: Viresh Kumar @ 2013-09-01 16:21 UTC (permalink / raw)
  To: Rafael J. Wysocki, Srivatsa S. Bhat
  Cc: Stephen Boyd, Linux Kernel Mailing List, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org

On 1 September 2013 18:52, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Sunday, September 01, 2013 11:54:10 AM Viresh Kumar wrote:
>> Btw, I am facing another crash which I am not sure how to fix.. It
>> came with your script:
>
> This isn't a crash, but a WARN_ON_ONCE() triggering.  The comment in kref_get()
> explains when that occurs, so we seem to have a race between
> store_scaling_min_freq() and CPU removal.

Yeah, I meant the same thing.. I don't know how to solve it or what's
the correct way to solve it:

I tried this:

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 4d5723db..be2e5f4 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -200,8 +200,14 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
        if (cpufreq_driver) {
                /* get the CPU */
                policy = per_cpu(cpufreq_cpu_data, cpu);
-               if (policy)
-                       kobject_get(&policy->kobj);
+               if (policy) {
+                       cpu_hotplug_disable();
+                       if (unlikely(!cpu_online(policy->cpu)))
+                               policy = NULL;
+                       else
+                               kobject_get(&policy->kobj);
+                       cpu_hotplug_enable();
+               }
        }

And this gave another crash:

[  246.724464] INFO: task irqbalance:1052 blocked for more than 120 seconds.
[  246.724474] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  246.724479] irqbalance      D 0000000000000000     0  1052      1 0x00000000
[  246.724489]  ffff88002f2dfd78 0000000000000086 ffff88002e53b800
0000000000000001
[  246.724498]  ffff880030620000 ffff88002f2dffd8 ffff88002f2dffd8
ffff88002f2dffd8
[  246.724505]  ffff880030181700 ffff880030620000 ffff8800ccbfd698
ffffffff81c981c0
[  246.724511] Call Trace:
[  246.724525]  [<ffffffff81679c39>] schedule+0x29/0x70
[  246.724532]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
[  246.724543]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
[  246.724552]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
[  246.724561]  [<ffffffff81416e15>] lock_device_hotplug+0x15/0x20
[  246.724568]  [<ffffffff81416e60>] show_online+0x20/0x60
[  246.724574]  [<ffffffff81415ab0>] dev_attr_show+0x20/0x60
[  246.724583]  [<ffffffff81129a3e>] ? __get_free_pages+0xe/0x40
[  246.724592]  [<ffffffff811f2c9a>] sysfs_read_file+0xaa/0x180
[  246.724602]  [<ffffffff81182074>] vfs_read+0xb4/0x180
[  246.724610]  [<ffffffff811822f2>] SyS_read+0x52/0xa0
[  246.724617]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
[  246.724631] INFO: task preload:1731 blocked for more than 120 seconds.
[  246.724634] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  246.724637] preload         D ffffffff8180fb60     0  1731      1 0x00000000
[  246.724643]  ffff8800cca71d28 0000000000000086 ffff8800cca71cc8
ffffffff8118bafd
[  246.724650]  ffff88002e7dc500 ffff8800cca71fd8 ffff8800cca71fd8
ffff8800cca71fd8
[  246.724655]  ffff880119359700 ffff88002e7dc500 ffff880119408240
ffffffff81c31b88
[  246.724661] Call Trace:
[  246.724669]  [<ffffffff8118bafd>] ? terminate_walk+0x3d/0x50
[  246.724675]  [<ffffffff81679c39>] schedule+0x29/0x70
[  246.724681]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
[  246.724690]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
[  246.724697]  [<ffffffff8118ba72>] ? path_put+0x22/0x30
[  246.724705]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
[  246.724713]  [<ffffffff8104d19c>] get_online_cpus+0x2c/0x50
[  246.724722]  [<ffffffff8114196a>] all_vm_events+0x1a/0x120
[  246.724730]  [<ffffffff81141b05>] vmstat_start+0x95/0xc0
[  246.724738]  [<ffffffff811a3739>] seq_read+0x139/0x3e0
[  246.724747]  [<ffffffff812acbb3>] ? security_file_permission+0xa3/0xc0
[  246.724755]  [<ffffffff811e4083>] proc_reg_read+0x43/0x70
[  246.724763]  [<ffffffff81182074>] vfs_read+0xb4/0x180
[  246.724770]  [<ffffffff811822f2>] SyS_read+0x52/0xa0
[  246.724776]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
[  246.724781] INFO: task ondemand:1867 blocked for more than 120 seconds.
[  246.724783] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  246.724786] ondemand        D ffffffff8180fb60     0  1867      1 0x00000000
[  246.724791]  ffff8800d66c3ba8 0000000000000082 ffff8800d66c3be8
ffffffff811c0ba2
[  246.724797]  ffff88002f1a0000 ffff8800d66c3fd8 ffff8800d66c3fd8
ffff8800d66c3fd8
[  246.724803]  ffffffff81c10440 ffff88002f1a0000 0000000000000000
ffffffff81c31b40
[  246.724809] Call Trace:
[  246.724817]  [<ffffffff811c0ba2>] ? fsnotify+0x1d2/0x2b0
[  246.724822]  [<ffffffff81679c39>] schedule+0x29/0x70
[  246.724828]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
[  246.724836]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
[  246.724844]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
[  246.724850]  [<ffffffff8104d1f5>] cpu_maps_update_begin+0x15/0x20
[  246.724856]  [<ffffffff8104d53e>] cpu_hotplug_disable+0xe/0x20
[  246.724866]  [<ffffffff81533aad>] cpufreq_cpu_get+0x7d/0xf0
[  246.724874]  [<ffffffff81533c41>] cpufreq_get_policy+0x21/0x120
[  246.724882]  [<ffffffff81533d8b>] store_scaling_governor+0x4b/0x1f0
[  246.724890]  [<ffffffff8118d37a>] ? link_path_walk+0x23a/0x8d0
[  246.724897]  [<ffffffff8119fae9>] ? mntput_no_expire+0x49/0x160
[  246.724902]  [<ffffffff8119fc24>] ? mntput+0x24/0x40
[  246.724911]  [<ffffffff81533000>] store+0x70/0xb0
[  246.724919]  [<ffffffff811f2582>] sysfs_write_file+0xe2/0x170
[  246.724927]  [<ffffffff81181e8e>] vfs_write+0xce/0x200
[  246.724934]  [<ffffffff81182392>] SyS_write+0x52/0xa0
[  246.724940]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
[  246.724961] INFO: task gnome-keyring-d:2989 blocked for more than
120 seconds.
[  246.724963] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  246.724966] gnome-keyring-d D 0000000000000000     0  2989      1 0x00000000
[  246.724971]  ffff8800cb933e38 0000000000000086 00000000000000dc
0000000000000004
[  246.724977]  ffff88002e548000 ffff8800cb933fd8 ffff8800cb933fd8
ffff8800cb933fd8
[  246.724982]  ffff8800301fae00 ffff88002e548000 ffff88010fbb6b40
ffffffff81c31b88
[  246.724988] Call Trace:
[  246.724994]  [<ffffffff81679c39>] schedule+0x29/0x70
[  246.724999]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
[  246.725007]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
[  246.725016]  [<ffffffff81133780>] ? __pagevec_release+0x40/0x40
[  246.725024]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
[  246.725030]  [<ffffffff8104d19c>] get_online_cpus+0x2c/0x50
[  246.725038]  [<ffffffff810693d9>] schedule_on_each_cpu+0x39/0x110
[  246.725047]  [<ffffffff811337a5>] lru_add_drain_all+0x15/0x20
[  246.725055]  [<ffffffff81150dd8>] SyS_mlock+0x38/0x130
[  246.725061]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
[  246.725083] INFO: task sh:3952 blocked for more than 120 seconds.
[  246.725086] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  246.725089] sh              D ffffffff8180ff20     0  3952   3947 0x00000004
[  246.725093]  ffff8800ccd3bbd8 0000000000000082 0000000000000000
0000000000000001
[  246.725099]  ffff8800cb0c9700 ffff8800ccd3bfd8 ffff8800ccd3bfd8
ffff8800ccd3bfd8
[  246.725105]  ffff880118c5c500 ffff8800cb0c9700 ffffffff81142619
ffffffff81c31b40
[  246.725110] Call Trace:
[  246.725119]  [<ffffffff81142619>] ? zone_statistics+0x99/0xc0
[  246.725124]  [<ffffffff81679c39>] schedule+0x29/0x70
[  246.725129]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
[  246.725137]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
[  246.725145]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
[  246.725151]  [<ffffffff8104d1f5>] cpu_maps_update_begin+0x15/0x20
[  246.725157]  [<ffffffff8104d53e>] cpu_hotplug_disable+0xe/0x20
[  246.725165]  [<ffffffff81533aad>] cpufreq_cpu_get+0x7d/0xf0
[  246.725172]  [<ffffffff81533c41>] cpufreq_get_policy+0x21/0x120
[  246.725180]  [<ffffffff8153400f>] store_scaling_min_freq+0x3f/0xa0
[  246.725190]  [<ffffffff816785e6>] ? down_write+0x16/0x40
[  246.725197]  [<ffffffff81533000>] store+0x70/0xb0
[  246.725205]  [<ffffffff811f2582>] sysfs_write_file+0xe2/0x170
[  246.725212]  [<ffffffff81181e8e>] vfs_write+0xce/0x200
[  246.725219]  [<ffffffff81182392>] SyS_write+0x52/0xa0
[  246.725225]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
[  246.725229] INFO: task sh:3953 blocked for more than 120 seconds.
[  246.725232] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  246.725234] sh              D ffffffff8180fb60     0  3953   3952 0x00000004
[  246.725239]  ffff8800ccd9dbf8 0000000000000082 ffffffff81116bcf
ffff880119398000
[  246.725245]  ffff8800c9f65c00 ffff8800ccd9dfd8 ffff8800ccd9dfd8
ffff8800ccd9dfd8
[  246.725251]  ffff880119358000 ffff8800c9f65c00 ffff8800ccd9dcc8
ffffffff81c31b40
[  246.725256] Call Trace:
[  246.725266]  [<ffffffff81116bcf>] ? update_group_times+0xf/0x40
[  246.725272]  [<ffffffff81679c39>] schedule+0x29/0x70
[  246.725277]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
[  246.725285]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
[  246.725293]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
[  246.725298]  [<ffffffff8104d1f5>] cpu_maps_update_begin+0x15/0x20
[  246.725304]  [<ffffffff8104d53e>] cpu_hotplug_disable+0xe/0x20
[  246.725312]  [<ffffffff81533aad>] cpufreq_cpu_get+0x7d/0xf0
[  246.725320]  [<ffffffff8153549d>] cpufreq_stats_free_sysfs+0x1d/0x80
[  246.725328]  [<ffffffff8153559f>] cpufreq_stat_cpu_callback+0x2f/0x40
[  246.725336]  [<ffffffff8167f17d>] notifier_call_chain+0x4d/0x70
[  246.725346]  [<ffffffff8107508e>] __raw_notifier_call_chain+0xe/0x10
[  246.725351]  [<ffffffff8104d0d0>] __cpu_notify+0x20/0x40
[  246.725358]  [<ffffffff81667171>] _cpu_down+0x81/0x250
[  246.725364]  [<ffffffff81667375>] cpu_down+0x35/0x50
[  246.725370]  [<ffffffff8141b9c1>] cpu_subsys_offline+0x21/0x40
[  246.725377]  [<ffffffff81416f45>] device_offline+0xa5/0xd0
[  246.725384]  [<ffffffff8141704e>] store_online+0x3e/0x80
[  246.725390]  [<ffffffff81414568>] dev_attr_store+0x18/0x30
[  246.725397]  [<ffffffff811f2582>] sysfs_write_file+0xe2/0x170
[  246.725404]  [<ffffffff81181e8e>] vfs_write+0xce/0x200
[  246.725411]  [<ffffffff81182392>] SyS_write+0x52/0xa0
[  246.725419]  [<ffffffff8167f12e>] ? do_page_fault+0xe/0x10
[  246.725425]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
[  366.756425] INFO: task irqbalance:1052 blocked for more than 120 seconds.
[  366.756435] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  366.756440] irqbalance      D 0000000000000000     0  1052      1 0x00000000
[  366.756450]  ffff88002f2dfd78 0000000000000086 ffff88002e53b800
0000000000000001
[  366.756459]  ffff880030620000 ffff88002f2dffd8 ffff88002f2dffd8
ffff88002f2dffd8
[  366.756465]  ffff880030181700 ffff880030620000 ffff8800ccbfd698
ffffffff81c981c0
[  366.756472] Call Trace:
[  366.756486]  [<ffffffff81679c39>] schedule+0x29/0x70
[  366.756493]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
[  366.756504]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
[  366.756513]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
[  366.756522]  [<ffffffff81416e15>] lock_device_hotplug+0x15/0x20
[  366.756528]  [<ffffffff81416e60>] show_online+0x20/0x60
[  366.756535]  [<ffffffff81415ab0>] dev_attr_show+0x20/0x60
[  366.756544]  [<ffffffff81129a3e>] ? __get_free_pages+0xe/0x40
[  366.756553]  [<ffffffff811f2c9a>] sysfs_read_file+0xaa/0x180
[  366.756564]  [<ffffffff81182074>] vfs_read+0xb4/0x180
[  366.756571]  [<ffffffff811822f2>] SyS_read+0x52/0xa0
[  366.756579]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
[  366.756593] INFO: task preload:1731 blocked for more than 120 seconds.
[  366.756596] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  366.756599] preload         D ffffffff8180fb60     0  1731      1 0x00000000
[  366.756605]  ffff8800cca71d28 0000000000000086 ffff8800cca71cc8
ffffffff8118bafd
[  366.756611]  ffff88002e7dc500 ffff8800cca71fd8 ffff8800cca71fd8
ffff8800cca71fd8
[  366.756617]  ffff880119359700 ffff88002e7dc500 ffff880119408240
ffffffff81c31b88
[  366.756623] Call Trace:
[  366.756631]  [<ffffffff8118bafd>] ? terminate_walk+0x3d/0x50
[  366.756637]  [<ffffffff81679c39>] schedule+0x29/0x70
[  366.756643]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
[  366.756652]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
[  366.756659]  [<ffffffff8118ba72>] ? path_put+0x22/0x30
[  366.756667]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
[  366.756674]  [<ffffffff8104d19c>] get_online_cpus+0x2c/0x50
[  366.756684]  [<ffffffff8114196a>] all_vm_events+0x1a/0x120
[  366.756692]  [<ffffffff81141b05>] vmstat_start+0x95/0xc0
[  366.756700]  [<ffffffff811a3739>] seq_read+0x139/0x3e0
[  366.756709]  [<ffffffff812acbb3>] ? security_file_permission+0xa3/0xc0
[  366.756717]  [<ffffffff811e4083>] proc_reg_read+0x43/0x70
[  366.756725]  [<ffffffff81182074>] vfs_read+0xb4/0x180
[  366.756732]  [<ffffffff811822f2>] SyS_read+0x52/0xa0
[  366.756738]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
[  366.756743] INFO: task ondemand:1867 blocked for more than 120 seconds.
[  366.756745] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  366.756748] ondemand        D ffffffff8180fb60     0  1867      1 0x00000000
[  366.756753]  ffff8800d66c3ba8 0000000000000082 ffff8800d66c3be8
ffffffff811c0ba2
[  366.756759]  ffff88002f1a0000 ffff8800d66c3fd8 ffff8800d66c3fd8
ffff8800d66c3fd8
[  366.756765]  ffffffff81c10440 ffff88002f1a0000 0000000000000000
ffffffff81c31b40
[  366.756771] Call Trace:
[  366.756779]  [<ffffffff811c0ba2>] ? fsnotify+0x1d2/0x2b0
[  366.756785]  [<ffffffff81679c39>] schedule+0x29/0x70
[  366.756790]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
[  366.756798]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
[  366.756806]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
[  366.756813]  [<ffffffff8104d1f5>] cpu_maps_update_begin+0x15/0x20
[  366.756819]  [<ffffffff8104d53e>] cpu_hotplug_disable+0xe/0x20
[  366.756828]  [<ffffffff81533aad>] cpufreq_cpu_get+0x7d/0xf0
[  366.756836]  [<ffffffff81533c41>] cpufreq_get_policy+0x21/0x120
[  366.756845]  [<ffffffff81533d8b>] store_scaling_governor+0x4b/0x1f0
[  366.756852]  [<ffffffff8118d37a>] ? link_path_walk+0x23a/0x8d0
[  366.756859]  [<ffffffff8119fae9>] ? mntput_no_expire+0x49/0x160
[  366.756865]  [<ffffffff8119fc24>] ? mntput+0x24/0x40
[  366.756874]  [<ffffffff81533000>] store+0x70/0xb0
[  366.756882]  [<ffffffff811f2582>] sysfs_write_file+0xe2/0x170
[  366.756889]  [<ffffffff81181e8e>] vfs_write+0xce/0x200
[  366.756896]  [<ffffffff81182392>] SyS_write+0x52/0xa0
[  366.756903]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
[  366.756922] INFO: task gnome-keyring-d:2989 blocked for more than
120 seconds.
[  366.756925] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  366.756927] gnome-keyring-d D 0000000000000000     0  2989      1 0x00000000
[  366.756932]  ffff8800cb933e38 0000000000000086 00000000000000dc
0000000000000004
[  366.756938]  ffff88002e548000 ffff8800cb933fd8 ffff8800cb933fd8
ffff8800cb933fd8
[  366.756944]  ffff8800301fae00 ffff88002e548000 ffff88010fbb6b40
ffffffff81c31b88
[  366.756949] Call Trace:
[  366.756955]  [<ffffffff81679c39>] schedule+0x29/0x70
[  366.756961]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
[  366.756969]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
[  366.756978]  [<ffffffff81133780>] ? __pagevec_release+0x40/0x40
[  366.756986]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
[  366.756991]  [<ffffffff8104d19c>] get_online_cpus+0x2c/0x50
[  366.757000]  [<ffffffff810693d9>] schedule_on_each_cpu+0x39/0x110
[  366.757008]  [<ffffffff811337a5>] lru_add_drain_all+0x15/0x20
[  366.757016]  [<ffffffff81150dd8>] SyS_mlock+0x38/0x130
[  366.757022]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b



And that's why I am confused about how to solve it :)

I thought when a CPU is going down its /sys/devices/system/cpu/cpuX/
directory should be locked/frozen and no other thread should be reading
from this directory at that point..

@Srivatsa: Can you share your knowledge here on this?

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: mutex warning in cpufreq + RFC patch
  2013-09-01 16:21             ` Viresh Kumar
@ 2013-09-03 13:18               ` Srivatsa S. Bhat
  0 siblings, 0 replies; 12+ messages in thread
From: Srivatsa S. Bhat @ 2013-09-03 13:18 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J. Wysocki, Stephen Boyd, Linux Kernel Mailing List,
	cpufreq@vger.kernel.org, linux-pm@vger.kernel.org

On 09/01/2013 09:51 PM, Viresh Kumar wrote:
> On 1 September 2013 18:52, Rafael J. Wysocki <rjw@sisk.pl> wrote:
>> On Sunday, September 01, 2013 11:54:10 AM Viresh Kumar wrote:
>>> Btw, I am facing another crash which I am not sure how to fix.. It
>>> came with your script:
>>
>> This isn't a crash, but a WARN_ON_ONCE() triggering.  The comment in kref_get()
>> explains when that occurs, so we seem to have a race between
>> store_scaling_min_freq() and CPU removal.
> 
> Yeah, I meant the same thing.. I don't know how to solve it or what's
> the correct way to solve it:
> 
> I tried this:
> 
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 4d5723db..be2e5f4 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -200,8 +200,14 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
>         if (cpufreq_driver) {
>                 /* get the CPU */
>                 policy = per_cpu(cpufreq_cpu_data, cpu);
> -               if (policy)
> -                       kobject_get(&policy->kobj);
> +               if (policy) {
> +                       cpu_hotplug_disable();
> +                       if (unlikely(!cpu_online(policy->cpu)))
> +                               policy = NULL;
> +                       else
> +                               kobject_get(&policy->kobj);
> +                       cpu_hotplug_enable();
> +               }
>         }
> 
> And this gave another crash:
> 
> [  246.724464] INFO: task irqbalance:1052 blocked for more than 120 seconds.
> [  246.724474] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [  246.724479] irqbalance      D 0000000000000000     0  1052      1 0x00000000
> [  246.724489]  ffff88002f2dfd78 0000000000000086 ffff88002e53b800
> 0000000000000001
> [  246.724498]  ffff880030620000 ffff88002f2dffd8 ffff88002f2dffd8
> ffff88002f2dffd8
> [  246.724505]  ffff880030181700 ffff880030620000 ffff8800ccbfd698
> ffffffff81c981c0
> [  246.724511] Call Trace:
> [  246.724525]  [<ffffffff81679c39>] schedule+0x29/0x70
> [  246.724532]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
> [  246.724543]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
> [  246.724552]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
> [  246.724561]  [<ffffffff81416e15>] lock_device_hotplug+0x15/0x20
> [  246.724568]  [<ffffffff81416e60>] show_online+0x20/0x60
> [  246.724574]  [<ffffffff81415ab0>] dev_attr_show+0x20/0x60
> [  246.724583]  [<ffffffff81129a3e>] ? __get_free_pages+0xe/0x40
> [  246.724592]  [<ffffffff811f2c9a>] sysfs_read_file+0xaa/0x180
> [  246.724602]  [<ffffffff81182074>] vfs_read+0xb4/0x180
> [  246.724610]  [<ffffffff811822f2>] SyS_read+0x52/0xa0
> [  246.724617]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
> [  246.724631] INFO: task preload:1731 blocked for more than 120 seconds.
> [  246.724634] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [  246.724637] preload         D ffffffff8180fb60     0  1731      1 0x00000000
> [  246.724643]  ffff8800cca71d28 0000000000000086 ffff8800cca71cc8
> ffffffff8118bafd
> [  246.724650]  ffff88002e7dc500 ffff8800cca71fd8 ffff8800cca71fd8
> ffff8800cca71fd8
> [  246.724655]  ffff880119359700 ffff88002e7dc500 ffff880119408240
> ffffffff81c31b88
> [  246.724661] Call Trace:
> [  246.724669]  [<ffffffff8118bafd>] ? terminate_walk+0x3d/0x50
> [  246.724675]  [<ffffffff81679c39>] schedule+0x29/0x70
> [  246.724681]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
> [  246.724690]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
> [  246.724697]  [<ffffffff8118ba72>] ? path_put+0x22/0x30
> [  246.724705]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
> [  246.724713]  [<ffffffff8104d19c>] get_online_cpus+0x2c/0x50
> [  246.724722]  [<ffffffff8114196a>] all_vm_events+0x1a/0x120
> [  246.724730]  [<ffffffff81141b05>] vmstat_start+0x95/0xc0
> [  246.724738]  [<ffffffff811a3739>] seq_read+0x139/0x3e0
> [  246.724747]  [<ffffffff812acbb3>] ? security_file_permission+0xa3/0xc0
> [  246.724755]  [<ffffffff811e4083>] proc_reg_read+0x43/0x70
> [  246.724763]  [<ffffffff81182074>] vfs_read+0xb4/0x180
> [  246.724770]  [<ffffffff811822f2>] SyS_read+0x52/0xa0
> [  246.724776]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
> [  246.724781] INFO: task ondemand:1867 blocked for more than 120 seconds.
> [  246.724783] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [  246.724786] ondemand        D ffffffff8180fb60     0  1867      1 0x00000000
> [  246.724791]  ffff8800d66c3ba8 0000000000000082 ffff8800d66c3be8
> ffffffff811c0ba2
> [  246.724797]  ffff88002f1a0000 ffff8800d66c3fd8 ffff8800d66c3fd8
> ffff8800d66c3fd8
> [  246.724803]  ffffffff81c10440 ffff88002f1a0000 0000000000000000
> ffffffff81c31b40
> [  246.724809] Call Trace:
> [  246.724817]  [<ffffffff811c0ba2>] ? fsnotify+0x1d2/0x2b0
> [  246.724822]  [<ffffffff81679c39>] schedule+0x29/0x70
> [  246.724828]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
> [  246.724836]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
> [  246.724844]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
> [  246.724850]  [<ffffffff8104d1f5>] cpu_maps_update_begin+0x15/0x20
> [  246.724856]  [<ffffffff8104d53e>] cpu_hotplug_disable+0xe/0x20
> [  246.724866]  [<ffffffff81533aad>] cpufreq_cpu_get+0x7d/0xf0
> [  246.724874]  [<ffffffff81533c41>] cpufreq_get_policy+0x21/0x120
> [  246.724882]  [<ffffffff81533d8b>] store_scaling_governor+0x4b/0x1f0
> [  246.724890]  [<ffffffff8118d37a>] ? link_path_walk+0x23a/0x8d0
> [  246.724897]  [<ffffffff8119fae9>] ? mntput_no_expire+0x49/0x160
> [  246.724902]  [<ffffffff8119fc24>] ? mntput+0x24/0x40
> [  246.724911]  [<ffffffff81533000>] store+0x70/0xb0
> [  246.724919]  [<ffffffff811f2582>] sysfs_write_file+0xe2/0x170
> [  246.724927]  [<ffffffff81181e8e>] vfs_write+0xce/0x200
> [  246.724934]  [<ffffffff81182392>] SyS_write+0x52/0xa0
> [  246.724940]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
> [  246.724961] INFO: task gnome-keyring-d:2989 blocked for more than
> 120 seconds.
> [  246.724963] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [  246.724966] gnome-keyring-d D 0000000000000000     0  2989      1 0x00000000
> [  246.724971]  ffff8800cb933e38 0000000000000086 00000000000000dc
> 0000000000000004
> [  246.724977]  ffff88002e548000 ffff8800cb933fd8 ffff8800cb933fd8
> ffff8800cb933fd8
> [  246.724982]  ffff8800301fae00 ffff88002e548000 ffff88010fbb6b40
> ffffffff81c31b88
> [  246.724988] Call Trace:
> [  246.724994]  [<ffffffff81679c39>] schedule+0x29/0x70
> [  246.724999]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
> [  246.725007]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
> [  246.725016]  [<ffffffff81133780>] ? __pagevec_release+0x40/0x40
> [  246.725024]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
> [  246.725030]  [<ffffffff8104d19c>] get_online_cpus+0x2c/0x50
> [  246.725038]  [<ffffffff810693d9>] schedule_on_each_cpu+0x39/0x110
> [  246.725047]  [<ffffffff811337a5>] lru_add_drain_all+0x15/0x20
> [  246.725055]  [<ffffffff81150dd8>] SyS_mlock+0x38/0x130
> [  246.725061]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
> [  246.725083] INFO: task sh:3952 blocked for more than 120 seconds.
> [  246.725086] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [  246.725089] sh              D ffffffff8180ff20     0  3952   3947 0x00000004
> [  246.725093]  ffff8800ccd3bbd8 0000000000000082 0000000000000000
> 0000000000000001
> [  246.725099]  ffff8800cb0c9700 ffff8800ccd3bfd8 ffff8800ccd3bfd8
> ffff8800ccd3bfd8
> [  246.725105]  ffff880118c5c500 ffff8800cb0c9700 ffffffff81142619
> ffffffff81c31b40
> [  246.725110] Call Trace:
> [  246.725119]  [<ffffffff81142619>] ? zone_statistics+0x99/0xc0
> [  246.725124]  [<ffffffff81679c39>] schedule+0x29/0x70
> [  246.725129]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
> [  246.725137]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
> [  246.725145]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
> [  246.725151]  [<ffffffff8104d1f5>] cpu_maps_update_begin+0x15/0x20
> [  246.725157]  [<ffffffff8104d53e>] cpu_hotplug_disable+0xe/0x20
> [  246.725165]  [<ffffffff81533aad>] cpufreq_cpu_get+0x7d/0xf0
> [  246.725172]  [<ffffffff81533c41>] cpufreq_get_policy+0x21/0x120
> [  246.725180]  [<ffffffff8153400f>] store_scaling_min_freq+0x3f/0xa0
> [  246.725190]  [<ffffffff816785e6>] ? down_write+0x16/0x40
> [  246.725197]  [<ffffffff81533000>] store+0x70/0xb0
> [  246.725205]  [<ffffffff811f2582>] sysfs_write_file+0xe2/0x170
> [  246.725212]  [<ffffffff81181e8e>] vfs_write+0xce/0x200
> [  246.725219]  [<ffffffff81182392>] SyS_write+0x52/0xa0
> [  246.725225]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
> [  246.725229] INFO: task sh:3953 blocked for more than 120 seconds.
> [  246.725232] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [  246.725234] sh              D ffffffff8180fb60     0  3953   3952 0x00000004
> [  246.725239]  ffff8800ccd9dbf8 0000000000000082 ffffffff81116bcf
> ffff880119398000
> [  246.725245]  ffff8800c9f65c00 ffff8800ccd9dfd8 ffff8800ccd9dfd8
> ffff8800ccd9dfd8
> [  246.725251]  ffff880119358000 ffff8800c9f65c00 ffff8800ccd9dcc8
> ffffffff81c31b40
> [  246.725256] Call Trace:
> [  246.725266]  [<ffffffff81116bcf>] ? update_group_times+0xf/0x40
> [  246.725272]  [<ffffffff81679c39>] schedule+0x29/0x70
> [  246.725277]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
> [  246.725285]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
> [  246.725293]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
> [  246.725298]  [<ffffffff8104d1f5>] cpu_maps_update_begin+0x15/0x20
> [  246.725304]  [<ffffffff8104d53e>] cpu_hotplug_disable+0xe/0x20
> [  246.725312]  [<ffffffff81533aad>] cpufreq_cpu_get+0x7d/0xf0
> [  246.725320]  [<ffffffff8153549d>] cpufreq_stats_free_sysfs+0x1d/0x80
> [  246.725328]  [<ffffffff8153559f>] cpufreq_stat_cpu_callback+0x2f/0x40
> [  246.725336]  [<ffffffff8167f17d>] notifier_call_chain+0x4d/0x70
> [  246.725346]  [<ffffffff8107508e>] __raw_notifier_call_chain+0xe/0x10
> [  246.725351]  [<ffffffff8104d0d0>] __cpu_notify+0x20/0x40
> [  246.725358]  [<ffffffff81667171>] _cpu_down+0x81/0x250
> [  246.725364]  [<ffffffff81667375>] cpu_down+0x35/0x50
> [  246.725370]  [<ffffffff8141b9c1>] cpu_subsys_offline+0x21/0x40
> [  246.725377]  [<ffffffff81416f45>] device_offline+0xa5/0xd0
> [  246.725384]  [<ffffffff8141704e>] store_online+0x3e/0x80
> [  246.725390]  [<ffffffff81414568>] dev_attr_store+0x18/0x30
> [  246.725397]  [<ffffffff811f2582>] sysfs_write_file+0xe2/0x170
> [  246.725404]  [<ffffffff81181e8e>] vfs_write+0xce/0x200
> [  246.725411]  [<ffffffff81182392>] SyS_write+0x52/0xa0
> [  246.725419]  [<ffffffff8167f12e>] ? do_page_fault+0xe/0x10
> [  246.725425]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
> [  366.756425] INFO: task irqbalance:1052 blocked for more than 120 seconds.
> [  366.756435] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [  366.756440] irqbalance      D 0000000000000000     0  1052      1 0x00000000
> [  366.756450]  ffff88002f2dfd78 0000000000000086 ffff88002e53b800
> 0000000000000001
> [  366.756459]  ffff880030620000 ffff88002f2dffd8 ffff88002f2dffd8
> ffff88002f2dffd8
> [  366.756465]  ffff880030181700 ffff880030620000 ffff8800ccbfd698
> ffffffff81c981c0
> [  366.756472] Call Trace:
> [  366.756486]  [<ffffffff81679c39>] schedule+0x29/0x70
> [  366.756493]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
> [  366.756504]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
> [  366.756513]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
> [  366.756522]  [<ffffffff81416e15>] lock_device_hotplug+0x15/0x20
> [  366.756528]  [<ffffffff81416e60>] show_online+0x20/0x60
> [  366.756535]  [<ffffffff81415ab0>] dev_attr_show+0x20/0x60
> [  366.756544]  [<ffffffff81129a3e>] ? __get_free_pages+0xe/0x40
> [  366.756553]  [<ffffffff811f2c9a>] sysfs_read_file+0xaa/0x180
> [  366.756564]  [<ffffffff81182074>] vfs_read+0xb4/0x180
> [  366.756571]  [<ffffffff811822f2>] SyS_read+0x52/0xa0
> [  366.756579]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
> [  366.756593] INFO: task preload:1731 blocked for more than 120 seconds.
> [  366.756596] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [  366.756599] preload         D ffffffff8180fb60     0  1731      1 0x00000000
> [  366.756605]  ffff8800cca71d28 0000000000000086 ffff8800cca71cc8
> ffffffff8118bafd
> [  366.756611]  ffff88002e7dc500 ffff8800cca71fd8 ffff8800cca71fd8
> ffff8800cca71fd8
> [  366.756617]  ffff880119359700 ffff88002e7dc500 ffff880119408240
> ffffffff81c31b88
> [  366.756623] Call Trace:
> [  366.756631]  [<ffffffff8118bafd>] ? terminate_walk+0x3d/0x50
> [  366.756637]  [<ffffffff81679c39>] schedule+0x29/0x70
> [  366.756643]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
> [  366.756652]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
> [  366.756659]  [<ffffffff8118ba72>] ? path_put+0x22/0x30
> [  366.756667]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
> [  366.756674]  [<ffffffff8104d19c>] get_online_cpus+0x2c/0x50
> [  366.756684]  [<ffffffff8114196a>] all_vm_events+0x1a/0x120
> [  366.756692]  [<ffffffff81141b05>] vmstat_start+0x95/0xc0
> [  366.756700]  [<ffffffff811a3739>] seq_read+0x139/0x3e0
> [  366.756709]  [<ffffffff812acbb3>] ? security_file_permission+0xa3/0xc0
> [  366.756717]  [<ffffffff811e4083>] proc_reg_read+0x43/0x70
> [  366.756725]  [<ffffffff81182074>] vfs_read+0xb4/0x180
> [  366.756732]  [<ffffffff811822f2>] SyS_read+0x52/0xa0
> [  366.756738]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
> [  366.756743] INFO: task ondemand:1867 blocked for more than 120 seconds.
> [  366.756745] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [  366.756748] ondemand        D ffffffff8180fb60     0  1867      1 0x00000000
> [  366.756753]  ffff8800d66c3ba8 0000000000000082 ffff8800d66c3be8
> ffffffff811c0ba2
> [  366.756759]  ffff88002f1a0000 ffff8800d66c3fd8 ffff8800d66c3fd8
> ffff8800d66c3fd8
> [  366.756765]  ffffffff81c10440 ffff88002f1a0000 0000000000000000
> ffffffff81c31b40
> [  366.756771] Call Trace:
> [  366.756779]  [<ffffffff811c0ba2>] ? fsnotify+0x1d2/0x2b0
> [  366.756785]  [<ffffffff81679c39>] schedule+0x29/0x70
> [  366.756790]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
> [  366.756798]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
> [  366.756806]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
> [  366.756813]  [<ffffffff8104d1f5>] cpu_maps_update_begin+0x15/0x20
> [  366.756819]  [<ffffffff8104d53e>] cpu_hotplug_disable+0xe/0x20
> [  366.756828]  [<ffffffff81533aad>] cpufreq_cpu_get+0x7d/0xf0
> [  366.756836]  [<ffffffff81533c41>] cpufreq_get_policy+0x21/0x120
> [  366.756845]  [<ffffffff81533d8b>] store_scaling_governor+0x4b/0x1f0
> [  366.756852]  [<ffffffff8118d37a>] ? link_path_walk+0x23a/0x8d0
> [  366.756859]  [<ffffffff8119fae9>] ? mntput_no_expire+0x49/0x160
> [  366.756865]  [<ffffffff8119fc24>] ? mntput+0x24/0x40
> [  366.756874]  [<ffffffff81533000>] store+0x70/0xb0
> [  366.756882]  [<ffffffff811f2582>] sysfs_write_file+0xe2/0x170
> [  366.756889]  [<ffffffff81181e8e>] vfs_write+0xce/0x200
> [  366.756896]  [<ffffffff81182392>] SyS_write+0x52/0xa0
> [  366.756903]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
> [  366.756922] INFO: task gnome-keyring-d:2989 blocked for more than
> 120 seconds.
> [  366.756925] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [  366.756927] gnome-keyring-d D 0000000000000000     0  2989      1 0x00000000
> [  366.756932]  ffff8800cb933e38 0000000000000086 00000000000000dc
> 0000000000000004
> [  366.756938]  ffff88002e548000 ffff8800cb933fd8 ffff8800cb933fd8
> ffff8800cb933fd8
> [  366.756944]  ffff8800301fae00 ffff88002e548000 ffff88010fbb6b40
> ffffffff81c31b88
> [  366.756949] Call Trace:
> [  366.756955]  [<ffffffff81679c39>] schedule+0x29/0x70
> [  366.756961]  [<ffffffff81679f1e>] schedule_preempt_disabled+0xe/0x10
> [  366.756969]  [<ffffffff816781d2>] __mutex_lock_slowpath+0x112/0x1b0
> [  366.756978]  [<ffffffff81133780>] ? __pagevec_release+0x40/0x40
> [  366.756986]  [<ffffffff816776ca>] mutex_lock+0x2a/0x41
> [  366.756991]  [<ffffffff8104d19c>] get_online_cpus+0x2c/0x50
> [  366.757000]  [<ffffffff810693d9>] schedule_on_each_cpu+0x39/0x110
> [  366.757008]  [<ffffffff811337a5>] lru_add_drain_all+0x15/0x20
> [  366.757016]  [<ffffffff81150dd8>] SyS_mlock+0x38/0x130
> [  366.757022]  [<ffffffff816838c2>] system_call_fastpath+0x16/0x1b
> 
> 
> 
> And that's why I am confused about how to solve it :)
> 
> I thought that when a CPU is going down, its /sys/devices/system/cpu/cpuX/
> directory should be locked/frozen and no other thread should be reading
> from this directory at that point.
> 
> @Srivatsa: Can you share your knowledge here on this?
> 

Sorry for the late reply. I was on vacation for some time and it took
me a while to catch up on things.

Looking at the patches you have sent out, I think the first one[1] is
required, but the second one[2] isn't helpful. I have an alternative
fix that I'll propose as a reply to that thread.

Coming to the problem above, calling cpu_hotplug_disable/enable for
general hotplug synchronization is a bad idea. You must use
get/put_online_cpus() instead. I had sent out a patch to add a comment
pointing this out [3], but that didn't go upstream since it was part of
a larger patchset etc. Maybe I should break it out and send it separately.

And IMHO the kref_get race can be fixed by using get/put_online_cpus()
instead of your second patch[2].

[1]. https://patchwork.kernel.org/patch/2852463/
[2]. https://patchwork.kernel.org/patch/2852464/
[3]. https://patchwork.kernel.org/patch/2795771/

Regards,
Srivatsa S. Bhat


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-09-03 13:22 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-28  2:57 mutex warning in cpufreq + RFC patch Stephen Boyd
2013-08-28  6:58 ` Viresh Kumar
2013-08-28 16:52   ` Stephen Boyd
2013-08-29  8:37     ` Viresh Kumar
2013-08-29  8:39       ` Viresh Kumar
2013-08-31  0:36       ` Stephen Boyd
2013-08-31  0:55         ` Rafael J. Wysocki
2013-08-31  0:59           ` Rafael J. Wysocki
2013-09-01  6:24         ` Viresh Kumar
2013-09-01 13:22           ` Rafael J. Wysocki
2013-09-01 16:21             ` Viresh Kumar
2013-09-03 13:18               ` Srivatsa S. Bhat

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).