[PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume

linux-pm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
@ 2013-12-20 15:56 Viresh Kumar
  2013-12-22  1:00 ` Rafael J. Wysocki
  0 siblings, 1 reply; 15+ messages in thread
From: Viresh Kumar @ 2013-12-20 15:56 UTC (permalink / raw)
  To: rjw; +Cc: linaro-kernel, patches, cpufreq, linux-pm, linux-kernel,
	Viresh Kumar

There are cases where cpufreq_add_dev() may fail for some CPUs during resume.
With the current code we will still have sysfs cpufreq files for such CPUs, and
struct cpufreq_policy would be already freed for them. Hence any operation on
those sysfs files would result in kernel warnings.

To fix this, lets remove those sysfs files or put the associated kobject in case
of such errors. Also, to make it simple lets remove the sysfs links from all the
CPUs (except policy->cpu) during suspend as that operation wouldn't result with a
loss of sysfs file permissions. And so we will create those links during resume
as well.

Reported-and-tested-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 63 +++++++++++++++++++++++------------------------
 1 file changed, 31 insertions(+), 32 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 02d534d..fab042e 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -845,8 +845,7 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
 
 #ifdef CONFIG_HOTPLUG_CPU
 static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
-				  unsigned int cpu, struct device *dev,
-				  bool frozen)
+				  unsigned int cpu, struct device *dev)
 {
 	int ret = 0;
 	unsigned long flags;
@@ -877,11 +876,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 		}
 	}
 
-	/* Don't touch sysfs links during light-weight init */
-	if (!frozen)
-		ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
-
-	return ret;
+	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
 }
 #endif
 
@@ -926,6 +921,27 @@ err_free_policy:
 	return NULL;
 }
 
+static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy)
+{
+	struct kobject *kobj;
+	struct completion *cmp;
+
+	down_read(&policy->rwsem);
+	kobj = &policy->kobj;
+	cmp = &policy->kobj_unregister;
+	up_read(&policy->rwsem);
+	kobject_put(kobj);
+
+	/*
+	 * We need to make sure that the underlying kobj is
+	 * actually not referenced anymore by anybody before we
+	 * proceed with unloading.
+	 */
+	pr_debug("waiting for dropping of refcount\n");
+	wait_for_completion(cmp);
+	pr_debug("wait complete\n");
+}
+
 static void cpufreq_policy_free(struct cpufreq_policy *policy)
 {
 	free_cpumask_var(policy->related_cpus);
@@ -986,7 +1002,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
 	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
 		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
 			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev, frozen);
+			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
 			up_read(&cpufreq_rwsem);
 			return ret;
 		}
@@ -1096,7 +1112,10 @@ err_get_freq:
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);
 err_set_policy_cpu:
+	if (frozen)
+		cpufreq_policy_put_kobj(policy);
 	cpufreq_policy_free(policy);
+
 nomem_out:
 	up_read(&cpufreq_rwsem);
 
@@ -1118,7 +1137,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 }
 
 static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
-					   unsigned int old_cpu, bool frozen)
+					   unsigned int old_cpu)
 {
 	struct device *cpu_dev;
 	int ret;
@@ -1126,10 +1145,6 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
 	/* first sibling now owns the new sysfs dir */
 	cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
 
-	/* Don't touch sysfs files during light-weight tear-down */
-	if (frozen)
-		return cpu_dev->id;
-
 	sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
 	ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
 	if (ret) {
@@ -1196,7 +1211,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
 		if (!frozen)
 			sysfs_remove_link(&dev->kobj, "cpufreq");
 	} else if (cpus > 1) {
-		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu, frozen);
+		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
 		if (new_cpu >= 0) {
 			update_policy_cpu(policy, new_cpu);
 
@@ -1218,8 +1233,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 	int ret;
 	unsigned long flags;
 	struct cpufreq_policy *policy;
-	struct kobject *kobj;
-	struct completion *cmp;
 
 	read_lock_irqsave(&cpufreq_driver_lock, flags);
 	policy = per_cpu(cpufreq_cpu_data, cpu);
@@ -1249,22 +1262,8 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 			}
 		}
 
-		if (!frozen) {
-			down_read(&policy->rwsem);
-			kobj = &policy->kobj;
-			cmp = &policy->kobj_unregister;
-			up_read(&policy->rwsem);
-			kobject_put(kobj);
-
-			/*
-			 * We need to make sure that the underlying kobj is
-			 * actually not referenced anymore by anybody before we
-			 * proceed with unloading.
-			 */
-			pr_debug("waiting for dropping of refcount\n");
-			wait_for_completion(cmp);
-			pr_debug("wait complete\n");
-		}
+		if (!frozen)
+			cpufreq_policy_put_kobj(policy);
 
 		/*
 		 * Perform the ->exit() even during light-weight tear-down,
-- 
1.7.12.rc2.18.g61b472e


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-20 15:56 [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume Viresh Kumar
@ 2013-12-22  1:00 ` Rafael J. Wysocki
  2013-12-23  5:55   ` Bjørn Mork
  0 siblings, 1 reply; 15+ messages in thread
From: Rafael J. Wysocki @ 2013-12-22  1:00 UTC (permalink / raw)
  To: Viresh Kumar, Bjørn Mork; +Cc: cpufreq, linux-pm, linux-kernel

On Friday, December 20, 2013 09:26:02 PM Viresh Kumar wrote:
> There are cases where cpufreq_add_dev() may fail for some CPUs during resume.
> With the current code we will still have sysfs cpufreq files for such CPUs, and
> struct cpufreq_policy would be already freed for them. Hence any operation on
> those sysfs files would result in kernel warnings.
> 
> To fix this, lets remove those sysfs files or put the associated kobject in case
> of such errors. Also, to make it simple lets remove the sysfs links from all the
> CPUs (except policy->cpu) during suspend as that operation wouldn't result with a
> loss of sysfs file permissions. And so we will create those links during resume
> as well.
> 
> Reported-and-tested-by: Bjørn Mork <bjorn@mork.no>
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

Queued up for the next PM pull request, thanks!

Bjorn, can you please check if the pm-cpufreq branch of the linux-pm.git tree
fixes the problem that you have reported without causing any new breakage to
happen?

Rafael


> ---
>  drivers/cpufreq/cpufreq.c | 63 +++++++++++++++++++++++------------------------
>  1 file changed, 31 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 02d534d..fab042e 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -845,8 +845,7 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
>  
>  #ifdef CONFIG_HOTPLUG_CPU
>  static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
> -				  unsigned int cpu, struct device *dev,
> -				  bool frozen)
> +				  unsigned int cpu, struct device *dev)
>  {
>  	int ret = 0;
>  	unsigned long flags;
> @@ -877,11 +876,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>  		}
>  	}
>  
> -	/* Don't touch sysfs links during light-weight init */
> -	if (!frozen)
> -		ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
> -
> -	return ret;
> +	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
>  }
>  #endif
>  
> @@ -926,6 +921,27 @@ err_free_policy:
>  	return NULL;
>  }
>  
> +static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy)
> +{
> +	struct kobject *kobj;
> +	struct completion *cmp;
> +
> +	down_read(&policy->rwsem);
> +	kobj = &policy->kobj;
> +	cmp = &policy->kobj_unregister;
> +	up_read(&policy->rwsem);
> +	kobject_put(kobj);
> +
> +	/*
> +	 * We need to make sure that the underlying kobj is
> +	 * actually not referenced anymore by anybody before we
> +	 * proceed with unloading.
> +	 */
> +	pr_debug("waiting for dropping of refcount\n");
> +	wait_for_completion(cmp);
> +	pr_debug("wait complete\n");
> +}
> +
>  static void cpufreq_policy_free(struct cpufreq_policy *policy)
>  {
>  	free_cpumask_var(policy->related_cpus);
> @@ -986,7 +1002,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
>  	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
>  		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
>  			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev, frozen);
> +			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
>  			up_read(&cpufreq_rwsem);
>  			return ret;
>  		}
> @@ -1096,7 +1112,10 @@ err_get_freq:
>  	if (cpufreq_driver->exit)
>  		cpufreq_driver->exit(policy);
>  err_set_policy_cpu:
> +	if (frozen)
> +		cpufreq_policy_put_kobj(policy);
>  	cpufreq_policy_free(policy);
> +
>  nomem_out:
>  	up_read(&cpufreq_rwsem);
>  
> @@ -1118,7 +1137,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  }
>  
>  static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
> -					   unsigned int old_cpu, bool frozen)
> +					   unsigned int old_cpu)
>  {
>  	struct device *cpu_dev;
>  	int ret;
> @@ -1126,10 +1145,6 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
>  	/* first sibling now owns the new sysfs dir */
>  	cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
>  
> -	/* Don't touch sysfs files during light-weight tear-down */
> -	if (frozen)
> -		return cpu_dev->id;
> -
>  	sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
>  	ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
>  	if (ret) {
> @@ -1196,7 +1211,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>  		if (!frozen)
>  			sysfs_remove_link(&dev->kobj, "cpufreq");
>  	} else if (cpus > 1) {
> -		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu, frozen);
> +		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
>  		if (new_cpu >= 0) {
>  			update_policy_cpu(policy, new_cpu);
>  
> @@ -1218,8 +1233,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>  	int ret;
>  	unsigned long flags;
>  	struct cpufreq_policy *policy;
> -	struct kobject *kobj;
> -	struct completion *cmp;
>  
>  	read_lock_irqsave(&cpufreq_driver_lock, flags);
>  	policy = per_cpu(cpufreq_cpu_data, cpu);
> @@ -1249,22 +1262,8 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>  			}
>  		}
>  
> -		if (!frozen) {
> -			down_read(&policy->rwsem);
> -			kobj = &policy->kobj;
> -			cmp = &policy->kobj_unregister;
> -			up_read(&policy->rwsem);
> -			kobject_put(kobj);
> -
> -			/*
> -			 * We need to make sure that the underlying kobj is
> -			 * actually not referenced anymore by anybody before we
> -			 * proceed with unloading.
> -			 */
> -			pr_debug("waiting for dropping of refcount\n");
> -			wait_for_completion(cmp);
> -			pr_debug("wait complete\n");
> -		}
> +		if (!frozen)
> +			cpufreq_policy_put_kobj(policy);
>  
>  		/*
>  		 * Perform the ->exit() even during light-weight tear-down,
> 
-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-22  1:00 ` Rafael J. Wysocki
@ 2013-12-23  5:55   ` Bjørn Mork
  2013-12-23  6:02     ` Viresh Kumar
  0 siblings, 1 reply; 15+ messages in thread
From: Bjørn Mork @ 2013-12-23  5:55 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Viresh Kumar, cpufreq, linux-pm, linux-kernel

"Rafael J. Wysocki" <rjw@rjwysocki.net> writes:

> Bjorn, can you please check if the pm-cpufreq branch of the linux-pm.git tree
> fixes the problem that you have reported 

I can confirm that it fixes the major regression.  With this branch, the
cpufreq directory is completely removed after a cancelled userspace
hibernate (with the acpi-cpufreq problem causing failure).  So it is
possible to restore cpufreq by manually offlining and onlining non-boot
cores.  No more leftover sysfs attributes.

But there is still a minor regression compared to the old (v3.11)
behaviour: Previously the cpufreq functionality would be automatically
restored by any completed hibernate or suspend cycle, since it would
effectively do the CPU offline/online. This automatix fixup won't happen
with the current pm-cpufreq branch.  User intervention is now required
to fix up cpufreq. Which is expected, due to the special handling of
cpufreq suspend.

So there is still a small, small regression here, making me believe that
my "fix" is better until the cpufreq suspend is properly fixed.  But
it's certainly not a major problem to me either way.  Your call.

> without causing any new breakage to happen?

I'm not going to guarantee that :-)  But I haven't noticed anything
obvious during the 15 minutes I've been testing this branch so far.

Bjørn

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-23  5:55   ` Bjørn Mork
@ 2013-12-23  6:02     ` Viresh Kumar
  2013-12-23  6:55       ` Bjørn Mork
  0 siblings, 1 reply; 15+ messages in thread
From: Viresh Kumar @ 2013-12-23  6:02 UTC (permalink / raw)
  To: Bjørn Mork
  Cc: Rafael J. Wysocki, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Linux Kernel Mailing List

On 23 December 2013 11:25, Bjørn Mork <bjorn@mork.no> wrote:
> I can confirm that it fixes the major regression.  With this branch, the
> cpufreq directory is completely removed after a cancelled userspace
> hibernate (with the acpi-cpufreq problem causing failure).  So it is
> possible to restore cpufreq by manually offlining and onlining non-boot
> cores.  No more leftover sysfs attributes.

Thanks for giving it a try once again :)

> But there is still a minor regression compared to the old (v3.11)
> behaviour: Previously the cpufreq functionality would be automatically
> restored by any completed hibernate or suspend cycle, since it would
> effectively do the CPU offline/online. This automatix fixup won't happen
> with the current pm-cpufreq branch.

I didn't understood it completely, sorry :)

As far as I can see from 3.11 code we simply used to fail with any failure
resulting with a call to ->init() or some other call..

And so cpufreq wouldn't have added any directories at all in that case.
And so I think we still required an offline/online sequence to guarantee
things..

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-23  6:02     ` Viresh Kumar
@ 2013-12-23  6:55       ` Bjørn Mork
  2013-12-23  7:55         ` viresh kumar
  0 siblings, 1 reply; 15+ messages in thread
From: Bjørn Mork @ 2013-12-23  6:55 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J. Wysocki, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Linux Kernel Mailing List

Viresh Kumar <viresh.kumar@linaro.org> writes:
> On 23 December 2013 11:25, Bjørn Mork <bjorn@mork.no> wrote:
>> I can confirm that it fixes the major regression.  With this branch, the
>> cpufreq directory is completely removed after a cancelled userspace
>> hibernate (with the acpi-cpufreq problem causing failure).  So it is
>> possible to restore cpufreq by manually offlining and onlining non-boot
>> cores.  No more leftover sysfs attributes.
>
> Thanks for giving it a try once again :)
>
>> But there is still a minor regression compared to the old (v3.11)
>> behaviour: Previously the cpufreq functionality would be automatically
>> restored by any completed hibernate or suspend cycle, since it would
>> effectively do the CPU offline/online. This automatix fixup won't happen
>> with the current pm-cpufreq branch.
>
> I didn't understood it completely, sorry :)
>
> As far as I can see from 3.11 code we simply used to fail with any failure
> resulting with a call to ->init() or some other call..
>
> And so cpufreq wouldn't have added any directories at all in that case.
> And so I think we still required an offline/online sequence to guarantee
> things..

That's correct.  The immediate result of the failure is exactly the
same.

The difference is that a subsequent resume would restore the cpufreq
device whether it existed or not.  That made a complete suspend/resume
fix up any missing cpufreq device, e.g. one that was removed by a
previous error.

One effect of saving state on suspend is that a missing device isn't
added on resume.  You can of course see that as a feature.  But to me
it's a regression, because:
 - it didn't use to work that way, and
 - the addition of missing devices on resume is always wanted AFAICS.


Bjørn

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-23  6:55       ` Bjørn Mork
@ 2013-12-23  7:55         ` viresh kumar
  2013-12-23  9:23           ` Bjørn Mork
  0 siblings, 1 reply; 15+ messages in thread
From: viresh kumar @ 2013-12-23  7:55 UTC (permalink / raw)
  To: Bjørn Mork
  Cc: Rafael J. Wysocki, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Linux Kernel Mailing List

On Monday 23 December 2013 12:25 PM, Bjørn Mork wrote:
> That's correct.  The immediate result of the failure is exactly the
> same.

Okay..

> The difference is that a subsequent resume would restore the cpufreq
> device whether it existed or not.  That made a complete suspend/resume
> fix up any missing cpufreq device, e.g. one that was removed by a
> previous error.

I see.. Please see if below patch fixes it for you, it should :)


From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 23 Dec 2013 13:19:47 +0530
Subject: [PATCH] cpufreq: try to resume policies which failed on last resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

__cpufreq_add_dev() can fail sometimes while we are resuming our system.
Currently we are clearing all sysfs nodes for cpufreq's failed policy as that
could make userspace unstable. But if we suspend/resume again, we should atleast
try to bring back those policies.

This patch fixes this issue by clearing fallback data on failure and trying to
allocate a new struct cpufreq_policy on second resume.

Reported-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index fab042e..7523d35 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1010,16 +1010,24 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
 	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
 #endif

-	if (frozen)
+	if (frozen) {
 		/* Restore the saved policy when doing light-weight init */
 		policy = cpufreq_policy_restore(cpu);
-	else
+
+		/*
+		 * As we failed to resume cpufreq core last time, lets try to
+		 * create a new policy.
+		 */
+		if (!policy)
+			frozen = false;
+	}
+
+	if (!frozen)
 		policy = cpufreq_policy_alloc();

 	if (!policy)
 		goto nomem_out;

-
 	/*
 	 * In the resume path, since we restore a saved policy, the assignment
 	 * to policy->cpu is like an update of the existing policy, rather than
@@ -1112,8 +1120,14 @@ err_get_freq:
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);
 err_set_policy_cpu:
-	if (frozen)
+	if (frozen) {
+		/*
+		 * Clear fallback data as we should try to make things work on
+		 * next suspend/resume
+		 */
+		per_cpu(cpufreq_cpu_data_fallback, cpu) = NULL;
 		cpufreq_policy_put_kobj(policy);
+	}
 	cpufreq_policy_free(policy);

 nomem_out:


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-23  7:55         ` viresh kumar
@ 2013-12-23  9:23           ` Bjørn Mork
  2013-12-23 10:45             ` Viresh Kumar
  0 siblings, 1 reply; 15+ messages in thread
From: Bjørn Mork @ 2013-12-23  9:23 UTC (permalink / raw)
  To: viresh kumar
  Cc: Rafael J. Wysocki, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Linux Kernel Mailing List

viresh kumar <viresh.kumar@linaro.org> writes:

> On Monday 23 December 2013 12:25 PM, Bjørn Mork wrote:
>> That's correct.  The immediate result of the failure is exactly the
>> same.
>
> Okay..
>
>> The difference is that a subsequent resume would restore the cpufreq
>> device whether it existed or not.  That made a complete suspend/resume
>> fix up any missing cpufreq device, e.g. one that was removed by a
>> previous error.
>
> I see.. Please see if below patch fixes it for you, it should :)

Confirmed.  With that patch on top of the current pm-cpufreq branch all
functionality is restored and there are no regressions AFAICS.

I still don't understand why you want to add this hackish half-assed
suspend implementation, but at least it doesn't seem to break anything
anymore.  But if you really want to implement suspend/resume, then you
do need to keep the whole device and not just the sysfs files. Keeping
the attribute files allow you to save and restore changed permissions,
but it doesn't save any user modified settings. What's the point of
that?  Is the next step yet another hack to save those?  Where does this
end?

Why not implement proper support for suspending the "cpufreq device",
and *then* enable suspend/resume support?

Well, not my problem...  Just wondering really.

Bjørn

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-23  9:23           ` Bjørn Mork
@ 2013-12-23 10:45             ` Viresh Kumar
  2013-12-23 10:57               ` Bjørn Mork
  0 siblings, 1 reply; 15+ messages in thread
From: Viresh Kumar @ 2013-12-23 10:45 UTC (permalink / raw)
  To: Bjørn Mork
  Cc: Rafael J. Wysocki, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Linux Kernel Mailing List

On 23 December 2013 14:53, Bjørn Mork <bjorn@mork.no> wrote:
> Confirmed.  With that patch on top of the current pm-cpufreq branch all
> functionality is restored and there are no regressions AFAICS.

Thanks..

> I still don't understand why you want to add this hackish half-assed
> suspend implementation

As it is working currently, until we have a proper solution which I am
already discussing with Rafael:

https://lkml.org/lkml/2013/11/22/66

> But if you really want to implement suspend/resume, then you
> do need to keep the whole device and not just the sysfs files. Keeping
> the attribute files allow you to save and restore changed permissions,
> but it doesn't save any user modified settings.

Which settings are you talking about? I thought we are preserving all
files..

> Well, not my problem...  Just wondering really.

:)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-23 10:45             ` Viresh Kumar
@ 2013-12-23 10:57               ` Bjørn Mork
  2013-12-23 11:13                 ` Viresh Kumar
  0 siblings, 1 reply; 15+ messages in thread
From: Bjørn Mork @ 2013-12-23 10:57 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J. Wysocki, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Linux Kernel Mailing List

Viresh Kumar <viresh.kumar@linaro.org> writes:
> On 23 December 2013 14:53, Bjørn Mork <bjorn@mork.no> wrote:
>
>> But if you really want to implement suspend/resume, then you
>> do need to keep the whole device and not just the sysfs files. Keeping
>> the attribute files allow you to save and restore changed permissions,
>> but it doesn't save any user modified settings.
>
> Which settings are you talking about? I thought we are preserving all
> files..

I could be missing something, but I haven't noticed any attempt to
preserve anything except the sysfs files.

I tried modifying the max frequency, using

 echo 800000 >/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
 echo 800000 >/sys/devices/system/cpu/cpu1/cpufreq/scaling_max_freq

After supend + resume the boot CPU still had the modifed maximum, while
the non-boot core was reset to the default value.  I changed the gid of
both files too, verifying that they were saved and restored as expected.
But the value will change to default.

IMHO it would still be a lot better if this was handled as a true
hotplug event, allowing userspace to reset values/modes/owners on
resume. Hiding the hotplug event and saving part of the userspace
controlled environment is worse than not doing anything at all.

Bjørn

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-23 10:57               ` Bjørn Mork
@ 2013-12-23 11:13                 ` Viresh Kumar
  2013-12-23 11:42                   ` Bjørn Mork
  0 siblings, 1 reply; 15+ messages in thread
From: Viresh Kumar @ 2013-12-23 11:13 UTC (permalink / raw)
  To: Bjørn Mork
  Cc: Rafael J. Wysocki, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Linux Kernel Mailing List

On 23 December 2013 16:27, Bjørn Mork <bjorn@mork.no> wrote:
> I could be missing something, but I haven't noticed any attempt to
> preserve anything except the sysfs files.

What do you mean by sysfs here? Doesn't the below files mentioned
by you also come in sysfs?

> I tried modifying the max frequency, using
>
>  echo 800000 >/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
>  echo 800000 >/sys/devices/system/cpu/cpu1/cpufreq/scaling_max_freq
>
> After supend + resume the boot CPU still had the modifed maximum, while
> the non-boot core was reset to the default value.

This is all we were doing. i.e. not removing or putting the kobject which has
all these files and so shouldn't get reallocated at all..

So, has resumed passed on the first go only? As it was failing for the first
time in your case and hence this thread. In that case we are going to get
new files and so all values will be restored to default values.

Otherwise I don't see why we should loose any values here..

>  I changed the gid of
> both files too, verifying that they were saved and restored as expected.
> But the value will change to default.

For both boot and non-boot CPUs? I am asking because things should
be very plain for boot CPU atleast as that is never hot unplugged..

Have you tested this with the latest patches I gave?

> IMHO it would still be a lot better if this was handled as a true
> hotplug event, allowing userspace to reset values/modes/owners on
> resume. Hiding the hotplug event and saving part of the userspace
> controlled environment is worse than not doing anything at all.

We should be saving everything correctly with the current code,
with the patches I have sent.. And so things should work as far
as I can comment.

If you can confirm that these happened despite of latest patches
then probably I need to test that again on my thinkpad. But I was
quite sure this worked :)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-23 11:13                 ` Viresh Kumar
@ 2013-12-23 11:42                   ` Bjørn Mork
  2013-12-23 15:45                     ` viresh kumar
  0 siblings, 1 reply; 15+ messages in thread
From: Bjørn Mork @ 2013-12-23 11:42 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J. Wysocki, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Linux Kernel Mailing List

Viresh Kumar <viresh.kumar@linaro.org> writes:
> On 23 December 2013 16:27, Bjørn Mork <bjorn@mork.no> wrote:
>> I could be missing something, but I haven't noticed any attempt to
>> preserve anything except the sysfs files.
>
> What do you mean by sysfs here? Doesn't the below files mentioned
> by you also come in sysfs?

My apologies here.  I see that you *do* try to preserve the policy over
the suspend.  So I guess it should have worked...

>> I tried modifying the max frequency, using
>>
>>  echo 800000 >/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
>>  echo 800000 >/sys/devices/system/cpu/cpu1/cpufreq/scaling_max_freq
>>
>> After supend + resume the boot CPU still had the modifed maximum, while
>> the non-boot core was reset to the default value.
>
> This is all we were doing. i.e. not removing or putting the kobject which has
> all these files and so shouldn't get reallocated at all..
>
> So, has resumed passed on the first go only? As it was failing for the first
> time in your case and hence this thread. In that case we are going to get
> new files and so all values will be restored to default values.
>
> Otherwise I don't see why we should loose any values here..

Looking at the code I don't see it either.  But the value is reset.
This is with both your patches:

  cpufreq: remove sysfs files for CPUs which failed to come back after resume
  cpufreq: try to resume policies which failed on last resume

applied on top of v3.13-rc5.  I.e. also including Jason's

  cpufreq: Use CONFIG_CPU_FREQ_DEFAULT_* to set initial policy for setpolicy drivers

since -rc4.  I don't know if that confuses the picture or not.  But
these are the results:

 nemi:/tmp# ls -l /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
 -rw-rw-r-- 1 root bjorn 4096 Dec 23 12:00 /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
 -rw-rw-r-- 1 root bjorn 4096 Dec 23 11:59 /sys/devices/system/cpu/cpu1/cpufreq/scaling_max_freq
 nemi:/tmp# grep . /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
 /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq:1401000
 /sys/devices/system/cpu/cpu1/cpufreq/scaling_max_freq:1401000
 nemi:/tmp# echo 800000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
 nemi:/tmp# echo 800000 > /sys/devices/system/cpu/cpu1/cpufreq/scaling_max_freq
 nemi:/tmp# grep . /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
 /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq:800000
 /sys/devices/system/cpu/cpu1/cpufreq/scaling_max_freq:800000
 nemi:/tmp# s2ram 
 KMS graphics driver is in use, skipping quirks.

 ### resume, and then:

 nemi:/tmp# ls -l /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
 -rw-rw-r-- 1 root bjorn 4096 Dec 23 12:33 /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
 -rw-rw-r-- 1 root bjorn 4096 Dec 23 12:33 /sys/devices/system/cpu/cpu1/cpufreq/scaling_max_freq
 nemi:/tmp# grep . /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
 /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq:800000
 /sys/devices/system/cpu/cpu1/cpufreq/scaling_max_freq:1401000


The driver and governor is

 nemi:/tmp# grep . /sys/devices/system/cpu/cpu*/cpufreq/scaling_{driver,governor}
 /sys/devices/system/cpu/cpu0/cpufreq/scaling_driver:acpi-cpufreq
 /sys/devices/system/cpu/cpu1/cpufreq/scaling_driver:acpi-cpufreq
 /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor:ondemand
 /sys/devices/system/cpu/cpu1/cpufreq/scaling_governor:ondemand


>>  I changed the gid of
>> both files too, verifying that they were saved and restored as expected.
>> But the value will change to default.
>
> For both boot and non-boot CPUs? I am asking because things should
> be very plain for boot CPU atleast as that is never hot unplugged..
>
> Have you tested this with the latest patches I gave?

See above.  Yes, this is tested with both the two patches in flight and
without any failures on suspend. The non-boot CPU have its settings
reset to default.  The boot CPU keeps the modified values.

>> IMHO it would still be a lot better if this was handled as a true
>> hotplug event, allowing userspace to reset values/modes/owners on
>> resume. Hiding the hotplug event and saving part of the userspace
>> controlled environment is worse than not doing anything at all.
>
> We should be saving everything correctly with the current code,
> with the patches I have sent.. And so things should work as far
> as I can comment.
>
> If you can confirm that these happened despite of latest patches
> then probably I need to test that again on my thinkpad.

That would be great.  This could be just me.  I am quite good at
breaking stuff.


> But I was quite sure this worked :)

Sorry for breaking the illusion :-)



Bjørn

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-23 11:42                   ` Bjørn Mork
@ 2013-12-23 15:45                     ` viresh kumar
  2013-12-24  0:35                       ` Rafael J. Wysocki
  0 siblings, 1 reply; 15+ messages in thread
From: viresh kumar @ 2013-12-23 15:45 UTC (permalink / raw)
  To: Bjørn Mork
  Cc: Rafael J. Wysocki, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Linux Kernel Mailing List

On Monday 23 December 2013 05:12 PM, Bjørn Mork wrote:
> That would be great.  This could be just me.  I am quite good at
> breaking stuff.

No its not you. While we tried to make sure everything is preserved,
few special things still stayed out :)

And they are solved with this patch, log should be good enough to give
proper reasoning:

From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 23 Dec 2013 20:51:54 +0530
Subject: [PATCH] cpufreq: preserve user_policy across suspend/resume

In __cpufreq_add_dev() we are reinitializing user_policy with default values and
hence would loose values of user_policy.{min|max|policy|governor} fields.

Preserve them by not overriding these for suspend/resume.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 16d7b4a..c5c3dac 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -839,9 +839,6 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)

 	/* set default policy */
 	ret = cpufreq_set_policy(policy, &new_policy);
-	policy->user_policy.policy = policy->policy;
-	policy->user_policy.governor = policy->governor;
-
 	if (ret) {
 		pr_debug("setting policy failed\n");
 		if (cpufreq_driver->exit)
@@ -1069,8 +1066,10 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
 	 */
 	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);

-	policy->user_policy.min = policy->min;
-	policy->user_policy.max = policy->max;
+	if (!frozen) {
+		policy->user_policy.min = policy->min;
+		policy->user_policy.max = policy->max;
+	}

 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				     CPUFREQ_START, policy);
@@ -1101,6 +1100,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,

 	cpufreq_init_policy(policy);

+	if (!frozen) {
+		policy->user_policy.policy = policy->policy;
+		policy->user_policy.governor = policy->governor;
+	}
+
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
 	up_read(&cpufreq_rwsem);



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-24  0:35                       ` Rafael J. Wysocki
@ 2013-12-24  0:27                         ` Viresh Kumar
  2013-12-24  0:43                           ` Rafael J. Wysocki
  0 siblings, 1 reply; 15+ messages in thread
From: Viresh Kumar @ 2013-12-24  0:27 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Bjørn Mork, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Linux Kernel Mailing List

On 24 December 2013 06:05, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> OK
>
> Can you please combine this with the patch you sent previously (if that one
> is still necessary) and resubmit with a new subject?

Yes both are required and I really want two separate patches here as they
potentially target separate problems.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-23 15:45                     ` viresh kumar
@ 2013-12-24  0:35                       ` Rafael J. Wysocki
  2013-12-24  0:27                         ` Viresh Kumar
  0 siblings, 1 reply; 15+ messages in thread
From: Rafael J. Wysocki @ 2013-12-24  0:35 UTC (permalink / raw)
  To: viresh kumar
  Cc: Bjørn Mork, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Linux Kernel Mailing List

On Monday, December 23, 2013 09:15:40 PM viresh kumar wrote:
> On Monday 23 December 2013 05:12 PM, Bjørn Mork wrote:
> > That would be great.  This could be just me.  I am quite good at
> > breaking stuff.
> 
> No its not you. While we tried to make sure everything is preserved,
> few special things still stayed out :)
> 
> And they are solved with this patch, log should be good enough to give
> proper reasoning:
> 
> From: Viresh Kumar <viresh.kumar@linaro.org>
> Date: Mon, 23 Dec 2013 20:51:54 +0530
> Subject: [PATCH] cpufreq: preserve user_policy across suspend/resume
> 
> In __cpufreq_add_dev() we are reinitializing user_policy with default values and
> hence would loose values of user_policy.{min|max|policy|governor} fields.
> 
> Preserve them by not overriding these for suspend/resume.

OK

Can you please combine this with the patch you sent previously (if that one
is still necessary) and resubmit with a new subject?

Rafael


> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
>  drivers/cpufreq/cpufreq.c | 14 +++++++++-----
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 16d7b4a..c5c3dac 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -839,9 +839,6 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
> 
>  	/* set default policy */
>  	ret = cpufreq_set_policy(policy, &new_policy);
> -	policy->user_policy.policy = policy->policy;
> -	policy->user_policy.governor = policy->governor;
> -
>  	if (ret) {
>  		pr_debug("setting policy failed\n");
>  		if (cpufreq_driver->exit)
> @@ -1069,8 +1066,10 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
>  	 */
>  	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
> 
> -	policy->user_policy.min = policy->min;
> -	policy->user_policy.max = policy->max;
> +	if (!frozen) {
> +		policy->user_policy.min = policy->min;
> +		policy->user_policy.max = policy->max;
> +	}
> 
>  	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>  				     CPUFREQ_START, policy);
> @@ -1101,6 +1100,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
> 
>  	cpufreq_init_policy(policy);
> 
> +	if (!frozen) {
> +		policy->user_policy.policy = policy->policy;
> +		policy->user_policy.governor = policy->governor;
> +	}
> +
>  	kobject_uevent(&policy->kobj, KOBJ_ADD);
>  	up_read(&cpufreq_rwsem);
> 
> 
-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume
  2013-12-24  0:27                         ` Viresh Kumar
@ 2013-12-24  0:43                           ` Rafael J. Wysocki
  0 siblings, 0 replies; 15+ messages in thread
From: Rafael J. Wysocki @ 2013-12-24  0:43 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Bjørn Mork, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, Linux Kernel Mailing List

On Tuesday, December 24, 2013 05:57:35 AM Viresh Kumar wrote:
> On 24 December 2013 06:05, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> > OK
> >
> > Can you please combine this with the patch you sent previously (if that one
> > is still necessary) and resubmit with a new subject?
> 
> Yes both are required and I really want two separate patches here as they
> potentially target separate problems.

OK

Please resubmit them as a proper series then.

Thanks,
Rafael


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2013-12-24  0:35 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-12-20 15:56 [PATCH Resend] cpufreq: remove sysfs files for CPU which failed to come back after resume Viresh Kumar
2013-12-22  1:00 ` Rafael J. Wysocki
2013-12-23  5:55   ` Bjørn Mork
2013-12-23  6:02     ` Viresh Kumar
2013-12-23  6:55       ` Bjørn Mork
2013-12-23  7:55         ` viresh kumar
2013-12-23  9:23           ` Bjørn Mork
2013-12-23 10:45             ` Viresh Kumar
2013-12-23 10:57               ` Bjørn Mork
2013-12-23 11:13                 ` Viresh Kumar
2013-12-23 11:42                   ` Bjørn Mork
2013-12-23 15:45                     ` viresh kumar
2013-12-24  0:35                       ` Rafael J. Wysocki
2013-12-24  0:27                         ` Viresh Kumar
2013-12-24  0:43                           ` Rafael J. Wysocki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).