Linux Power Management development

Linux Power Management development
 help / color / mirror / Atom feed

* [PATCH] PM/Devfreq: Add Exynos5-bus devfreq driver for Exynos5250.
From: Abhilash Kesavan @ 2012-12-28  9:22 UTC (permalink / raw)
  To: myungjoo.ham, kyungmin.park, rjw, linux-kernel, linux-pm
  Cc: kgene.kim, jhbird.choi, Abhilash Kesavan
In-Reply-To: <CAJ0PZbTTDdAXJ378R7CDO2=spx7WF_RpztG5ch7iFZWx-v3N2Q@mail.gmail.com>

The Exynos5-bus devfreq driver monitors PPMU counters and adjusts
operating frequencies and voltages using OPP. This patch uses a
constant 1.025V for every level; ASV should be used instead to provide
voltages appropriate to the speed group of the SoC.

Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
Cc: Jonghwan Choi <jhbird.choi@samsung.com>
Cc: Kukjin Kim <kgene.kim@samsung.com>
---
Changes since RFC v1:
* Moved the Exynos5 PPMU driver to machine specific directory
* Migrated to the PM QOS framework

This patch depends on PPMU support which has now been posted as part of
the arch-side support patch.
Tested after merging for-rafael branch of
git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
with for-next branch of
git://git.kernel.org/pub/scm/linux/kernel/git/kgene/linux-samsung.git

 drivers/devfreq/Kconfig       |  10 +
 drivers/devfreq/Makefile      |   1 +
 drivers/devfreq/exynos5_bus.c | 469 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 480 insertions(+)
 create mode 100644 drivers/devfreq/exynos5_bus.c

diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 0f079be..5b5a978 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -78,4 +78,14 @@ config ARM_EXYNOS4_BUS_DEVFREQ
 	  To operate with optimal voltages, ASV support is required
 	  (CONFIG_EXYNOS_ASV).
 
+config ARM_EXYNOS5_BUS_DEVFREQ
+	bool "ARM Exynos5250 Bus DEVFREQ Driver"
+	depends on SOC_EXYNOS5250 && EXYNOS5250_PPMU
+	select ARCH_HAS_OPP
+	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	help
+	  This adds the DEVFREQ driver for Exynos5250 bus interface (vdd_int).
+	  It reads PPMU counters of memory controllers and adjusts the
+	  operating frequencies and voltages with OPP support.
+
 endif # PM_DEVFREQ
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index 8c46423..1771276 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_DEVFREQ_GOV_USERSPACE)	+= governor_userspace.o
 
 # DEVFREQ Drivers
 obj-$(CONFIG_ARM_EXYNOS4_BUS_DEVFREQ)	+= exynos4_bus.o
+obj-$(CONFIG_ARM_EXYNOS5_BUS_DEVFREQ)	+= exynos5_bus.o
diff --git a/drivers/devfreq/exynos5_bus.c b/drivers/devfreq/exynos5_bus.c
new file mode 100644
index 0000000..ad442a4
--- /dev/null
+++ b/drivers/devfreq/exynos5_bus.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com/
+ *
+ * EXYNOS5 INT clock frequency scaling support using DEVFREQ framework
+ * Based on work done by Jonghwan Choi <jhbird.choi@samsung.com>
+ * Support for only EXYNOS5250 is present.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
+#include <linux/opp.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/pm_qos.h>
+#include <linux/regulator/consumer.h>
+
+#include <mach/exynos_ppmu.h>
+#include <mach/exynos5_ppmu.h>
+#include "governor.h"
+
+#define MAX_SAFEVOLT			1100000 /* 1.10V */
+/* Assume that the bus is saturated if the utilization is 25% */
+#define INT_BUS_SATURATION_RATIO	25
+#define EXYNOS5_BUS_INT_POLL_TIME	msecs_to_jiffies(100)
+
+enum int_level_idx {
+	LV_0,
+	LV_1,
+	LV_2,
+	LV_3,
+	LV_4,
+	_LV_END
+};
+
+struct busfreq_data_int {
+	struct device *dev;
+	struct devfreq *devfreq;
+	bool disabled;
+	struct regulator *vdd_int;
+	unsigned long curr_freq;
+	struct notifier_block pm_notifier;
+	struct mutex lock;
+	struct pm_qos_request int_req;
+	struct clk *int_clk;
+	struct exynos5_ppmu_handle *ppmu;
+	struct delayed_work work;
+	int busy;
+};
+
+struct int_bus_opp_table {
+	unsigned int idx;
+	unsigned long clk;
+	unsigned long volt;
+};
+
+static struct int_bus_opp_table exynos5_int_opp_table[] = {
+	{LV_0, 266000, 1025000},
+	{LV_1, 200000, 1025000},
+	{LV_2, 160000, 1025000},
+	{LV_3, 133000, 1025000},
+	{LV_4, 100000, 1025000},
+	{0, 0, 0},
+};
+
+static int exynos5_int_setvolt(struct busfreq_data_int *data,
+		unsigned long volt)
+{
+	return regulator_set_voltage(data->vdd_int, volt, MAX_SAFEVOLT);
+}
+
+static int exynos5_busfreq_int_target(struct device *dev, unsigned long *_freq,
+			      u32 flags)
+{
+	int err = 0;
+	struct platform_device *pdev = container_of(dev, struct platform_device,
+						    dev);
+	struct busfreq_data_int *data = platform_get_drvdata(pdev);
+	struct opp *opp;
+	unsigned long old_freq, freq;
+	unsigned long volt;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, _freq, flags);
+	if (IS_ERR(opp)) {
+		rcu_read_unlock();
+		dev_err(dev, "%s: Invalid OPP.\n", __func__);
+		return PTR_ERR(opp);
+	}
+
+	freq = opp_get_freq(opp);
+	volt = opp_get_voltage(opp);
+	rcu_read_unlock();
+
+	old_freq = data->curr_freq;
+
+	if (old_freq == freq)
+		return 0;
+
+	dev_dbg(dev, "targetting %lukHz %luuV\n", freq, volt);
+
+	mutex_lock(&data->lock);
+
+	if (data->disabled)
+		goto out;
+
+	if (freq > exynos5_int_opp_table[_LV_END - 1].clk)
+		pm_qos_update_request(&data->int_req,
+				data->busy * old_freq * 16 / 100000);
+	else
+		pm_qos_update_request(&data->int_req, -1);
+
+	if (old_freq < freq)
+		err = exynos5_int_setvolt(data, volt);
+	if (err)
+		goto out;
+
+	err = clk_set_rate(data->int_clk, freq * 1000);
+
+	if (err)
+		goto out;
+
+	if (old_freq > freq)
+		err = exynos5_int_setvolt(data, volt);
+	if (err)
+		goto out;
+
+	data->curr_freq = freq;
+out:
+	mutex_unlock(&data->lock);
+	return err;
+}
+
+static int exynos5_int_get_dev_status(struct device *dev,
+				      struct devfreq_dev_status *stat)
+{
+	struct platform_device *pdev = container_of(dev, struct platform_device,
+						    dev);
+	struct busfreq_data_int *data = platform_get_drvdata(pdev);
+
+	stat->current_frequency = data->curr_freq;
+	stat->busy_time = data->busy;
+	stat->total_time = 100;
+
+	return 0;
+}
+
+static void exynos5_int_poll_start(struct busfreq_data_int *data)
+{
+	schedule_delayed_work(&data->work, EXYNOS5_BUS_INT_POLL_TIME);
+}
+
+static void exynos5_int_poll_stop(struct busfreq_data_int *data)
+{
+	cancel_delayed_work_sync(&data->work);
+}
+
+static void exynos5_int_poll(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct busfreq_data_int *data;
+	int ret;
+
+	dwork = to_delayed_work(work);
+	data = container_of(dwork, struct busfreq_data_int, work);
+
+	ret = exynos5_ppmu_get_busy(data->ppmu, PPMU_SET_RIGHT);
+
+	if (ret >= 0) {
+		data->busy = ret;
+		mutex_lock(&data->devfreq->lock);
+		update_devfreq(data->devfreq);
+		mutex_unlock(&data->devfreq->lock);
+	}
+
+	schedule_delayed_work(&data->work, EXYNOS5_BUS_INT_POLL_TIME);
+}
+
+static void exynos5_int_exit(struct device *dev)
+{
+	struct platform_device *pdev = container_of(dev, struct platform_device,
+						    dev);
+	struct busfreq_data_int *data = platform_get_drvdata(pdev);
+
+	devfreq_unregister_opp_notifier(dev, data->devfreq);
+}
+
+static struct devfreq_dev_profile exynos5_devfreq_int_profile = {
+	.initial_freq		= 160000,
+	.polling_ms		= 0,
+	.target			= exynos5_busfreq_int_target,
+	.get_dev_status		= exynos5_int_get_dev_status,
+	.exit			= exynos5_int_exit,
+};
+
+static int exynos5250_init_int_tables(struct busfreq_data_int *data)
+{
+	int i, err = 0;
+
+	for (i = LV_0; i < _LV_END; i++) {
+		err = opp_add(data->dev, exynos5_int_opp_table[i].clk,
+				exynos5_int_opp_table[i].volt);
+		if (err) {
+			dev_err(data->dev, "Cannot add opp entries.\n");
+			return err;
+		}
+	}
+
+	return 0;
+}
+static struct devfreq_simple_ondemand_data exynos5_int_ondemand_data = {
+	.downdifferential = 2,
+	.upthreshold = INT_BUS_SATURATION_RATIO,
+};
+
+static int exynos5_busfreq_int_pm_notifier_event(struct notifier_block *this,
+		unsigned long event, void *ptr)
+{
+	struct busfreq_data_int *data = container_of(this,
+					struct busfreq_data_int, pm_notifier);
+	struct opp *opp;
+	unsigned long maxfreq = ULONG_MAX;
+	unsigned long freq;
+	unsigned long volt;
+	int err = 0;
+
+	switch (event) {
+	case PM_SUSPEND_PREPARE:
+		/* Set Fastest and Deactivate DVFS */
+		mutex_lock(&data->lock);
+
+		data->disabled = true;
+
+		rcu_read_lock();
+		opp = opp_find_freq_floor(data->dev, &maxfreq);
+		if (IS_ERR(opp)) {
+			rcu_read_unlock();
+			err = PTR_ERR(opp);
+			goto unlock;
+		}
+		freq = opp_get_freq(opp);
+		volt = opp_get_voltage(opp);
+		rcu_read_unlock();
+
+		err = exynos5_int_setvolt(data, volt);
+		if (err)
+			goto unlock;
+
+		err = clk_set_rate(data->int_clk, freq * 1000);
+
+		if (err)
+			goto unlock;
+
+		data->curr_freq = freq;
+unlock:
+		mutex_unlock(&data->lock);
+		if (err)
+			return NOTIFY_BAD;
+		return NOTIFY_OK;
+	case PM_POST_RESTORE:
+	case PM_POST_SUSPEND:
+		/* Reactivate */
+		mutex_lock(&data->lock);
+		data->disabled = false;
+		mutex_unlock(&data->lock);
+		return NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static __devinit int exynos5_busfreq_int_probe(struct platform_device *pdev)
+{
+	struct busfreq_data_int *data;
+	struct opp *opp;
+	struct device *dev = &pdev->dev;
+	unsigned long initial_freq;
+	unsigned long initial_volt;
+	int err = 0;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(struct busfreq_data_int),
+				GFP_KERNEL);
+	if (data == NULL) {
+		dev_err(dev, "Cannot allocate memory.\n");
+		return -ENOMEM;
+	}
+
+	data->pm_notifier.notifier_call = exynos5_busfreq_int_pm_notifier_event;
+	data->dev = dev;
+	mutex_init(&data->lock);
+
+	err = exynos5250_init_int_tables(data);
+	if (err)
+		goto err_regulator;
+
+	data->vdd_int = regulator_get(dev, "vdd_int");
+	if (IS_ERR(data->vdd_int)) {
+		dev_err(dev, "Cannot get the regulator \"vdd_int\"\n");
+		err = PTR_ERR(data->vdd_int);
+		goto err_regulator;
+	}
+
+	data->int_clk = clk_get(dev, "int_clk");
+	if (IS_ERR(data->int_clk)) {
+		dev_err(dev, "Cannot get clock \"int_clk\"\n");
+		err = PTR_ERR(data->int_clk);
+		goto err_clock;
+	}
+
+	rcu_read_lock();
+	opp = opp_find_freq_floor(dev,
+			&exynos5_devfreq_int_profile.initial_freq);
+	if (IS_ERR(opp)) {
+		rcu_read_unlock();
+		dev_err(dev, "Invalid initial frequency %lu kHz.\n",
+		       exynos5_devfreq_int_profile.initial_freq);
+		err = PTR_ERR(opp);
+		goto err_opp_add;
+	}
+	initial_freq = opp_get_freq(opp);
+	initial_volt = opp_get_voltage(opp);
+	rcu_read_unlock();
+	data->curr_freq = initial_freq;
+
+	err = clk_set_rate(data->int_clk, initial_freq * 1000);
+	if (err) {
+		dev_err(dev, "Failed to set initial frequency\n");
+		goto err_opp_add;
+	}
+
+	err = exynos5_int_setvolt(data, initial_volt);
+	if (err)
+		goto err_opp_add;
+
+	platform_set_drvdata(pdev, data);
+
+	data->ppmu = exynos5_ppmu_get();
+	if (!data->ppmu)
+		goto err_ppmu_get;
+
+	INIT_DELAYED_WORK(&data->work, exynos5_int_poll);
+	exynos5_int_poll_start(data);
+
+	data->devfreq = devfreq_add_device(dev, &exynos5_devfreq_int_profile,
+					   "simple_ondemand",
+					   &exynos5_int_ondemand_data);
+
+	if (IS_ERR(data->devfreq)) {
+		err = PTR_ERR(data->devfreq);
+		goto err_devfreq_add;
+	}
+
+	devfreq_register_opp_notifier(dev, data->devfreq);
+
+	err = register_pm_notifier(&data->pm_notifier);
+	if (err) {
+		dev_err(dev, "Failed to setup pm notifier\n");
+		goto err_devfreq_add;
+	}
+
+	/* TODO: Add a new QOS class for int/mif bus */
+	pm_qos_add_request(&data->int_req, PM_QOS_NETWORK_THROUGHPUT, -1);
+
+	return 0;
+
+err_devfreq_add:
+	devfreq_remove_device(data->devfreq);
+	exynos5_int_poll_stop(data);
+err_ppmu_get:
+	platform_set_drvdata(pdev, NULL);
+err_opp_add:
+	clk_put(data->int_clk);
+err_clock:
+	regulator_put(data->vdd_int);
+err_regulator:
+	return err;
+}
+
+static __devexit int exynos5_busfreq_int_remove(struct platform_device *pdev)
+{
+	struct busfreq_data_int *data = platform_get_drvdata(pdev);
+
+	pm_qos_remove_request(&data->int_req);
+	unregister_pm_notifier(&data->pm_notifier);
+	devfreq_remove_device(data->devfreq);
+	regulator_put(data->vdd_int);
+	clk_put(data->int_clk);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int exynos5_busfreq_int_suspend(struct device *dev)
+{
+	struct platform_device *pdev = container_of(dev, struct platform_device,
+						    dev);
+	struct busfreq_data_int *data = platform_get_drvdata(pdev);
+
+	exynos5_int_poll_stop(data);
+	return 0;
+}
+
+static int exynos5_busfreq_int_resume(struct device *dev)
+{
+	struct platform_device *pdev = container_of(dev, struct platform_device,
+						    dev);
+	struct busfreq_data_int *data = platform_get_drvdata(pdev);
+
+	exynos5_int_poll_start(data);
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(exynos5_busfreq_int_pm, exynos5_busfreq_int_suspend,
+		exynos5_busfreq_int_resume);
+
+/* platform device pointer for exynos5 devfreq device. */
+static struct platform_device *exynos5_devfreq_pdev;
+
+static struct platform_driver exynos5_busfreq_int_driver = {
+	.probe		= exynos5_busfreq_int_probe,
+	.remove		= __devexit_p(exynos5_busfreq_int_remove),
+	.driver		= {
+		.name		= "exynos5-bus-int",
+		.owner		= THIS_MODULE,
+		.pm		= &exynos5_busfreq_int_pm,
+	},
+};
+
+static int __init exynos5_busfreq_int_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&exynos5_busfreq_int_driver);
+	if (ret < 0)
+		goto out;
+
+	exynos5_devfreq_pdev =
+		platform_device_register_simple("exynos5-bus-int", -1, NULL, 0);
+	if (IS_ERR_OR_NULL(exynos5_devfreq_pdev)) {
+		ret = PTR_ERR(exynos5_devfreq_pdev);
+		goto out1;
+	}
+
+	return 0;
+out1:
+	platform_driver_unregister(&exynos5_busfreq_int_driver);
+out:
+	return ret;
+}
+late_initcall(exynos5_busfreq_int_init);
+
+static void __exit exynos5_busfreq_int_exit(void)
+{
+	platform_device_unregister(exynos5_devfreq_pdev);
+	platform_driver_unregister(&exynos5_busfreq_int_driver);
+}
+module_exit(exynos5_busfreq_int_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("EXYNOS5 busfreq driver with devfreq framework");
-- 
1.8.1-rc3


^ permalink raw reply related

* [PATCH 4/5] cpufreq: conservative: call dbs_check_cpu only when necessary
From: Fabio Baltieri @ 2012-12-27 14:55 UTC (permalink / raw)
  To: Rafael J. Wysocki, cpufreq, linux-pm
  Cc: Linus Walleij, linux-kernel, Fabio Baltieri
In-Reply-To: <1356620142-8680-1-git-send-email-fabio.baltieri@linaro.org>

Modify conservative timer to not resample CPU utilization if recently
sampled from another SW coordinated core.

Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
---
 drivers/cpufreq/cpufreq_conservative.c | 47 +++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index b9d7f14..5d8e894 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -111,22 +111,57 @@ static void cs_check_cpu(int cpu, unsigned int load)
 	}
 }
 
-static void cs_dbs_timer(struct work_struct *work)
+static void cs_timer_update(struct cs_cpu_dbs_info_s *dbs_info, bool sample,
+			    struct delayed_work *dw)
 {
-	struct cs_cpu_dbs_info_s *dbs_info = container_of(work,
-			struct cs_cpu_dbs_info_s, cdbs.work.work);
 	unsigned int cpu = dbs_info->cdbs.cpu;
 	int delay = delay_for_sampling_rate(cs_tuners.sampling_rate);
 
+	if (sample)
+		dbs_check_cpu(&cs_dbs_data, cpu);
+
+	schedule_delayed_work_on(smp_processor_id(), dw, delay);
+}
+
+static void cs_timer_coordinated(struct cs_cpu_dbs_info_s *dbs_info_local,
+				 struct delayed_work *dw)
+{
+	struct cs_cpu_dbs_info_s *dbs_info;
+	ktime_t time_now;
+	s64 delta_us;
+	bool sample = true;
+
+	/* use leader CPU's dbs_info */
+	dbs_info = &per_cpu(cs_cpu_dbs_info, dbs_info_local->cdbs.cpu);
 	mutex_lock(&dbs_info->cdbs.timer_mutex);
 
-	dbs_check_cpu(&cs_dbs_data, cpu);
+	time_now = ktime_get();
+	delta_us = ktime_us_delta(time_now, dbs_info->cdbs.time_stamp);
 
-	schedule_delayed_work_on(smp_processor_id(), &dbs_info->cdbs.work,
-			delay);
+	/* Do nothing if we recently have sampled */
+	if (delta_us < (s64)(cs_tuners.sampling_rate / 2))
+		sample = false;
+	else
+		dbs_info->cdbs.time_stamp = time_now;
+
+	cs_timer_update(dbs_info, sample, dw);
 	mutex_unlock(&dbs_info->cdbs.timer_mutex);
 }
 
+static void cs_dbs_timer(struct work_struct *work)
+{
+	struct delayed_work *dw = to_delayed_work(work);
+	struct cs_cpu_dbs_info_s *dbs_info = container_of(work,
+			struct cs_cpu_dbs_info_s, cdbs.work.work);
+
+	if (dbs_sw_coordinated_cpus(&dbs_info->cdbs)) {
+		cs_timer_coordinated(dbs_info, dw);
+	} else {
+		mutex_lock(&dbs_info->cdbs.timer_mutex);
+		cs_timer_update(dbs_info, true, dw);
+		mutex_unlock(&dbs_info->cdbs.timer_mutex);
+	}
+}
 static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 		void *data)
 {
-- 
1.7.12.1


^ permalink raw reply related

* [PATCH v6 0/5] cpufreq: handle SW coordinated CPUs
From: Fabio Baltieri @ 2012-12-27 14:55 UTC (permalink / raw)
  To: Rafael J. Wysocki, cpufreq, linux-pm
  Cc: Linus Walleij, linux-kernel, Fabio Baltieri

Hello Rafael,

this is v6 of the cpufreq SW coordinated CPU bug fix, which I was
holding until -rc1 as agreed.

Differences from v5:
- removed dangling special case in dbs_timer_init
- rebased on top of v3.8-rc1

Would you consider this set for -next?

Thanks,
Fabio

Fabio Baltieri (4):
  cpufreq: start/stop cpufreq timers on cpu hotplug
  cpufreq: ondemand: call dbs_check_cpu only when necessary
  cpufreq: conservative: call dbs_check_cpu only when necessary
  cpufreq: ondemand: use all CPUs in update_sampling_rate

Rickard Andersson (1):
  cpufreq: handle SW coordinated CPUs

 drivers/cpufreq/cpufreq_conservative.c | 46 ++++++++++++++--
 drivers/cpufreq/cpufreq_governor.c     | 98 +++++++++++++++++++++++++++++++---
 drivers/cpufreq/cpufreq_governor.h     |  2 +
 drivers/cpufreq/cpufreq_ondemand.c     | 62 ++++++++++++++++-----
 4 files changed, 185 insertions(+), 23 deletions(-)

-- 
1.7.12.1

^ permalink raw reply

* [PATCH 5/5] cpufreq: ondemand: use all CPUs in update_sampling_rate
From: Fabio Baltieri @ 2012-12-27 14:55 UTC (permalink / raw)
  To: Rafael J. Wysocki, cpufreq, linux-pm
  Cc: Linus Walleij, linux-kernel, Fabio Baltieri
In-Reply-To: <1356620142-8680-1-git-send-email-fabio.baltieri@linaro.org>

Modify update_sampling_rate() to check, and if necessary immediately
reschedule, every CPU's do_dbs_timer delayed work.

This is required in case of software coordinated CPUs, as we now have a
separate delayed work for each CPU.

Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
---
 drivers/cpufreq/cpufreq_ondemand.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 13ceb3c..1017b90 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -326,7 +326,7 @@ static void update_sampling_rate(unsigned int new_rate)
 			cpufreq_cpu_put(policy);
 			continue;
 		}
-		dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu);
+		dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
 		cpufreq_cpu_put(policy);

 		mutex_lock(&dbs_info->cdbs.timer_mutex);
@@ -345,8 +345,7 @@ static void update_sampling_rate(unsigned int new_rate)
 			cancel_delayed_work_sync(&dbs_info->cdbs.work);
 			mutex_lock(&dbs_info->cdbs.timer_mutex);

-			schedule_delayed_work_on(dbs_info->cdbs.cpu,
-					&dbs_info->cdbs.work,
+			schedule_delayed_work_on(cpu, &dbs_info->cdbs.work,
 					usecs_to_jiffies(new_rate));

 		}
-- 
1.7.12.1

^ permalink raw reply related

* [PATCH 3/5] cpufreq: ondemand: call dbs_check_cpu only when necessary
From: Fabio Baltieri @ 2012-12-27 14:55 UTC (permalink / raw)
  To: Rafael J. Wysocki, cpufreq, linux-pm
  Cc: Linus Walleij, linux-kernel, Fabio Baltieri
In-Reply-To: <1356620142-8680-1-git-send-email-fabio.baltieri@linaro.org>

Modify ondemand timer to not resample CPU utilization if recently
sampled from another SW coordinated core.

Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
---
 drivers/cpufreq/cpufreq_governor.c |  3 ++
 drivers/cpufreq/cpufreq_governor.h |  1 +
 drivers/cpufreq/cpufreq_ondemand.c | 58 +++++++++++++++++++++++++++++++-------
 3 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index e881250..e5a3711 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -333,6 +333,9 @@ second_time:
 		mutex_unlock(&dbs_data->mutex);
 
 		if (dbs_sw_coordinated_cpus(cpu_cdbs)) {
+			/* Initiate timer time stamp */
+			cpu_cdbs->time_stamp = ktime_get();
+
 			for_each_cpu(j, policy->cpus) {
 				struct cpu_dbs_common_info *j_cdbs;
 
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 5bf6fb8..aaf073d 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -82,6 +82,7 @@ struct cpu_dbs_common_info {
 	 * the governor or limits.
 	 */
 	struct mutex timer_mutex;
+	ktime_t time_stamp;
 };
 
 struct od_cpu_dbs_info_s {
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 93bb56d..13ceb3c 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -216,23 +216,23 @@ static void od_check_cpu(int cpu, unsigned int load_freq)
 	}
 }
 
-static void od_dbs_timer(struct work_struct *work)
+static void od_timer_update(struct od_cpu_dbs_info_s *dbs_info, bool sample,
+			    struct delayed_work *dw)
 {
-	struct od_cpu_dbs_info_s *dbs_info =
-		container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work);
 	unsigned int cpu = dbs_info->cdbs.cpu;
 	int delay, sample_type = dbs_info->sample_type;
 
-	mutex_lock(&dbs_info->cdbs.timer_mutex);
-
 	/* Common NORMAL_SAMPLE setup */
 	dbs_info->sample_type = OD_NORMAL_SAMPLE;
 	if (sample_type == OD_SUB_SAMPLE) {
 		delay = dbs_info->freq_lo_jiffies;
-		__cpufreq_driver_target(dbs_info->cdbs.cur_policy,
-			dbs_info->freq_lo, CPUFREQ_RELATION_H);
+		if (sample)
+			__cpufreq_driver_target(dbs_info->cdbs.cur_policy,
+						dbs_info->freq_lo,
+						CPUFREQ_RELATION_H);
 	} else {
-		dbs_check_cpu(&od_dbs_data, cpu);
+		if (sample)
+			dbs_check_cpu(&od_dbs_data, cpu);
 		if (dbs_info->freq_lo) {
 			/* Setup timer for SUB_SAMPLE */
 			dbs_info->sample_type = OD_SUB_SAMPLE;
@@ -243,11 +243,49 @@ static void od_dbs_timer(struct work_struct *work)
 		}
 	}
 
-	schedule_delayed_work_on(smp_processor_id(), &dbs_info->cdbs.work,
-			delay);
+	schedule_delayed_work_on(smp_processor_id(), dw, delay);
+}
+
+static void od_timer_coordinated(struct od_cpu_dbs_info_s *dbs_info_local,
+				 struct delayed_work *dw)
+{
+	struct od_cpu_dbs_info_s *dbs_info;
+	ktime_t time_now;
+	s64 delta_us;
+	bool sample = true;
+
+	/* use leader CPU's dbs_info */
+	dbs_info = &per_cpu(od_cpu_dbs_info, dbs_info_local->cdbs.cpu);
+	mutex_lock(&dbs_info->cdbs.timer_mutex);
+
+	time_now = ktime_get();
+	delta_us = ktime_us_delta(time_now, dbs_info->cdbs.time_stamp);
+
+	/* Do nothing if we recently have sampled */
+	if (delta_us < (s64)(od_tuners.sampling_rate / 2))
+		sample = false;
+	else
+		dbs_info->cdbs.time_stamp = time_now;
+
+	od_timer_update(dbs_info, sample, dw);
 	mutex_unlock(&dbs_info->cdbs.timer_mutex);
 }
 
+static void od_dbs_timer(struct work_struct *work)
+{
+	struct delayed_work *dw = to_delayed_work(work);
+	struct od_cpu_dbs_info_s *dbs_info =
+		container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work);
+
+	if (dbs_sw_coordinated_cpus(&dbs_info->cdbs)) {
+		od_timer_coordinated(dbs_info, dw);
+	} else {
+		mutex_lock(&dbs_info->cdbs.timer_mutex);
+		od_timer_update(dbs_info, true, dw);
+		mutex_unlock(&dbs_info->cdbs.timer_mutex);
+	}
+}
+
 /************************** sysfs interface ************************/
 
 static ssize_t show_sampling_rate_min(struct kobject *kobj,
-- 
1.7.12.1

^ permalink raw reply related

* [PATCH 2/5] cpufreq: start/stop cpufreq timers on cpu hotplug
From: Fabio Baltieri @ 2012-12-27 14:55 UTC (permalink / raw)
  To: Rafael J. Wysocki, cpufreq, linux-pm
  Cc: Linus Walleij, linux-kernel, Fabio Baltieri
In-Reply-To: <1356620142-8680-1-git-send-email-fabio.baltieri@linaro.org>

Add a CPU notifier to start and stop individual core timers on CPU
hotplug events when running on CPUs with SW coordinated frequency.

Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
---
 drivers/cpufreq/cpufreq_governor.c | 51 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index b0e4506..e881250 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -25,9 +25,12 @@
 #include <linux/tick.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <linux/cpu.h>
 
 #include "cpufreq_governor.h"
 
+static DEFINE_PER_CPU(struct dbs_data *, cpu_cur_dbs);
+
 static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
 	u64 idle_time;
@@ -185,6 +188,46 @@ static inline void dbs_timer_exit(struct cpu_dbs_common_info *cdbs)
 	cancel_delayed_work_sync(&cdbs->work);
 }
 
+static int __cpuinit cpu_callback(struct notifier_block *nfb,
+		unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct device *cpu_dev = get_cpu_device(cpu);
+	struct dbs_data *dbs_data = per_cpu(cpu_cur_dbs, cpu);
+	struct cpu_dbs_common_info *cpu_cdbs = dbs_data->get_cpu_cdbs(cpu);
+	unsigned int sampling_rate;
+
+	if (dbs_data->governor == GOV_CONSERVATIVE) {
+		struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
+		sampling_rate = cs_tuners->sampling_rate;
+	} else {
+		struct od_dbs_tuners *od_tuners = dbs_data->tuners;
+		sampling_rate = od_tuners->sampling_rate;
+	}
+
+	if (cpu_dev) {
+		switch (action) {
+		case CPU_ONLINE:
+		case CPU_ONLINE_FROZEN:
+		case CPU_DOWN_FAILED:
+		case CPU_DOWN_FAILED_FROZEN:
+			dbs_timer_init(dbs_data, cpu_cdbs,
+					sampling_rate, cpu);
+			break;
+		case CPU_DOWN_PREPARE:
+		case CPU_DOWN_PREPARE_FROZEN:
+			dbs_timer_exit(cpu_cdbs);
+			break;
+		}
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __refdata ondemand_cpu_notifier = {
+	.notifier_call = cpu_callback,
+};
+
 int cpufreq_governor_dbs(struct dbs_data *dbs_data,
 		struct cpufreq_policy *policy, unsigned int event)
 {
@@ -296,7 +339,11 @@ second_time:
 				j_cdbs = dbs_data->get_cpu_cdbs(j);
 				dbs_timer_init(dbs_data, j_cdbs,
 					       *sampling_rate, j);
+
+				per_cpu(cpu_cur_dbs, j) = dbs_data;
 			}
+
+			register_hotcpu_notifier(&ondemand_cpu_notifier);
 		} else {
 			dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate, cpu);
 		}
@@ -307,11 +354,15 @@ second_time:
 			cs_dbs_info->enable = 0;
 
 		if (dbs_sw_coordinated_cpus(cpu_cdbs)) {
+			unregister_hotcpu_notifier(&ondemand_cpu_notifier);
+
 			for_each_cpu(j, policy->cpus) {
 				struct cpu_dbs_common_info *j_cdbs;
 
 				j_cdbs = dbs_data->get_cpu_cdbs(j);
 				dbs_timer_exit(j_cdbs);
+
+				per_cpu(cpu_cur_dbs, j) = NULL;
 			}
 		} else {
 			dbs_timer_exit(cpu_cdbs);
-- 
1.7.12.1

^ permalink raw reply related

* [PATCH 1/5] cpufreq: handle SW coordinated CPUs
From: Fabio Baltieri @ 2012-12-27 14:55 UTC (permalink / raw)
  To: Rafael J. Wysocki, cpufreq, linux-pm
  Cc: Linus Walleij, linux-kernel, Rickard Andersson, Fabio Baltieri
In-Reply-To: <1356620142-8680-1-git-send-email-fabio.baltieri@linaro.org>

From: Rickard Andersson <rickard.andersson@stericsson.com>

This patch fixes a bug that occurred when we had load on a secondary CPU
and the primary CPU was sleeping. Only one sampling timer was spawned
and it was spawned as a deferred timer on the primary CPU, so when a
secondary CPU had a change in load this was not detected by the cpufreq
governor (both ondemand and conservative).

This patch makes sure that deferred timers are run on all CPUs in the
case of software controlled CPUs that run at the same frequency.

Signed-off-by: Rickard Andersson <rickard.andersson@stericsson.com>
Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
---
 drivers/cpufreq/cpufreq_conservative.c |  3 ++-
 drivers/cpufreq/cpufreq_governor.c     | 44 +++++++++++++++++++++++++++++-----
 drivers/cpufreq/cpufreq_governor.h     |  1 +
 drivers/cpufreq/cpufreq_ondemand.c     |  3 ++-
 4 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 64ef737..b9d7f14 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -122,7 +122,8 @@ static void cs_dbs_timer(struct work_struct *work)
 
 	dbs_check_cpu(&cs_dbs_data, cpu);
 
-	schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay);
+	schedule_delayed_work_on(smp_processor_id(), &dbs_info->cdbs.work,
+			delay);
 	mutex_unlock(&dbs_info->cdbs.timer_mutex);
 }
 
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 6c5f1d3..b0e4506 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -161,13 +161,23 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 }
 EXPORT_SYMBOL_GPL(dbs_check_cpu);
 
+bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs)
+{
+	struct cpufreq_policy *policy = cdbs->cur_policy;
+
+	return cpumask_weight(policy->cpus) > 1;
+}
+EXPORT_SYMBOL_GPL(dbs_sw_coordinated_cpus);
+
 static inline void dbs_timer_init(struct dbs_data *dbs_data,
-		struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate)
+				  struct cpu_dbs_common_info *cdbs,
+				  unsigned int sampling_rate,
+				  int cpu)
 {
 	int delay = delay_for_sampling_rate(sampling_rate);
+	struct cpu_dbs_common_info *cdbs_local = dbs_data->get_cpu_cdbs(cpu);
 
-	INIT_DEFERRABLE_WORK(&cdbs->work, dbs_data->gov_dbs_timer);
-	schedule_delayed_work_on(cdbs->cpu, &cdbs->work, delay);
+	schedule_delayed_work_on(cpu, &cdbs_local->work, delay);
 }
 
 static inline void dbs_timer_exit(struct cpu_dbs_common_info *cdbs)
@@ -217,6 +227,10 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data,
 			if (ignore_nice)
 				j_cdbs->prev_cpu_nice =
 					kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+
+			mutex_init(&j_cdbs->timer_mutex);
+			INIT_DEFERRABLE_WORK(&j_cdbs->work,
+					     dbs_data->gov_dbs_timer);
 		}
 
 		/*
@@ -275,15 +289,33 @@ second_time:
 		}
 		mutex_unlock(&dbs_data->mutex);
 
-		mutex_init(&cpu_cdbs->timer_mutex);
-		dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate);
+		if (dbs_sw_coordinated_cpus(cpu_cdbs)) {
+			for_each_cpu(j, policy->cpus) {
+				struct cpu_dbs_common_info *j_cdbs;
+
+				j_cdbs = dbs_data->get_cpu_cdbs(j);
+				dbs_timer_init(dbs_data, j_cdbs,
+					       *sampling_rate, j);
+			}
+		} else {
+			dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate, cpu);
+		}
 		break;
 
 	case CPUFREQ_GOV_STOP:
 		if (dbs_data->governor == GOV_CONSERVATIVE)
 			cs_dbs_info->enable = 0;
 
-		dbs_timer_exit(cpu_cdbs);
+		if (dbs_sw_coordinated_cpus(cpu_cdbs)) {
+			for_each_cpu(j, policy->cpus) {
+				struct cpu_dbs_common_info *j_cdbs;
+
+				j_cdbs = dbs_data->get_cpu_cdbs(j);
+				dbs_timer_exit(j_cdbs);
+			}
+		} else {
+			dbs_timer_exit(cpu_cdbs);
+		}
 
 		mutex_lock(&dbs_data->mutex);
 		mutex_destroy(&cpu_cdbs->timer_mutex);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index f661654..5bf6fb8 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -171,6 +171,7 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate)
 
 u64 get_cpu_idle_time(unsigned int cpu, u64 *wall);
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
+bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs);
 int cpufreq_governor_dbs(struct dbs_data *dbs_data,
 		struct cpufreq_policy *policy, unsigned int event);
 #endif /* _CPUFREQ_GOVERNER_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 7731f7c..93bb56d 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -243,7 +243,8 @@ static void od_dbs_timer(struct work_struct *work)
 		}
 	}
 
-	schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay);
+	schedule_delayed_work_on(smp_processor_id(), &dbs_info->cdbs.work,
+			delay);
 	mutex_unlock(&dbs_info->cdbs.timer_mutex);
 }
 
-- 
1.7.12.1

^ permalink raw reply related

* Re: [PATCH 0/6 v8] cpufreq: add support for Calxeda ECX-1000 (highbank)
From: Rafael J. Wysocki @ 2012-12-27 14:43 UTC (permalink / raw)
  To: Mark Langsdorf
  Cc: linux-kernel@vger.kernel.org, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, linux-arm-kernel@lists.infradead.org
In-Reply-To: <21672683C5A3814BB4DB938EBE482DE435D97660AB@IAD2MBX09.mex02.mlsrvr.com>

On Thursday, December 27, 2012 08:28:22 AM Mark Langsdorf wrote:
> I thought I had addressed all the issues with the v9 version. The only
> comments I got on it was Mike's reviewed-by and I didn't think that
> warranted a resend.
> 
> If you do want me to send it again, do you want it  updated to current
> top-of-tree?

Yes, please.

> That may take a bit, since there's an unrelated bug that's preventing my
> test box from booting.

Sure.

Thanks,
Rafael


-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply

* RE: [PATCH 0/6 v8] cpufreq: add support for Calxeda ECX-1000 (highbank)
From: Mark Langsdorf @ 2012-12-27 13:28 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org
In-Reply-To: <2705062.NIaFfcLb3a@vostro.rjw.lan>

I thought I had addressed all the issues with the v9 version. The only comments I got on it was Mike's reviewed-by and I didn't think that warranted a resend.

If you do want me to send it again, do you want it  updated to current top-of-tree? That may take a bit, since there's an unrelated bug that's preventing my test box from booting.

--Mark Langsdorf
Calxeda, Inc.

________________________________________
From: Rafael J. Wysocki [rjw@sisk.pl]
Sent: Thursday, December 27, 2012 7:12 AM
To: Mark Langsdorf
Cc: linux-kernel@vger.kernel.org; cpufreq@vger.kernel.org; linux-pm@vger.kernel.org; linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH 0/6 v8] cpufreq: add support for Calxeda ECX-1000 (highbank)

On Wednesday, December 05, 2012 10:48:35 AM Mark Langsdorf wrote:
> This patch series adds cpufreq support for the Calxeda
> ECX-1000 (highbank) SoCs. The EnergyCore Management Engine (ECME) on
> the ECX-1000 manages the voltage for the part and communications with
> Linux through a pl320 mailbox. clk notifications are used to control
> when to send messages to the ECME.

If you want me to handle this, can you please resend the whole patchset (with
all of the comments addressed, if possible)?

Rafael

--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply

* Re: [PATCH 0/6 v8] cpufreq: add support for Calxeda ECX-1000 (highbank)
From: Rafael J. Wysocki @ 2012-12-27 13:12 UTC (permalink / raw)
  To: Mark Langsdorf; +Cc: linux-kernel, cpufreq, linux-pm, linux-arm-kernel
In-Reply-To: <1354726121-17190-1-git-send-email-mark.langsdorf@calxeda.com>

On Wednesday, December 05, 2012 10:48:35 AM Mark Langsdorf wrote:
> This patch series adds cpufreq support for the Calxeda
> ECX-1000 (highbank) SoCs. The EnergyCore Management Engine (ECME) on
> the ECX-1000 manages the voltage for the part and communications with
> Linux through a pl320 mailbox. clk notifications are used to control
> when to send messages to the ECME.

If you want me to handle this, can you please resend the whole patchset (with
all of the comments addressed, if possible)?

Rafael


-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply

* Re: [PATCH 4/8] Thermal: Add Thermal_trip sysfs node
From: Hongbo Zhang @ 2012-12-27  7:01 UTC (permalink / raw)
  To: Durgadoss R; +Cc: rui.zhang, linux-pm, linux-kernel, wni
In-Reply-To: <1355822977-4804-5-git-send-email-durgadoss.r@intel.com>

On 18 December 2012 17:29, Durgadoss R <durgadoss.r@intel.com> wrote:
> This patch adds a thermal_trip directory under
> /sys/class/thermal/zoneX. This directory contains
> the trip point values for sensors bound to this
> zone.
>
> Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
> ---
>  drivers/thermal/thermal_sys.c |  237 ++++++++++++++++++++++++++++++++++++++++-
>  include/linux/thermal.h       |   37 +++++++
>  2 files changed, 272 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
> index b39bf97..29ec073 100644
> --- a/drivers/thermal/thermal_sys.c
> +++ b/drivers/thermal/thermal_sys.c
> @@ -448,6 +448,22 @@ static void thermal_zone_device_check(struct work_struct *work)
>         thermal_zone_device_update(tz);
>  }
>
> +static int get_sensor_indx_by_kobj(struct thermal_zone *tz, const char *name)
> +{
> +       int i, indx = -EINVAL;
> +
> +       mutex_lock(&sensor_list_lock);
> +       for (i = 0; i < tz->sensor_indx; i++) {
> +               if (!strnicmp(name, kobject_name(tz->kobj_trip[i]),
> +                       THERMAL_NAME_LENGTH)) {
> +                       indx = i;
> +                       break;
> +               }
> +       }
> +       mutex_unlock(&sensor_list_lock);
> +       return indx;
> +}
> +
>  static void remove_sensor_from_zone(struct thermal_zone *tz,
>                                 struct thermal_sensor *ts)
>  {
> @@ -459,9 +475,15 @@ static void remove_sensor_from_zone(struct thermal_zone *tz,
>
>         sysfs_remove_link(&tz->device.kobj, kobject_name(&ts->device.kobj));
>
> +       /* Delete this sensor's trip Kobject */
> +       kobject_del(tz->kobj_trip[indx]);
> +
>         /* Shift the entries in the tz->sensors array */
> -       for (j = indx; j < MAX_SENSORS_PER_ZONE - 1; j++)
> +       for (j = indx; j < MAX_SENSORS_PER_ZONE - 1; j++) {
>                 tz->sensors[j] = tz->sensors[j + 1];
> +               tz->sensor_trip[j] = tz->sensor_trip[j + 1];
> +               tz->kobj_trip[j] = tz->kobj_trip[j + 1];
> +       }
>
>         tz->sensor_indx--;
>  }
> @@ -875,6 +897,120 @@ policy_show(struct device *dev, struct device_attribute *devattr, char *buf)
>         return sprintf(buf, "%s\n", tz->governor->name);
>  }
>
> +static ssize_t
> +active_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> +{
> +       int i, indx, ret = 0;
> +       struct thermal_zone *tz;
> +       struct device *dev;
> +
> +       /* In this function, for
> +        * /sys/class/thermal/zoneX/thermal_trip/sensorY:
> +        * attr                 points to sysfs node 'active'
> +        * kobj                 points to sensorY
> +        * kobj->parent         points to thermal_trip
> +        * kobj->parent->parent points to zoneX
> +        */
> +
> +       /* Get the zone pointer */
> +       dev = container_of(kobj->parent->parent, struct device, kobj);
> +       tz = to_zone(dev);
> +       if (!tz)
> +               return -EINVAL;
> +
> +       /*
> +        * We need this because in the sysfs tree, 'sensorY' is
> +        * not really the sensor pointer. It just has the name
> +        * 'sensorY'; whereas 'zoneX' is actually the zone pointer.
> +        * This means container_of(kobj, struct device, kobj) will not
> +        * provide the actual sensor pointer.
> +        */
> +       indx = get_sensor_indx_by_kobj(tz, kobject_name(kobj));
> +       if (indx < 0)
> +               return indx;
> +
> +       if (tz->sensor_trip[indx]->num_active_trips <= 0)
> +               return sprintf(buf, "<Not available>\n");
> +
> +       ret += sprintf(buf, "0x%x", tz->sensor_trip[indx]->active_trip_mask);
> +       for (i = 0; i < tz->sensor_trip[indx]->num_active_trips; i++) {
> +               ret += sprintf(buf + ret, " %d",
> +                       tz->sensor_trip[indx]->active_trips[i]);
> +       }
> +
> +       ret += sprintf(buf + ret, "\n");
> +       return ret;
> +}
> +
> +static ssize_t
> +ptrip_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> +{
> +       int i, indx, ret = 0;
> +       struct thermal_zone *tz;
> +       struct device *dev;
> +
> +       /* Get the zone pointer */
> +       dev = container_of(kobj->parent->parent, struct device, kobj);
> +       tz = to_zone(dev);
> +       if (!tz)
> +               return -EINVAL;
> +
> +       indx = get_sensor_indx_by_kobj(tz, kobject_name(kobj));
> +       if (indx < 0)
> +               return indx;
> +
> +       if (tz->sensor_trip[indx]->num_passive_trips <= 0)
> +               return sprintf(buf, "<Not available>\n");
> +
> +       for (i = 0; i < tz->sensor_trip[indx]->num_passive_trips; i++) {
> +               ret += sprintf(buf + ret, "%d ",
> +                       tz->sensor_trip[indx]->passive_trips[i]);
> +       }
> +
> +       ret += sprintf(buf + ret, "\n");
> +       return ret;
> +}
> +
> +static ssize_t
> +hot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> +{
> +       int indx;
> +       struct thermal_zone *tz;
> +       struct device *dev;
> +
> +       /* Get the zone pointer */
> +       dev = container_of(kobj->parent->parent, struct device, kobj);
> +       tz = to_zone(dev);
> +       if (!tz)
> +               return -EINVAL;
> +
> +       indx = get_sensor_indx_by_kobj(tz, kobject_name(kobj));
> +       if (indx < 0)
> +               return indx;
> +
> +       return sprintf(buf, "%d\n", tz->sensor_trip[indx]->hot);
> +}
> +
> +static ssize_t
> +critical_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> +{
> +       int indx;
> +       struct thermal_zone *tz;
> +       struct device *dev;
> +
> +       /* Get the zone pointer */
> +       dev = container_of(kobj->parent->parent, struct device, kobj);
> +       tz = to_zone(dev);
> +       if (!tz)
> +               return -EINVAL;
> +
> +       indx = get_sensor_indx_by_kobj(tz, kobject_name(kobj));
> +       if (indx < 0)
> +               return indx;
> +
> +       return sprintf(buf, "%d\n", tz->sensor_trip[indx]->crit);
> +}
> +
>  static DEVICE_ATTR(type, 0444, type_show, NULL);
>  static DEVICE_ATTR(temp, 0444, temp_show, NULL);
>  static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
> @@ -885,7 +1021,27 @@ static DEVICE_ATTR(policy, S_IRUGO | S_IWUSR, policy_show, policy_store);
>  static DEVICE_ATTR(sensor_name, 0444, sensor_name_show, NULL);
>  static DEVICE_ATTR(temp_input, 0444, sensor_temp_show, NULL);
>
> -static DEVICE_ATTR(zone_name, 0444, zone_name_show, NULL);
> +/* Thermal zone attributes */
> +static DEVICE_ATTR(zone_name, S_IRUGO, zone_name_show, NULL);
> +
> +/* Thermal trip attributes */
> +static struct kobj_attribute active_attr = __ATTR_RO(active);
> +/* TODO: rename this to passive while removing old code */
> +static struct kobj_attribute passive_attr = __ATTR_RO(ptrip);
> +static struct kobj_attribute hot_attr = __ATTR_RO(hot);
> +static struct kobj_attribute crit_attr = __ATTR_RO(critical);
> +
> +static struct attribute *trip_attrs[] = {
> +                       &active_attr.attr,
> +                       &passive_attr.attr,
> +                       &hot_attr.attr,
> +                       &crit_attr.attr,
> +                       NULL,
> +                       };
> +
> +static struct attribute_group trip_attr_group = {
> +                       .attrs = trip_attrs,
> +                       };
>
>  /* sys I/F for cooling device */
>  #define to_cooling_device(_dev)        \
> @@ -1770,12 +1926,19 @@ struct thermal_zone *create_thermal_zone(const char *name, void *devdata)
>         if (ret)
>                 goto exit_unregister;
>
> +       tz->kobj_thermal_trip = kobject_create_and_add("thermal_trip",
> +                                       &tz->device.kobj);
> +       if (!tz->kobj_thermal_trip)
> +               goto exit_name;
> +
>         /* Add this zone to the global list of thermal zones */
>         mutex_lock(&zone_list_lock);
>         list_add_tail(&tz->node, &thermal_zone_list);
>         mutex_unlock(&zone_list_lock);
>         return tz;
>
> +exit_name:
> +       device_remove_file(&tz->device, &dev_attr_zone_name);
>  exit_unregister:
>         device_unregister(&tz->device);
>  exit_idr:
> @@ -1789,6 +1952,7 @@ EXPORT_SYMBOL(create_thermal_zone);
>  void remove_thermal_zone(struct thermal_zone *tz)
>  {
>         struct thermal_zone *pos, *next;
> +       int i;
>         bool found = false;
>
>         if (!tz)
> @@ -1809,6 +1973,33 @@ void remove_thermal_zone(struct thermal_zone *tz)
>
>         device_remove_file(&tz->device, &dev_attr_zone_name);
>
> +       /* Just for ease of usage */
> +       i = tz->sensor_indx;
> +
> +       while (--i >= 0) {
> +               /* Remove /sys/class/thermal/zoneX/sensorY */
> +               sysfs_remove_link(&tz->device.kobj,
> +                               kobject_name(&tz->sensors[i]->device.kobj));
> +
> +               /* Remove /sys/class/thermal/zoneX/thermal_trip/sensorY */
> +               if (tz->kobj_trip[i]) {
> +                       sysfs_remove_group(tz->kobj_trip[i], &trip_attr_group);
> +                       kobject_del(tz->kobj_trip[i]);
> +               }
> +       }
> +
> +       /* Remove /sys/class/thermal/zoneX/thermal_trip */
> +       kobject_del(tz->kobj_thermal_trip);
> +
> +       /* Release the cdevs attached to this zone */
> +       i = tz->cdev_indx;
> +
> +       while (--i >= 0) {
> +               /* Remove /sys/class/thermal/zoneX/cooling_deviceY */
> +               sysfs_remove_link(&tz->device.kobj,
> +                               kobject_name(&tz->cdevs[i]->device.kobj));
> +       }
> +
>         release_idr(&thermal_zone_idr, &thermal_idr_lock, tz->id);
>         idr_destroy(&tz->idr);
>
> @@ -1920,6 +2111,48 @@ exit_zone:
>  }
>  EXPORT_SYMBOL(add_cdev_to_zone);
>
> +int add_sensor_trip_info(struct thermal_zone *tz, struct thermal_sensor *ts,
> +                       struct thermal_trip_point *trip)
> +{
> +       int indx, ret = -EINVAL;
> +
> +       if (!tz || !ts || !trip)
> +               return ret;
> +
> +       mutex_lock(&zone_list_lock);
> +
> +       GET_INDEX(tz, ts, indx, sensor);
> +       if (indx < 0)
> +               goto exit_indx;
> +
> +       /* Create kobj for /sys/class/thermal/zoneX/thermal_trip/sensorY */
> +       tz->kobj_trip[indx] = kobject_create_and_add(
> +                                       kobject_name(&ts->device.kobj),
> +                                       tz->kobj_thermal_trip);
> +       if (!tz->kobj_trip[indx]) {
> +               ret = -ENOMEM;
> +               goto exit_indx;
> +       }
> +
> +       ret = sysfs_create_group(tz->kobj_trip[indx], &trip_attr_group);
> +       if (ret) {
> +               dev_err(&tz->device, "sysfs_create_group failed:%d\n", ret);
> +               goto exit_kobj;
> +       }
> +
> +       tz->sensor_trip[indx] = trip;
> +       mutex_unlock(&zone_list_lock);
> +       return 0;
> +
> +exit_kobj:
> +       kobject_del(tz->kobj_trip[indx]);
> +       tz->kobj_trip[indx] = NULL;
> +exit_indx:
> +       mutex_unlock(&zone_list_lock);
> +       return ret;
> +}
> +EXPORT_SYMBOL(add_sensor_trip_info);
> +
>  /**
>   * thermal_sensor_register - register a new thermal sensor
>   * @name:      name of the thermal sensor
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index c4e45c7..8372f05 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -158,6 +158,30 @@ struct thermal_attr {
>         char name[THERMAL_NAME_LENGTH];
>  };
>
> +/*
> + * This structure defines the trip points for a sensor.
> + * The actual values for these trip points come from
> + * platform characterization. The thermal governors
> + * (either kernel or user space) may take appropriate
> + * actions when the sensors reach these trip points.
> + * See Documentation/thermal/sysfs-api2.txt for more details.
> + *
> + * As of now, For a particular sensor, we support:
> + * a) 1 hot trip point
> + * b) 1 critical trip point
> + * c) 'n' passive trip points
> + * d) 'm' active trip points
> + */
Durgadoss,
Currently the newly introduced governors don't treat passive/active
differently. What is the plan for this when you rebase the governors
onto the new thermal zone/sensors? Will you handle passive/active
differently, or eliminate the difference?

> +struct thermal_trip_point {
> +       int hot;
> +       int crit;
> +       int num_passive_trips;
> +       int *passive_trips;
> +       int num_active_trips;
> +       int *active_trips;
> +       int active_trip_mask;
> +};
> +
>  struct thermal_sensor {
>         char name[THERMAL_NAME_LENGTH];
>         int id;
> @@ -215,6 +239,16 @@ struct thermal_zone {
>         /* cdev level information */
>         int cdev_indx; /* index into 'cdevs' array */
>         struct thermal_cooling_device *cdevs[MAX_CDEVS_PER_ZONE];
> +
> +       /*
> +        * Thermal sensors trip information:
> +        * kobj_thermal_trip: /sys/class/thermal/zoneX/thermal_trip
> +        * kobj_trip: /sys/class/thermal/zoneX/thermal_trip/sensorY
> +        * sensor_trip: trip point information for each sensor
> +        */
> +       struct kobject *kobj_thermal_trip;
> +       struct kobject *kobj_trip[MAX_SENSORS_PER_ZONE];
> +       struct thermal_trip_point *sensor_trip[MAX_SENSORS_PER_ZONE];
>  };
>
>  /* Structure that holds thermal governor information */
> @@ -295,6 +329,9 @@ int add_sensor_to_zone(struct thermal_zone *, struct thermal_sensor *);
>
>  int add_cdev_to_zone(struct thermal_zone *, struct thermal_cooling_device *);
>
> +int add_sensor_trip_info(struct thermal_zone *, struct thermal_sensor *,
> +                       struct thermal_trip_point *);
> +
>  #ifdef CONFIG_NET
>  extern int thermal_generate_netlink_event(u32 orig, enum events event);
>  #else
> --
> 1.7.9.5
>

^ permalink raw reply

* Re: [PATCH 1/6 v9] arm: use devicetree to get smp_twd clock
From: Prashant Gaikwad @ 2012-12-27  5:11 UTC (permalink / raw)
  To: Mark Langsdorf
  Cc: linux-kernel@vger.kernel.org, cpufreq@vger.kernel.org,
	linux-pm@vger.kernel.org, linux-arm-kernel@lists.infradead.org,
	Rob Herring
In-Reply-To: <1354833773-22845-2-git-send-email-mark.langsdorf@calxeda.com>

On Friday 07 December 2012 04:12 AM, Mark Langsdorf wrote:
> From: Rob Herring <rob.herring@calxeda.com>
>
> Signed-off-by: Rob Herring <rob.herring@calxeda.com>
> Signed-off-by: Mark Langsdorf <mark.langsdorf@calxeda.com>
> ---
> Changes from v4, v5, v6, v7, v8
>          None.
> Changes from v3
>          No longer setting *clk to NULL in twd_get_clock().
> Changes from v2
>          Turned the check for the node pointer into an if-then-else statement.
>          Removed the second, redundant clk_get_rate.
> Changes from v1
>          None.
>
>   arch/arm/kernel/smp_twd.c | 19 +++++++++++--------
>   1 file changed, 11 insertions(+), 8 deletions(-)

Hi Mark,

What is the status of this patch?

Regards,
PrashantG

> diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
> index b22d700..af46b80 100644
> --- a/arch/arm/kernel/smp_twd.c
> +++ b/arch/arm/kernel/smp_twd.c
> @@ -237,12 +237,15 @@ static irqreturn_t twd_handler(int irq, void *dev_id)
>   	return IRQ_NONE;
>   }
>

^ permalink raw reply

* Re: [PATCH] [RFC] cpufreq: can't raise max frequency with cpu_thermal
From: amit daniel kachhap @ 2012-12-26 19:32 UTC (permalink / raw)
  To: Doug Anderson
  Cc: Sonny Rao, linux-pm, linux-kernel@vger.kernel.org, Zhang Rui,
	Sameer Nanda
In-Reply-To: <CAD=FV=W+tO_3MfGYv_kc3XLw4VWHUXB2NUC+Etj9gW+1vSgvHQ@mail.gmail.com>

On Tue, Dec 18, 2012 at 9:45 PM, Doug Anderson <dianders@chromium.org> wrote:
> Amit,
>
> On Tue, Dec 18, 2012 at 8:17 PM, amit daniel kachhap
> <amit.daniel@samsung.com> wrote:
>> On Tue, Dec 18, 2012 at 12:29 AM, Sonny Rao <sonnyrao@chromium.org> wrote:
>>> The cpu_thermal generic thermal management code has a bug where once
>>> max cpu frequency has been lowered in sysfs (scaling_max_freq) it is
>>> not possible to raise the max back up later.  The bug is that the
>>> notifer gets called by __cpufreq_set_policy() before the user policy
>>> max is raised, and is incorrectly trying to enforce the max frequency
>>> policy even when we are trying to change the policy.  It is also not
>>> clear why this driver is looking at the user policy since it is
>>> primarily supposed to enforce thermal policy, not user set policy.
>>
>> Hi Sunny,
>>
>> I am not sure if this change is needed.
>
> Do you have a machine that's running with your code?  Can you go into
> sysfs (/sys/devices/system/cpu/cpu0/cpufreq/) and try lowering then
> raising the max frequency by doing something like this (assumes that
> you can scale down to 200MHz):
>
>   cd /sys/devices/system/cpu/cpu0/cpufreq/
>   OLD_VAL=$(cat scaling_max_freq)
>   cat scaling_min_freq > scaling_max_freq
>   echo ${OLD_VAL} > scaling_max_freq
>
>   echo "$(cat scaling_max_freq) should be ${OLD_VAL}.  Is it?"
>
> ...when I run the above without Sonny's patch on my system I see:
>   200000 should be 1700000. Is it?
>
> ...after Sonny's patch then the above works.
Hi Doug,

I tested the above steps on an Exynos Origen board with all cpufreq
cooling configs enabled, on kernel version 3.8-rc1.
In my tests I am able to set scaling_max_freq to all values. Also, I
am within the normal temperature threshold. So basically I am not able
to reproduce the reported error.

Thanks,
Amit Daniel
>
>> There is a check in cpufreq_thermal_notifier function to return 0 if
>> notify_device == NOTIFY_INVALID. So the user will be always able to
>> change the max frequency in normal situation. Did you tested this for
>> some corner cases?
>> The reason behind putting this check is that I don't want to override
>> the user constraints.
>>
>> Thanks,
>> Amit Daniel
>>
>>>
>>> Signed-off-by: Sonny Rao <sonnyrao@chromium.org>
>>> ---
>>>  drivers/thermal/cpu_cooling.c |    4 ----
>>>  1 files changed, 0 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
>>> index 836828e..63bc708 100644
>>> --- a/drivers/thermal/cpu_cooling.c
>>> +++ b/drivers/thermal/cpu_cooling.c
>>> @@ -219,10 +219,6 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
>>>         if (cpumask_test_cpu(policy->cpu, &notify_device->allowed_cpus))
>>>                 max_freq = notify_device->cpufreq_val;
>>>
>>> -       /* Never exceed user_policy.max*/
>>> -       if (max_freq > policy->user_policy.max)
>>> -               max_freq = policy->user_policy.max;
>>> -
>>>         if (policy->max != max_freq)
>>>                 cpufreq_verify_within_limits(policy, 0, max_freq);
>>>
>>> --
>>> 1.7.7.3
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>> Please read the FAQ at  http://www.tux.org/lkml/
>
> -Doug
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply

* [PATCH] cpuidle - fix lock contention in the idle path
From: Daniel Lezcano @ 2012-12-26 10:01 UTC (permalink / raw)
  To: rafael.j.wysocki; +Cc: rja, linux-pm, pdeschrijver, akpm, linux-kernel

The commit bf4d1b5ddb78f86078ac6ae0415802d5f0c68f92 introduces
a lock in the cpuidle_get_cpu_driver function. This function
is used in the idle_call function.

The problem is the contention with a large number of cpus because
they try to access the idle routine at the same time.

The lock can be safely removed because of how the cpuidle API is
used. cpuidle_register_driver is called first, but until
cpuidle_register_device is called we do not enter the cpuidle
idle call function, because the device is not enabled.

The cpuidle_unregister_driver function, which leads to a NULL driver,
is not called before cpuidle_unregister_device.

This is how the cpuidle API is used by the different drivers.

However, a cleanup around the lock and a proper refcounting
mechanism should be used to ensure the consistency of the API;
for example, cpuidle_unregister_driver should fail if its refcount
is not 0.

These modifications will need some code reorganization and rewriting,
which is beyond the scope of a fix.

The following patch is a hot fix that returns to the initial behavior
by removing the lock when getting the driver.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/cpuidle/driver.c |    8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 3af841f..c2b281a 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -235,16 +235,10 @@ EXPORT_SYMBOL_GPL(cpuidle_get_driver);
  */
 struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
 {
-	struct cpuidle_driver *drv;
-
 	if (!dev)
 		return NULL;

-	spin_lock(&cpuidle_driver_lock);
-	drv = __cpuidle_get_cpu_driver(dev->cpu);
-	spin_unlock(&cpuidle_driver_lock);
-
-	return drv;
+	return __cpuidle_get_cpu_driver(dev->cpu);
 }
 EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver);

-- 
1.7.9.5

^ permalink raw reply related

* RE: [PATCH 3/8] Thermal: Add APIs to bind cdev to new zone structure
From: R, Durgadoss @ 2012-12-26  3:30 UTC (permalink / raw)
  To: Wei Ni
  Cc: Zhang, Rui, linux-pm@vger.kernel.org,
	linux-kernel@vger.kernel.org, hongbo.zhang@linaro.org
In-Reply-To: <50D96438.3090105@nvidia.com>



> -----Original Message-----
> From: linux-pm-owner@vger.kernel.org [mailto:linux-pm-
> owner@vger.kernel.org] On Behalf Of Wei Ni
> Sent: Tuesday, December 25, 2012 2:01 PM
> To: R, Durgadoss
> Cc: Zhang, Rui; linux-pm@vger.kernel.org; linux-kernel@vger.kernel.org;
> hongbo.zhang@linaro.org
> Subject: Re: [PATCH 3/8] Thermal: Add APIs to bind cdev to new zone
> structure
> 
> On 12/18/2012 05:29 PM, Durgadoss R wrote:
> > This patch creates new APIs to add/remove a
> > cdev to/from a zone. This patch does not change
> > the old cooling device implementation.
> >
> > Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
> > ---
> >  drivers/thermal/thermal_sys.c |   80
> +++++++++++++++++++++++++++++++++++++++++
> >  include/linux/thermal.h       |    8 +++++
> >  2 files changed, 88 insertions(+)
> >
> > diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
> > index 06d5a12..b39bf97 100644
> > --- a/drivers/thermal/thermal_sys.c
> > +++ b/drivers/thermal/thermal_sys.c
> > @@ -58,6 +58,7 @@ static LIST_HEAD(thermal_governor_list);
> >  static DEFINE_MUTEX(thermal_list_lock);
> >  static DEFINE_MUTEX(sensor_list_lock);
> >  static DEFINE_MUTEX(zone_list_lock);
> > +static DEFINE_MUTEX(cdev_list_lock);
> >  static DEFINE_MUTEX(thermal_governor_lock);
> >
> >  #define for_each_thermal_sensor(pos) \
> > @@ -82,6 +83,9 @@ static DEFINE_MUTEX(thermal_governor_lock);
> >  		mutex_unlock(&type##_list_lock);	\
> >  	} while (0)
> >
> > +#define for_each_cdev(pos) \
> > +	list_for_each_entry(pos, &thermal_cdev_list, node)
> > +
> >  static struct thermal_governor *__find_governor(const char *name)
> >  {
> >  	struct thermal_governor *pos;
> > @@ -462,6 +466,24 @@ static void remove_sensor_from_zone(struct
> thermal_zone *tz,
> >  	tz->sensor_indx--;
> >  }
> >
> > +static void remove_cdev_from_zone(struct thermal_zone *tz,
> > +				struct thermal_cooling_device *cdev)
> > +{
> > +	int j, indx;
> > +
> > +	GET_INDEX(tz, cdev, indx, cdev);
> > +	if (indx < 0)
> > +		return;
> > +
> > +	sysfs_remove_link(&tz->device.kobj, kobject_name(&cdev-
> >device.kobj));
> > +
> > +	/* Shift the entries in the tz->cdevs array */
> > +	for (j = indx; j < MAX_CDEVS_PER_ZONE - 1; j++)
> > +		tz->cdevs[j] = tz->cdevs[j + 1];
> > +
> > +	tz->cdev_indx--;
> > +}
> > +
> >  /* sys I/F for thermal zone */
> >
> >  #define to_thermal_zone(_dev) \
> > @@ -1458,6 +1480,7 @@ void thermal_cooling_device_unregister(struct
> thermal_cooling_device *cdev)
> >  	int i;
> >  	const struct thermal_zone_params *tzp;
> >  	struct thermal_zone_device *tz;
> > +	struct thermal_zone *tmp_tz;
> >  	struct thermal_cooling_device *pos = NULL;
> >
> >  	if (!cdev)
> > @@ -1495,6 +1518,13 @@ void thermal_cooling_device_unregister(struct
> thermal_cooling_device *cdev)
> >
> >  	mutex_unlock(&thermal_list_lock);
> >
> > +	mutex_lock(&zone_list_lock);
> > +
> > +	for_each_thermal_zone(tmp_tz)
> > +		remove_cdev_from_zone(tmp_tz, cdev);
> > +
> > +	mutex_unlock(&zone_list_lock);
> > +
> >  	if (cdev->type[0])
> >  		device_remove_file(&cdev->device, &dev_attr_cdev_type);
> >  	device_remove_file(&cdev->device, &dev_attr_max_state);
> > @@ -1790,6 +1820,23 @@ exit:
> >  }
> >  EXPORT_SYMBOL(remove_thermal_zone);
> >
> > +struct thermal_cooling_device *get_cdev_by_name(const char *name)
> > +{
> > +	struct thermal_cooling_device *pos;
> > +	struct thermal_cooling_device *cdev = NULL;
> > +
> > +	mutex_lock(&cdev_list_lock);
> > +	for_each_cdev(pos) {
> > +		if (!strnicmp(pos->type, name, THERMAL_NAME_LENGTH)) {
> > +			cdev = pos;
> > +			break;
> > +		}
> > +	}
> > +	mutex_unlock(&cdev_list_lock);
> > +	return cdev;
> > +}
> > +EXPORT_SYMBOL(get_cdev_by_name);
> 
> It seems you forgot to add get_cdev_by_name() and
> get_sensor_by_name()
> to the include file.

Thanks.. Will take care of this in v2.

Regards,
Durga

^ permalink raw reply

* RE: [PATCH 8/8] Thermal: Dummy driver used for testing
From: R, Durgadoss @ 2012-12-26  3:29 UTC (permalink / raw)
  To: Wei Ni
  Cc: Zhang, Rui, linux-pm@vger.kernel.org,
	linux-kernel@vger.kernel.org, hongbo.zhang@linaro.org
In-Reply-To: <50D965F1.6000806@nvidia.com>



> -----Original Message-----
> From: Wei Ni [mailto:wni@nvidia.com]
> Sent: Tuesday, December 25, 2012 2:08 PM
> To: R, Durgadoss
> Cc: Zhang, Rui; linux-pm@vger.kernel.org; linux-kernel@vger.kernel.org;
> hongbo.zhang@linaro.org
> Subject: Re: [PATCH 8/8] Thermal: Dummy driver used for testing
> 
> On 12/18/2012 05:29 PM, Durgadoss R wrote:
> > This patch has a dummy driver that can be used for
> > testing purposes. This patch is not for merge.
> >
> > Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
> > ---
> >  drivers/thermal/Kconfig        |    5 +
> >  drivers/thermal/Makefile       |    3 +
> >  drivers/thermal/thermal_test.c |  315
> ++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 323 insertions(+)
> >  create mode 100644 drivers/thermal/thermal_test.c
> >
> > diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
> > index c5ba3340..3b92a76 100644
> > --- a/drivers/thermal/Kconfig
> > +++ b/drivers/thermal/Kconfig
> > @@ -136,4 +136,9 @@ config DB8500_CPUFREQ_COOLING
> >  	  bound cpufreq cooling device turns active to set CPU frequency low
> to
> >  	  cool down the CPU.
> >
> > +config THERMAL_TEST
> > +	tristate "test driver"
> > +	help
> > +	  Enable this to test the thermal framework.
> > +
> >  endif
> > diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> > index d8da683..02c3edb 100644
> > --- a/drivers/thermal/Makefile
> > +++ b/drivers/thermal/Makefile
> > @@ -18,3 +18,6 @@ obj-$(CONFIG_RCAR_THERMAL)	+=
> rcar_thermal.o
> >  obj-$(CONFIG_EXYNOS_THERMAL)	+= exynos_thermal.o
> >  obj-$(CONFIG_DB8500_THERMAL)	+= db8500_thermal.o
> >  obj-$(CONFIG_DB8500_CPUFREQ_COOLING)	+=
> db8500_cpufreq_cooling.o
> > +
> > +# dummy driver for testing
> > +obj-$(CONFIG_THERMAL_TEST)	+= thermal_test.o
> > diff --git a/drivers/thermal/thermal_test.c
> b/drivers/thermal/thermal_test.c
> > new file mode 100644
> > index 0000000..5a11e34
> > --- /dev/null
> > +++ b/drivers/thermal/thermal_test.c
> > @@ -0,0 +1,315 @@
> > +/*
> > + * thermal_test.c - This driver can be used to test Thermal
> > + *			   Framework changes. Not specific to any
> > + *			   platform. Fills the log buffer generously ;)
> > + *
> > + * Copyright (C) 2012 Intel Corporation
> > + *
> > + *
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> ~~~~~~~~~~~~~~~~
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; version 2 of the License.
> > + *
> > + * This program is distributed in the hope that it will be useful, but
> > + * WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See
> the GNU
> > + * General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> along
> > + * with this program; if not, write to the Free Software Foundation, Inc.,
> > + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
> > + *
> > + *
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> ~~~~~~~~~~~~~~~~
> > + * Author: Durgadoss R <durgadoss.r@intel.com>
> > + */
> > +
> > +#define pr_fmt(fmt)  "thermal_test: " fmt
> > +
> > +#include <linux/module.h>
> > +#include <linux/init.h>
> > +#include <linux/err.h>
> > +#include <linux/param.h>
> > +#include <linux/device.h>
> > +#include <linux/slab.h>
> > +#include <linux/pm.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/thermal.h>
> > +
> > +#define MAX_THERMAL_ZONES	2
> > +#define MAX_THERMAL_SENSORS	2
> > +#define MAX_COOLING_DEVS	4
> > +#define NUM_THRESHOLDS		3
> > +
> > +static struct ts_data {
> > +	int curr_temp;
> > +	int flag;
> > +} ts_data;
> > +
> > +int active_trips[10] = {100, 90, 80, 70, 60, 50, 40, 30, 20, 10};
> > +int passive_trips[5] = {100, 90, 60, 50, 40};
> > +
> > +static struct platform_device *pdev;
> > +static unsigned long cur_cdev_state = 2;
> > +static struct thermal_sensor *ts, *ts1;
> > +static struct thermal_zone *tz;
> > +static struct thermal_cooling_device *cdev;
> > +
> > +static long thermal_thresholds[NUM_THRESHOLDS] = {30000, 40000,
> 50000};
> > +
> > +static struct thermal_trip_point trip = {
> > +	.hot = 90,
> > +	.crit = 100,
> > +	.num_passive_trips = 5,
> > +	.passive_trips = passive_trips,
> > +	.num_active_trips = 10,
> > +	.active_trips = active_trips,
> > +	.active_trip_mask = 0xCFF,
> > +};
> > +
> > +static struct thermal_trip_point trip1 = {
> > +	.hot = 95,
> > +	.crit = 125,
> > +	.num_passive_trips = 0,
> > +	.passive_trips = passive_trips,
> > +	.num_active_trips = 6,
> > +	.active_trips = active_trips,
> > +	.active_trip_mask = 0xFF,
> > +};
> > +
> > +static int read_cur_state(struct thermal_cooling_device *cdev,
> > +			unsigned long *state)
> > +{
> > +	*state = cur_cdev_state;
> > +	return 0;
> > +}
> > +
> > +static int write_cur_state(struct thermal_cooling_device *cdev,
> > +			unsigned long state)
> > +{
> > +	cur_cdev_state = state;
> > +	return 0;
> > +}
> > +
> > +static int read_max_state(struct thermal_cooling_device *cdev,
> > +			unsigned long *state)
> > +{
> > +	*state = 5;
> > +	return 0;
> > +}
> > +
> > +static int read_curr_temp(struct thermal_sensor *ts, long *temp)
> > +{
> > +	*temp = ts_data.curr_temp;
> > +	return 0;
> > +}
> > +
> > +static ssize_t
> > +flag_show(struct device *dev, struct device_attribute *devattr, char
> *buf)
> > +{
> > +	return sprintf(buf, "%d\n", ts_data.flag);
> > +}
> > +
> > +static ssize_t
> > +flag_store(struct device *dev, struct device_attribute *attr,
> > +		    const char *buf, size_t count)
> > +{
> > +	long flag;
> > +
> > +	if (kstrtol(buf, 10, &flag))
> > +		return -EINVAL;
> > +
> > +	ts_data.flag = flag;
> > +
> > +	if (flag == 0) {
> > +		thermal_sensor_unregister(ts);
> > +		ts = NULL;
> > +		pr_err("thermal_sensor_unregister (ts) done\n");
> > +	} else if (flag == 1) {
> > +		thermal_sensor_unregister(ts1);
> > +		ts1 = NULL;
> > +		pr_err("thermal_sensor_unregister (ts1) done\n");
> > +	} else if (flag == 2) {
> > +		thermal_cooling_device_unregister(cdev);
> > +		cdev = NULL;
> > +		pr_err("cdev unregister (cdev) done\n");
> > +	} else if (flag == 3) {
> > +		if (tz)
> > +			remove_thermal_zone(tz);
> > +		tz = NULL;
> > +		pr_err("removed thermal zone\n");
> > +	}
> > +
> > +	return count;
> > +}
> 
> What does this flag_show()/flag_store() mean?
> I noticed that you didn't call xxx_unregister() in the remove callback.
> Do you mean we need to provide these functions in the platform thermal
> driver? or this is just for test?

At runtime, I wanted to test the register/unregister APIs. That's why I used
this kind of mechanism. This is _only_ for test.

Thanks,
Durga

^ permalink raw reply

* Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
From: Aaron Lu @ 2012-12-26  1:42 UTC (permalink / raw)
  To: Tejun Heo
  Cc: James Bottomley, Rafael J. Wysocki, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
In-Reply-To: <20121225171723.GI10220@mtj.dyndns.org>

On 12/26/2012 01:17 AM, Tejun Heo wrote:
> Hello, Aaron.
> 
> On Thu, Dec 20, 2012 at 02:07:25PM +0800, Aaron Lu wrote:
>> +static int is_gendisk_part0(struct device *dev, void *data)
>> +{
>> +	struct device **child = data;
>> +
>> +	if (dev->class == &block_class && dev->type == &disk_type) {
>> +		*child = dev;
>> +		return 1;
>> +	} else
>> +		return 0;
>> +}
>> +
>> +/**
>> + * disk_from_device - Get the gendisk pointer for this device.
>> + * @dev: the device this gendisk is created for, i.e. gendisk->driverfs_dev
>> + *
>> + * LLD sometimes need to play with the gendisk without HLD's aware,
>> + * this routine gives LLD the required access to gendisk.
>> + *
>> + * CONTEXT:
>> + * Don't care.
>> + */
>> +struct gendisk *disk_from_device(struct device *dev)
>> +{
>> +	struct device *child;
>> +
>> +	if (device_for_each_child(dev, &child, is_gendisk_part0))
>> +		return dev_to_disk(child);
>> +	else
>> +		return NULL;
>> +}
>> +EXPORT_SYMBOL(disk_from_device);
> 
> This is really a round-about way to find out the matching device and
> it wouldn't work if the disk device nests deeper.  Doesn't really look
> like a good idea to me.

I don't quite understand the 'disk device nests deeper' case, can you
please elaborate? My understanding is, as long as the disk's part0
device has a parent, this function should work. LLDs that want to take
advantage of this function should pass the device that is the parent
of part0 as the param; this function itself doesn't try to dig further.
The only problem I can see is when there are multiple gendisks created
for a single device, but I don't know if there is such a case.

> 
>> Then together with disk_try_block_events and disk_unblock_events, we can
>> avoid touching SCSI layer to let ODD stay in zero power state.
> 
> Also, I'd much prefer something along the line of
> block_events_nowait() instead of try_block.

Sure, no problem.

Thanks,
Aaron


^ permalink raw reply

* Re: [PATCH 16/25] PM / Domains: don't use [delayed_]work_pending()
From: Tejun Heo @ 2012-12-26  1:23 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: linux-pm, linux-kernel
In-Reply-To: <43765611.nGGJ409TUC@vostro.rjw.lan>

Hello,

On Tue, Dec 25, 2012 at 09:33:07PM +0100, Rafael J. Wysocki wrote:
> OK, so I'd generally prefer changelogs like this:
> 
> "There's no need to test whether a (delayed) work item is pending
> before queueing, flushing or cancelling it, so remove work_pending()
> tests used in those cases."
> 
> If that's fine with you, I'll queue up [16/25] and [11/25] for v3.9
> with the above as the changelog.

Sure thing.  Please go ahead.

Thanks.

-- 
tejun

^ permalink raw reply

* Re: [PATCH 16/25] PM / Domains: don't use [delayed_]work_pending()
From: Rafael J. Wysocki @ 2012-12-25 20:33 UTC (permalink / raw)
  To: Tejun Heo, linux-pm; +Cc: linux-kernel
In-Reply-To: <20121225170328.GH10220@mtj.dyndns.org>

On Tuesday, December 25, 2012 09:03:28 AM Tejun Heo wrote:
> Hello, Rafael.
> 
> On Sat, Dec 22, 2012 at 12:57:20PM +0100, Rafael J. Wysocki wrote:
> > On Friday, December 21, 2012 05:57:06 PM Tejun Heo wrote:
> > > There's no need to test whether a (delayed) work item is pending
> > > before queueing, flushing or cancelling it.  Most uses are unnecessary
> > > and quite a few of them are buggy.
> > 
> > Is the particular one you're removing from domain.c buggy?
> 
> It's a bit difficult to tell without understanding the code base but
> from quick glancing it looks like it could be.  The queueing and
> actual execution don't grab the same lock, so there doesn't seem to be
> anything work_pending() returning %true for a work item which already
> started executing.  Even if the bug is there, it's likely to be very
> difficult to trigger tho, so I wouldn't consider it an urgent fix.

OK, so I'd generally prefer changelogs like this:

"There's no need to test whether a (delayed) work item is pending
before queueing, flushing or cancelling it, so remove work_pending()
tests used in those cases."

If that's fine with you, I'll queue up [16/25] and [11/25] for v3.9
with the above as the changelog.

Thanks,
Rafael


-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply

* Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
From: Tejun Heo @ 2012-12-25 17:17 UTC (permalink / raw)
  To: Aaron Lu
  Cc: James Bottomley, Rafael J. Wysocki, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
In-Reply-To: <50D2AB1D.9050602@intel.com>

Hello, Aaron.

On Thu, Dec 20, 2012 at 02:07:25PM +0800, Aaron Lu wrote:
> +static int is_gendisk_part0(struct device *dev, void *data)
> +{
> +	struct device **child = data;
> +
> +	if (dev->class == &block_class && dev->type == &disk_type) {
> +		*child = dev;
> +		return 1;
> +	} else
> +		return 0;
> +}
> +
> +/**
> + * disk_from_device - Get the gendisk pointer for this device.
> + * @dev: the device this gendisk is created for, i.e. gendisk->driverfs_dev
> + *
> + * LLD sometimes need to play with the gendisk without HLD's aware,
> + * this routine gives LLD the required access to gendisk.
> + *
> + * CONTEXT:
> + * Don't care.
> + */
> +struct gendisk *disk_from_device(struct device *dev)
> +{
> +	struct device *child;
> +
> +	if (device_for_each_child(dev, &child, is_gendisk_part0))
> +		return dev_to_disk(child);
> +	else
> +		return NULL;
> +}
> +EXPORT_SYMBOL(disk_from_device);

This is really a round-about way to find out the matching device and
it wouldn't work if the disk device nests deeper.  Doesn't really look
like a good idea to me.

> Then together with disk_try_block_events and disk_unblock_events, we can
> avoid touching SCSI layer to let ODD stay in zero power state.

Also, I'd much prefer something along the line of
block_events_nowait() instead of try_block.

Thanks.

-- 
tejun

^ permalink raw reply

* Re: [PATCH 16/25] PM / Domains: don't use [delayed_]work_pending()
From: Tejun Heo @ 2012-12-25 17:03 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: linux-kernel, Rafael J. Wysocki, linux-pm
In-Reply-To: <21556832.hKrqJSonFg@vostro.rjw.lan>

Hello, Rafael.

On Sat, Dec 22, 2012 at 12:57:20PM +0100, Rafael J. Wysocki wrote:
> On Friday, December 21, 2012 05:57:06 PM Tejun Heo wrote:
> > There's no need to test whether a (delayed) work item is pending
> > before queueing, flushing or cancelling it.  Most uses are unnecessary
> > and quite a few of them are buggy.
> 
> Is the particular one you're removing from domain.c buggy?

It's a bit difficult to tell without understanding the code base but
from quick glancing it looks like it could be.  The queueing and
actual execution don't grab the same lock, so there doesn't seem to be
anything preventing work_pending() from returning %true for a work item
which already started executing.  Even if the bug is there, it's likely
to be very difficult to trigger tho, so I wouldn't consider it an urgent
fix.

Thanks.

-- 
tejun

^ permalink raw reply

* Re: [PATCH 11/25] pm: don't use [delayed_]work_pending()
From: Tejun Heo @ 2012-12-25 16:44 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: linux-kernel, linux-pm
In-Reply-To: <1652427.IL3F6yol0I@vostro.rjw.lan>

Hello, Rafael.

On Sat, Dec 22, 2012 at 12:53:29PM +0100, Rafael J. Wysocki wrote:
> On Friday, December 21, 2012 05:57:01 PM Tejun Heo wrote:
> > There's no need to test whether a (delayed) work item is pending
> > before queueing, flushing or cancelling it.  Most uses are unnecessary
> > and quite a few of them are buggy.
> 
> Can you please say why they are buggy?

Usually one of the following two reasons.

* The user gets confused and fails to handle !PENDING && currently
  executing properly.

* work_pending() doesn't have any memory barrier and the caller
  assumes work_pending() is somehow properly synchronized by itself.

Thanks.

-- 
tejun

^ permalink raw reply

* Re: [PATCH 8/8] Thermal: Dummy driver used for testing
From: Wei Ni @ 2012-12-25  8:38 UTC (permalink / raw)
  To: Durgadoss R
  Cc: rui.zhang@intel.com, linux-pm@vger.kernel.org,
	linux-kernel@vger.kernel.org, hongbo.zhang@linaro.org
In-Reply-To: <1355822977-4804-9-git-send-email-durgadoss.r@intel.com>

On 12/18/2012 05:29 PM, Durgadoss R wrote:
> This patch has a dummy driver that can be used for
> testing purposes. This patch is not for merge.
> 
> Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
> ---
>  drivers/thermal/Kconfig        |    5 +
>  drivers/thermal/Makefile       |    3 +
>  drivers/thermal/thermal_test.c |  315 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 323 insertions(+)
>  create mode 100644 drivers/thermal/thermal_test.c
> 
> diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
> index c5ba3340..3b92a76 100644
> --- a/drivers/thermal/Kconfig
> +++ b/drivers/thermal/Kconfig
> @@ -136,4 +136,9 @@ config DB8500_CPUFREQ_COOLING
>  	  bound cpufreq cooling device turns active to set CPU frequency low to
>  	  cool down the CPU.
>  
> +config THERMAL_TEST
> +	tristate "test driver"
> +	help
> +	  Enable this to test the thermal framework.
> +
>  endif
> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> index d8da683..02c3edb 100644
> --- a/drivers/thermal/Makefile
> +++ b/drivers/thermal/Makefile
> @@ -18,3 +18,6 @@ obj-$(CONFIG_RCAR_THERMAL)	+= rcar_thermal.o
>  obj-$(CONFIG_EXYNOS_THERMAL)	+= exynos_thermal.o
>  obj-$(CONFIG_DB8500_THERMAL)	+= db8500_thermal.o
>  obj-$(CONFIG_DB8500_CPUFREQ_COOLING)	+= db8500_cpufreq_cooling.o
> +
> +# dummy driver for testing
> +obj-$(CONFIG_THERMAL_TEST)	+= thermal_test.o
> diff --git a/drivers/thermal/thermal_test.c b/drivers/thermal/thermal_test.c
> new file mode 100644
> index 0000000..5a11e34
> --- /dev/null
> +++ b/drivers/thermal/thermal_test.c
> @@ -0,0 +1,315 @@
> +/*
> + * thermal_test.c - This driver can be used to test Thermal
> + *			   Framework changes. Not specific to any
> + *			   platform. Fills the log buffer generously ;)
> + *
> + * Copyright (C) 2012 Intel Corporation
> + *
> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; version 2 of the License.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, write to the Free Software Foundation, Inc.,
> + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
> + *
> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> + * Author: Durgadoss R <durgadoss.r@intel.com>
> + */
> +
> +#define pr_fmt(fmt)  "thermal_test: " fmt
> +
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/err.h>
> +#include <linux/param.h>
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +#include <linux/pm.h>
> +#include <linux/platform_device.h>
> +#include <linux/thermal.h>
> +
> +#define MAX_THERMAL_ZONES	2
> +#define MAX_THERMAL_SENSORS	2
> +#define MAX_COOLING_DEVS	4
> +#define NUM_THRESHOLDS		3
> +
> +static struct ts_data {
> +	int curr_temp;
> +	int flag;
> +} ts_data;
> +
> +int active_trips[10] = {100, 90, 80, 70, 60, 50, 40, 30, 20, 10};
> +int passive_trips[5] = {100, 90, 60, 50, 40};
> +
> +static struct platform_device *pdev;
> +static unsigned long cur_cdev_state = 2;
> +static struct thermal_sensor *ts, *ts1;
> +static struct thermal_zone *tz;
> +static struct thermal_cooling_device *cdev;
> +
> +static long thermal_thresholds[NUM_THRESHOLDS] = {30000, 40000, 50000};
> +
> +static struct thermal_trip_point trip = {
> +	.hot = 90,
> +	.crit = 100,
> +	.num_passive_trips = 5,
> +	.passive_trips = passive_trips,
> +	.num_active_trips = 10,
> +	.active_trips = active_trips,
> +	.active_trip_mask = 0xCFF,
> +};
> +
> +static struct thermal_trip_point trip1 = {
> +	.hot = 95,
> +	.crit = 125,
> +	.num_passive_trips = 0,
> +	.passive_trips = passive_trips,
> +	.num_active_trips = 6,
> +	.active_trips = active_trips,
> +	.active_trip_mask = 0xFF,
> +};
> +
> +static int read_cur_state(struct thermal_cooling_device *cdev,
> +			unsigned long *state)
> +{
> +	*state = cur_cdev_state;
> +	return 0;
> +}
> +
> +static int write_cur_state(struct thermal_cooling_device *cdev,
> +			unsigned long state)
> +{
> +	cur_cdev_state = state;
> +	return 0;
> +}
> +
> +static int read_max_state(struct thermal_cooling_device *cdev,
> +			unsigned long *state)
> +{
> +	*state = 5;
> +	return 0;
> +}
> +
> +static int read_curr_temp(struct thermal_sensor *ts, long *temp)
> +{
> +	*temp = ts_data.curr_temp;
> +	return 0;
> +}
> +
> +static ssize_t
> +flag_show(struct device *dev, struct device_attribute *devattr, char *buf)
> +{
> +	return sprintf(buf, "%d\n", ts_data.flag);
> +}
> +
> +static ssize_t
> +flag_store(struct device *dev, struct device_attribute *attr,
> +		    const char *buf, size_t count)
> +{
> +	long flag;
> +
> +	if (kstrtol(buf, 10, &flag))
> +		return -EINVAL;
> +
> +	ts_data.flag = flag;
> +
> +	if (flag == 0) {
> +		thermal_sensor_unregister(ts);
> +		ts = NULL;
> +		pr_err("thermal_sensor_unregister (ts) done\n");
> +	} else if (flag == 1) {
> +		thermal_sensor_unregister(ts1);
> +		ts1 = NULL;
> +		pr_err("thermal_sensor_unregister (ts1) done\n");
> +	} else if (flag == 2) {
> +		thermal_cooling_device_unregister(cdev);
> +		cdev = NULL;
> +		pr_err("cdev unregister (cdev) done\n");
> +	} else if (flag == 3) {
> +		if (tz)
> +			remove_thermal_zone(tz);
> +		tz = NULL;
> +		pr_err("removed thermal zone\n");
> +	}
> +
> +	return count;
> +}

What does this flag_show()/flag_store() mean?
I noticed that you didn't call xxx_unregister() in the remove callback.
Do you mean we need to provide these functions in the platform thermal
driver? Or is this just for testing?

Thanks.
Wei.

> +
> +static ssize_t
> +temp_show(struct device *dev, struct device_attribute *devattr, char *buf)
> +{
> +	return sprintf(buf, "%d\n", ts_data.curr_temp);
> +}
> +
> +static int read_threshold(struct thermal_sensor *ts, int indx, long *val)
> +{
> +	if (indx < 0 || indx >= NUM_THRESHOLDS)
> +		return -EINVAL;
> +
> +	*val = thermal_thresholds[indx];
> +	return 0;
> +}
> +
> +static int write_threshold(struct thermal_sensor *ts, int indx, long val)
> +{
> +	if (indx < 0 || indx >= NUM_THRESHOLDS)
> +		return -EINVAL;
> +
> +	thermal_thresholds[indx] = val;
> +	return 0;
> +}
> +
> +static ssize_t
> +temp_store(struct device *dev, struct device_attribute *attr,
> +		    const char *buf, size_t count)
> +{
> +	long temp;
> +
> +	if (kstrtol(buf, 10, &temp))
> +		return -EINVAL;
> +
> +	ts_data.curr_temp = temp;
> +	return count;
> +}
> +
> +static struct thermal_sensor_ops ts_ops = {
> +	.get_temp = read_curr_temp,
> +	.get_threshold = read_threshold,
> +	.set_threshold = write_threshold,
> +};
> +
> +static struct thermal_sensor_ops ts1_ops = {
> +	.get_temp = read_curr_temp,
> +	.get_threshold = read_threshold,
> +	.set_threshold = write_threshold,
> +};
> +
> +static struct thermal_cooling_device_ops cdev_ops = {
> +	.get_cur_state = read_cur_state,
> +	.set_cur_state = write_cur_state,
> +	.get_max_state = read_max_state,
> +};
> +
> +static DEVICE_ATTR(test_temp, S_IRUGO | S_IWUSR, temp_show, temp_store);
> +static DEVICE_ATTR(sensor_enable, S_IRUGO | S_IWUSR, flag_show, flag_store);
> +
> +static int thermal_test_probe(struct platform_device *pdev)
> +{
> +	int ret;
> +
> +	ts_data.curr_temp = 30000;
> +	ts_data.flag = 1;
> +
> +	ts = thermal_sensor_register("ts", NUM_THRESHOLDS, &ts_ops, &ts_data);
> +	if (!ts) {
> +		pr_err("thermal_sensor_register failed:\n");
> +		return -EINVAL;
> +	}
> +
> +	ts1 = thermal_sensor_register("ts1", NUM_THRESHOLDS, &ts1_ops, NULL);
> +
> +	cdev = thermal_cooling_device_register("cdev", NULL, &cdev_ops);
> +	if (!cdev) {
> +		pr_err("cdev_register failed:\n");
> +		return -EINVAL;
> +	}
> +
> +	device_create_file(&pdev->dev, &dev_attr_test_temp);
> +	device_create_file(&pdev->dev, &dev_attr_sensor_enable);
> +
> +	/* Create a zone */
> +	tz = create_thermal_zone("myZone", NULL);
> +	if (!tz) {
> +		pr_err("create_thermal_zone failed:\n");
> +		return -EINVAL;
> +	}
> +
> +	pr_err("Zone created successfully..\n");
> +
> +	ret = add_sensor_to_zone(tz, ts);
> +	if (ret) {
> +		pr_err("add_sensor_to_zone failed:%d\n", ret);
> +		return ret;
> +	}
> +
> +	ret = add_sensor_to_zone(tz, ts1);
> +	pr_err("add_sensor (ts1) ret_val: %d\n", ret);
> +
> +	ret = add_cdev_to_zone(tz, cdev);
> +	pr_err("add_cdev_to_zone (cdev) ret_val: %d\n", ret);
> +
> +	ret = add_sensor_trip_info(tz, ts, &trip);
> +	ret = add_sensor_trip_info(tz, ts1, &trip1);
> +	pr_err("add_sensor_trip_info (ts) ret_val: %d\n", ret);
> +	return 0;
> +}
> +
> +static int thermal_test_remove(struct platform_device *pdev)
> +{
> +	device_remove_file(&pdev->dev, &dev_attr_test_temp);
> +	device_remove_file(&pdev->dev, &dev_attr_sensor_enable);
> +
> +	return 0;
> +}
> +
> +/*********************************************************************
> + *		Driver initialization and finalization
> + *********************************************************************/
> +
> +#define DRIVER_NAME "thermal_test"
> +
> +static const struct platform_device_id therm_id_table[] = {
> +	{ DRIVER_NAME, 1 },
> +};
> +
> +static struct platform_driver thermal_test_driver = {
> +	.driver = {
> +		.name = DRIVER_NAME,
> +		.owner = THIS_MODULE,
> +	},
> +	.probe = thermal_test_probe,
> +	.remove = __devexit_p(thermal_test_remove),
> +	.id_table = therm_id_table,
> +};
> +
> +static int __init thermal_test_init(void)
> +{
> +	int ret;
> +
> +	ret = platform_driver_register(&thermal_test_driver);
> +	if (ret) {
> +		pr_err("platform driver register failed:%d\n", ret);
> +		return ret;
> +	}
> +
> +	pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0);
> +	if (IS_ERR(pdev)) {
> +		ret = PTR_ERR(pdev);
> +		pr_err("platform device register failed:%d\n", ret);
> +		platform_driver_unregister(&thermal_test_driver);
> +	}
> +
> +	return ret;
> +}
> +
> +static void __exit thermal_test_exit(void)
> +{
> +	pr_err("in thermal_test_exit\n");
> +	platform_device_unregister(pdev);
> +	platform_driver_unregister(&thermal_test_driver);
> +}
> +
> +module_init(thermal_test_init);
> +module_exit(thermal_test_exit);
> +
> +MODULE_AUTHOR("Durgadoss R <durgadoss.r@intel.com>");
> +MODULE_DESCRIPTION("A dummy driver to test Thermal Framework");
> +MODULE_LICENSE("GPL");
> 


^ permalink raw reply

* Re: [PATCH 3/8] Thermal: Add APIs to bind cdev to new zone structure
From: Wei Ni @ 2012-12-25  8:30 UTC (permalink / raw)
  To: Durgadoss R
  Cc: rui.zhang@intel.com, linux-pm@vger.kernel.org,
	linux-kernel@vger.kernel.org, hongbo.zhang@linaro.org
In-Reply-To: <1355822977-4804-4-git-send-email-durgadoss.r@intel.com>

On 12/18/2012 05:29 PM, Durgadoss R wrote:
> This patch creates new APIs to add/remove a
> cdev to/from a zone. This patch does not change
> the old cooling device implementation.
> 
> Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
> ---
>  drivers/thermal/thermal_sys.c |   80 +++++++++++++++++++++++++++++++++++++++++
>  include/linux/thermal.h       |    8 +++++
>  2 files changed, 88 insertions(+)
> 
> diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
> index 06d5a12..b39bf97 100644
> --- a/drivers/thermal/thermal_sys.c
> +++ b/drivers/thermal/thermal_sys.c
> @@ -58,6 +58,7 @@ static LIST_HEAD(thermal_governor_list);
>  static DEFINE_MUTEX(thermal_list_lock);
>  static DEFINE_MUTEX(sensor_list_lock);
>  static DEFINE_MUTEX(zone_list_lock);
> +static DEFINE_MUTEX(cdev_list_lock);
>  static DEFINE_MUTEX(thermal_governor_lock);
>  
>  #define for_each_thermal_sensor(pos) \
> @@ -82,6 +83,9 @@ static DEFINE_MUTEX(thermal_governor_lock);
>  		mutex_unlock(&type##_list_lock);	\
>  	} while (0)
>  
> +#define for_each_cdev(pos) \
> +	list_for_each_entry(pos, &thermal_cdev_list, node)
> +
>  static struct thermal_governor *__find_governor(const char *name)
>  {
>  	struct thermal_governor *pos;
> @@ -462,6 +466,24 @@ static void remove_sensor_from_zone(struct thermal_zone *tz,
>  	tz->sensor_indx--;
>  }
>  
> +static void remove_cdev_from_zone(struct thermal_zone *tz,
> +				struct thermal_cooling_device *cdev)
> +{
> +	int j, indx;
> +
> +	GET_INDEX(tz, cdev, indx, cdev);
> +	if (indx < 0)
> +		return;
> +
> +	sysfs_remove_link(&tz->device.kobj, kobject_name(&cdev->device.kobj));
> +
> +	/* Shift the entries in the tz->cdevs array */
> +	for (j = indx; j < MAX_CDEVS_PER_ZONE - 1; j++)
> +		tz->cdevs[j] = tz->cdevs[j + 1];
> +
> +	tz->cdev_indx--;
> +}
> +
>  /* sys I/F for thermal zone */
>  
>  #define to_thermal_zone(_dev) \
> @@ -1458,6 +1480,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
>  	int i;
>  	const struct thermal_zone_params *tzp;
>  	struct thermal_zone_device *tz;
> +	struct thermal_zone *tmp_tz;
>  	struct thermal_cooling_device *pos = NULL;
>  
>  	if (!cdev)
> @@ -1495,6 +1518,13 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
>  
>  	mutex_unlock(&thermal_list_lock);
>  
> +	mutex_lock(&zone_list_lock);
> +
> +	for_each_thermal_zone(tmp_tz)
> +		remove_cdev_from_zone(tmp_tz, cdev);
> +
> +	mutex_unlock(&zone_list_lock);
> +
>  	if (cdev->type[0])
>  		device_remove_file(&cdev->device, &dev_attr_cdev_type);
>  	device_remove_file(&cdev->device, &dev_attr_max_state);
> @@ -1790,6 +1820,23 @@ exit:
>  }
>  EXPORT_SYMBOL(remove_thermal_zone);
>  
> +struct thermal_cooling_device *get_cdev_by_name(const char *name)
> +{
> +	struct thermal_cooling_device *pos;
> +	struct thermal_cooling_device *cdev = NULL;
> +
> +	mutex_lock(&cdev_list_lock);
> +	for_each_cdev(pos) {
> +		if (!strnicmp(pos->type, name, THERMAL_NAME_LENGTH)) {
> +			cdev = pos;
> +			break;
> +		}
> +	}
> +	mutex_unlock(&cdev_list_lock);
> +	return cdev;
> +}
> +EXPORT_SYMBOL(get_cdev_by_name);

It seems you forgot to add get_cdev_by_name() and get_sensor_by_name()
to the include file.

> +
>  struct thermal_sensor *get_sensor_by_name(const char *name)
>  {
>  	struct thermal_sensor *pos;
> @@ -1840,6 +1887,39 @@ exit_zone:
>  }
>  EXPORT_SYMBOL(add_sensor_to_zone);
>  
> +int add_cdev_to_zone(struct thermal_zone *tz,
> +			struct thermal_cooling_device *cdev)
> +{
> +	int ret;
> +
> +	if (!tz || !cdev)
> +		return -EINVAL;
> +
> +	mutex_lock(&zone_list_lock);
> +
> +	/* Ensure we are not adding the same cdev again!! */
> +	GET_INDEX(tz, cdev, ret, cdev);
> +	if (ret >= 0) {
> +		ret = -EEXIST;
> +		goto exit_zone;
> +	}
> +
> +	mutex_lock(&cdev_list_lock);
> +	ret = sysfs_create_link(&tz->device.kobj, &cdev->device.kobj,
> +				kobject_name(&cdev->device.kobj));
> +	if (ret)
> +		goto exit_cdev;
> +
> +	tz->cdevs[tz->cdev_indx++] = cdev;
> +
> +exit_cdev:
> +	mutex_unlock(&cdev_list_lock);
> +exit_zone:
> +	mutex_unlock(&zone_list_lock);
> +	return ret;
> +}
> +EXPORT_SYMBOL(add_cdev_to_zone);
> +
>  /**
>   * thermal_sensor_register - register a new thermal sensor
>   * @name:	name of the thermal sensor
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index f08f774..c4e45c7 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -51,6 +51,8 @@
>  
>  #define MAX_SENSORS_PER_ZONE		5
>  
> +#define MAX_CDEVS_PER_ZONE		5
> +
>  struct thermal_sensor;
>  struct thermal_zone_device;
>  struct thermal_cooling_device;
> @@ -209,6 +211,10 @@ struct thermal_zone {
>  	/* Sensor level information */
>  	int sensor_indx; /* index into 'sensors' array */
>  	struct thermal_sensor *sensors[MAX_SENSORS_PER_ZONE];
> +
> +	/* cdev level information */
> +	int cdev_indx; /* index into 'cdevs' array */
> +	struct thermal_cooling_device *cdevs[MAX_CDEVS_PER_ZONE];
>  };
>  
>  /* Structure that holds thermal governor information */
> @@ -287,6 +293,8 @@ struct thermal_zone *create_thermal_zone(const char *, void *);
>  void remove_thermal_zone(struct thermal_zone *);
>  int add_sensor_to_zone(struct thermal_zone *, struct thermal_sensor *);
>  
> +int add_cdev_to_zone(struct thermal_zone *, struct thermal_cooling_device *);
> +
>  #ifdef CONFIG_NET
>  extern int thermal_generate_netlink_event(u32 orig, enum events event);
>  #else
> 

^ permalink raw reply

* Re: [RFC PATCH v4 1/9] CPU hotplug: Provide APIs to prevent CPU offline from atomic context
From: Srivatsa S. Bhat @ 2012-12-24 15:50 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: tglx, peterz, paulmck, rusty, mingo, akpm, namhyung,
	vincent.guittot, tj, sbw, amit.kucheria, rostedt, rjw, wangyun,
	xiaoguangrong, nikunj, linux-pm, linux-kernel
In-Reply-To: <20121223164242.GA9979@redhat.com>

On 12/23/2012 10:12 PM, Oleg Nesterov wrote:
> On 12/23, Srivatsa S. Bhat wrote:
>>
>> On 12/20/2012 07:12 PM, Oleg Nesterov wrote:
>>>
>>> We need mb() + rmb(). Plus cli/sti unless this arch has optimized
>>> this_cpu_add() like x86 (as you pointed out).
>>>
>>
>> Hey, IIUC, we actually don't need mb() in the reader!! Just an rmb() will do.
> 
> Well. I don't think so. But when it comes to the barriers I am never sure
> until Paul confirms my understanding ;)
> 
>> #define reader_nested_percpu()						\
>> 	     (__this_cpu_read(reader_percpu_refcnt) & READER_REFCNT_MASK)
>>
>> #define writer_active()							\
>> 				(__this_cpu_read(writer_signal))
>>
>>
>> #define READER_PRESENT		(1UL << 16)
>> #define READER_REFCNT_MASK	(READER_PRESENT - 1)
>>
>> void get_online_cpus_atomic(void)
>> {
>> 	preempt_disable();
>>
>> 	/*
>> 	 * First and foremost, make your presence known to the writer.
>> 	 */
>> 	this_cpu_add(reader_percpu_refcnt, READER_PRESENT);
>>
>> 	/*
>> 	 * If we are already using per-cpu refcounts, it is not safe to switch
>> 	 * the synchronization scheme. So continue using the refcounts.
>> 	 */
>> 	if (reader_nested_percpu()) {
>> 		this_cpu_inc(reader_percpu_refcnt);
>> 	} else {
>> 		smp_rmb();
>> 		if (unlikely(writer_active())) {
>> 			... //take hotplug_rwlock
>> 		}
>> 	}
>>
>> 	...
>>
>> 	/* Prevent reordering of any subsequent reads of cpu_online_mask. */
>> 	smp_rmb();
>> }
>>
>> The smp_rmb() before writer_active() ensures that LOAD(writer_signal) follows
>> LOAD(reader_percpu_refcnt) (at the 'if' condition). And in turn, that load is
>> automatically going to follow the STORE(reader_percpu_refcnt)
> 
> But why this STORE should be visible on another CPU before we LOAD(writer_signal)?
> 
> Lets discuss the simple and artificial example. Suppose we have
> 
> 	int X, Y;
> 
> 	int func(void)
> 	{
> 		X = 1;	// suppose that nobody else can change it
> 		mb();
> 		return Y;
> 	}
> 
> Now you are saying that we can change it and avoid the costly mb():
> 
> 	int func(void)
> 	{
> 		X = 1;
> 
> 		if (X != 1)
> 			BUG();
> 	
> 		rmb();
> 		return Y;
> 	}
> 
> I doubt. rmb() can only guarantee that the preceding LOAD's should be
> completed. Without mb() it is possible that this CPU won't write X to
> memory at all.
> 

Oh, ok :-( Thanks for correcting me and for the detailed explanation!
For a moment, I really thought we had it solved at last! ;-(

Regards,
Srivatsa S. Bhat


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox