* [PATCH 4/8] Thermal: Add Thermal_trip sysfs node
From: Durgadoss R @ 2012-12-18 9:29 UTC (permalink / raw)
To: rui.zhang, linux-pm; +Cc: linux-kernel, hongbo.zhang, wni, Durgadoss R
In-Reply-To: <1355822977-4804-1-git-send-email-durgadoss.r@intel.com>
This patch adds a thermal_trip directory under
/sys/class/thermal/zoneX. This directory contains
the trip point values for sensors bound to this
zone.
Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
---
drivers/thermal/thermal_sys.c | 237 ++++++++++++++++++++++++++++++++++++++++-
include/linux/thermal.h | 37 +++++++
2 files changed, 272 insertions(+), 2 deletions(-)
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index b39bf97..29ec073 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -448,6 +448,22 @@ static void thermal_zone_device_check(struct work_struct *work)
thermal_zone_device_update(tz);
}
+static int get_sensor_indx_by_kobj(struct thermal_zone *tz, const char *name)
+{
+ int i, indx = -EINVAL;
+
+ mutex_lock(&sensor_list_lock);
+ for (i = 0; i < tz->sensor_indx; i++) {
+ if (!strnicmp(name, kobject_name(tz->kobj_trip[i]),
+ THERMAL_NAME_LENGTH)) {
+ indx = i;
+ break;
+ }
+ }
+ mutex_unlock(&sensor_list_lock);
+ return indx;
+}
+
static void remove_sensor_from_zone(struct thermal_zone *tz,
struct thermal_sensor *ts)
{
@@ -459,9 +475,15 @@ static void remove_sensor_from_zone(struct thermal_zone *tz,
sysfs_remove_link(&tz->device.kobj, kobject_name(&ts->device.kobj));
+ /* Delete this sensor's trip Kobject */
+ kobject_del(tz->kobj_trip[indx]);
+
/* Shift the entries in the tz->sensors array */
- for (j = indx; j < MAX_SENSORS_PER_ZONE - 1; j++)
+ for (j = indx; j < MAX_SENSORS_PER_ZONE - 1; j++) {
tz->sensors[j] = tz->sensors[j + 1];
+ tz->sensor_trip[j] = tz->sensor_trip[j + 1];
+ tz->kobj_trip[j] = tz->kobj_trip[j + 1];
+ }
tz->sensor_indx--;
}
@@ -875,6 +897,120 @@ policy_show(struct device *dev, struct device_attribute *devattr, char *buf)
return sprintf(buf, "%s\n", tz->governor->name);
}
+static ssize_t
+active_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ int i, indx, ret = 0;
+ struct thermal_zone *tz;
+ struct device *dev;
+
+ /* In this function, for
+ * /sys/class/thermal/zoneX/thermal_trip/sensorY:
+ * attr points to sysfs node 'active'
+ * kobj points to sensorY
+ * kobj->parent points to thermal_trip
+ * kobj->parent->parent points to zoneX
+ */
+
+ /* Get the zone pointer */
+ dev = container_of(kobj->parent->parent, struct device, kobj);
+ tz = to_zone(dev);
+ if (!tz)
+ return -EINVAL;
+
+ /*
+ * We need this because in the sysfs tree, 'sensorY' is
+ * not really the sensor pointer. It just has the name
+ * 'sensorY'; whereas 'zoneX' is actually the zone pointer.
+ * This means container_of(kobj, struct device, kobj) will not
+ * provide the actual sensor pointer.
+ */
+ indx = get_sensor_indx_by_kobj(tz, kobject_name(kobj));
+ if (indx < 0)
+ return indx;
+
+ if (tz->sensor_trip[indx]->num_active_trips <= 0)
+ return sprintf(buf, "<Not available>\n");
+
+ ret += sprintf(buf, "0x%x", tz->sensor_trip[indx]->active_trip_mask);
+ for (i = 0; i < tz->sensor_trip[indx]->num_active_trips; i++) {
+ ret += sprintf(buf + ret, " %d",
+ tz->sensor_trip[indx]->active_trips[i]);
+ }
+
+ ret += sprintf(buf + ret, "\n");
+ return ret;
+}
+
+static ssize_t
+ptrip_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ int i, indx, ret = 0;
+ struct thermal_zone *tz;
+ struct device *dev;
+
+ /* Get the zone pointer */
+ dev = container_of(kobj->parent->parent, struct device, kobj);
+ tz = to_zone(dev);
+ if (!tz)
+ return -EINVAL;
+
+ indx = get_sensor_indx_by_kobj(tz, kobject_name(kobj));
+ if (indx < 0)
+ return indx;
+
+ if (tz->sensor_trip[indx]->num_passive_trips <= 0)
+ return sprintf(buf, "<Not available>\n");
+
+ for (i = 0; i < tz->sensor_trip[indx]->num_passive_trips; i++) {
+ ret += sprintf(buf + ret, "%d ",
+ tz->sensor_trip[indx]->passive_trips[i]);
+ }
+
+ ret += sprintf(buf + ret, "\n");
+ return ret;
+}
+
+static ssize_t
+hot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ int indx;
+ struct thermal_zone *tz;
+ struct device *dev;
+
+ /* Get the zone pointer */
+ dev = container_of(kobj->parent->parent, struct device, kobj);
+ tz = to_zone(dev);
+ if (!tz)
+ return -EINVAL;
+
+ indx = get_sensor_indx_by_kobj(tz, kobject_name(kobj));
+ if (indx < 0)
+ return indx;
+
+ return sprintf(buf, "%d\n", tz->sensor_trip[indx]->hot);
+}
+
+static ssize_t
+critical_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ int indx;
+ struct thermal_zone *tz;
+ struct device *dev;
+
+ /* Get the zone pointer */
+ dev = container_of(kobj->parent->parent, struct device, kobj);
+ tz = to_zone(dev);
+ if (!tz)
+ return -EINVAL;
+
+ indx = get_sensor_indx_by_kobj(tz, kobject_name(kobj));
+ if (indx < 0)
+ return indx;
+
+ return sprintf(buf, "%d\n", tz->sensor_trip[indx]->crit);
+}
+
static DEVICE_ATTR(type, 0444, type_show, NULL);
static DEVICE_ATTR(temp, 0444, temp_show, NULL);
static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
@@ -885,7 +1021,27 @@ static DEVICE_ATTR(policy, S_IRUGO | S_IWUSR, policy_show, policy_store);
static DEVICE_ATTR(sensor_name, 0444, sensor_name_show, NULL);
static DEVICE_ATTR(temp_input, 0444, sensor_temp_show, NULL);
-static DEVICE_ATTR(zone_name, 0444, zone_name_show, NULL);
+/* Thermal zone attributes */
+static DEVICE_ATTR(zone_name, S_IRUGO, zone_name_show, NULL);
+
+/* Thermal trip attributes */
+static struct kobj_attribute active_attr = __ATTR_RO(active);
+/* TODO: rename this to passive while removing old code */
+static struct kobj_attribute passive_attr = __ATTR_RO(ptrip);
+static struct kobj_attribute hot_attr = __ATTR_RO(hot);
+static struct kobj_attribute crit_attr = __ATTR_RO(critical);
+
+static struct attribute *trip_attrs[] = {
+ &active_attr.attr,
+ &passive_attr.attr,
+ &hot_attr.attr,
+ &crit_attr.attr,
+ NULL,
+ };
+
+static struct attribute_group trip_attr_group = {
+ .attrs = trip_attrs,
+ };
/* sys I/F for cooling device */
#define to_cooling_device(_dev) \
@@ -1770,12 +1926,19 @@ struct thermal_zone *create_thermal_zone(const char *name, void *devdata)
if (ret)
goto exit_unregister;
+ tz->kobj_thermal_trip = kobject_create_and_add("thermal_trip",
+ &tz->device.kobj);
+ if (!tz->kobj_thermal_trip)
+ goto exit_name;
+
/* Add this zone to the global list of thermal zones */
mutex_lock(&zone_list_lock);
list_add_tail(&tz->node, &thermal_zone_list);
mutex_unlock(&zone_list_lock);
return tz;
+exit_name:
+ device_remove_file(&tz->device, &dev_attr_zone_name);
exit_unregister:
device_unregister(&tz->device);
exit_idr:
@@ -1789,6 +1952,7 @@ EXPORT_SYMBOL(create_thermal_zone);
void remove_thermal_zone(struct thermal_zone *tz)
{
struct thermal_zone *pos, *next;
+ int i;
bool found = false;
if (!tz)
@@ -1809,6 +1973,33 @@ void remove_thermal_zone(struct thermal_zone *tz)
device_remove_file(&tz->device, &dev_attr_zone_name);
+ /* Just for ease of usage */
+ i = tz->sensor_indx;
+
+ while (--i >= 0) {
+ /* Remove /sys/class/thermal/zoneX/sensorY */
+ sysfs_remove_link(&tz->device.kobj,
+ kobject_name(&tz->sensors[i]->device.kobj));
+
+ /* Remove /sys/class/thermal/zoneX/thermal_trip/sensorY */
+ if (tz->kobj_trip[i]) {
+ sysfs_remove_group(tz->kobj_trip[i], &trip_attr_group);
+ kobject_del(tz->kobj_trip[i]);
+ }
+ }
+
+ /* Remove /sys/class/thermal/zoneX/thermal_trip */
+ kobject_del(tz->kobj_thermal_trip);
+
+ /* Release the cdevs attached to this zone */
+ i = tz->cdev_indx;
+
+ while (--i >= 0) {
+ /* Remove /sys/class/thermal/zoneX/cooling_deviceY */
+ sysfs_remove_link(&tz->device.kobj,
+ kobject_name(&tz->cdevs[i]->device.kobj));
+ }
+
release_idr(&thermal_zone_idr, &thermal_idr_lock, tz->id);
idr_destroy(&tz->idr);
@@ -1920,6 +2111,48 @@ exit_zone:
}
EXPORT_SYMBOL(add_cdev_to_zone);
+int add_sensor_trip_info(struct thermal_zone *tz, struct thermal_sensor *ts,
+ struct thermal_trip_point *trip)
+{
+ int indx, ret = -EINVAL;
+
+ if (!tz || !ts || !trip)
+ return ret;
+
+ mutex_lock(&zone_list_lock);
+
+ GET_INDEX(tz, ts, indx, sensor);
+ if (indx < 0)
+ goto exit_indx;
+
+ /* Create kobj for /sys/class/thermal/zoneX/thermal_trip/sensorY */
+ tz->kobj_trip[indx] = kobject_create_and_add(
+ kobject_name(&ts->device.kobj),
+ tz->kobj_thermal_trip);
+ if (!tz->kobj_trip[indx]) {
+ ret = -ENOMEM;
+ goto exit_indx;
+ }
+
+ ret = sysfs_create_group(tz->kobj_trip[indx], &trip_attr_group);
+ if (ret) {
+ dev_err(&tz->device, "sysfs_create_group failed:%d\n", ret);
+ goto exit_kobj;
+ }
+
+ tz->sensor_trip[indx] = trip;
+ mutex_unlock(&zone_list_lock);
+ return 0;
+
+exit_kobj:
+ kobject_del(tz->kobj_trip[indx]);
+ tz->kobj_trip[indx] = NULL;
+exit_indx:
+ mutex_unlock(&zone_list_lock);
+ return ret;
+}
+EXPORT_SYMBOL(add_sensor_trip_info);
+
/**
* thermal_sensor_register - register a new thermal sensor
* @name: name of the thermal sensor
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index c4e45c7..8372f05 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -158,6 +158,30 @@ struct thermal_attr {
char name[THERMAL_NAME_LENGTH];
};
+/*
+ * This structure defines the trip points for a sensor.
+ * The actual values for these trip points come from
+ * platform characterization. The thermal governors
+ * (either kernel or user space) may take appropriate
+ * actions when the sensors reach these trip points.
+ * See Documentation/thermal/sysfs-api2.txt for more details.
+ *
+ * As of now, For a particular sensor, we support:
+ * a) 1 hot trip point
+ * b) 1 critical trip point
+ * c) 'n' passive trip points
+ * d) 'm' active trip points
+ */
+struct thermal_trip_point {
+ int hot;
+ int crit;
+ int num_passive_trips;
+ int *passive_trips;
+ int num_active_trips;
+ int *active_trips;
+ int active_trip_mask;
+};
+
struct thermal_sensor {
char name[THERMAL_NAME_LENGTH];
int id;
@@ -215,6 +239,16 @@ struct thermal_zone {
/* cdev level information */
int cdev_indx; /* index into 'cdevs' array */
struct thermal_cooling_device *cdevs[MAX_CDEVS_PER_ZONE];
+
+ /*
+ * Thermal sensors trip information:
+ * kobj_thermal_trip: /sys/class/thermal/zoneX/thermal_trip
+ * kobj_trip: /sys/class/thermal/zoneX/thermal_trip/sensorY
+ * sensor_trip: trip point information for each sensor
+ */
+ struct kobject *kobj_thermal_trip;
+ struct kobject *kobj_trip[MAX_SENSORS_PER_ZONE];
+ struct thermal_trip_point *sensor_trip[MAX_SENSORS_PER_ZONE];
};
/* Structure that holds thermal governor information */
@@ -295,6 +329,9 @@ int add_sensor_to_zone(struct thermal_zone *, struct thermal_sensor *);
int add_cdev_to_zone(struct thermal_zone *, struct thermal_cooling_device *);
+int add_sensor_trip_info(struct thermal_zone *, struct thermal_sensor *,
+ struct thermal_trip_point *);
+
#ifdef CONFIG_NET
extern int thermal_generate_netlink_event(u32 orig, enum events event);
#else
--
1.7.9.5
^ permalink raw reply related
* [PATCH 1/8] Thermal: Create sensor level APIs
From: Durgadoss R @ 2012-12-18 9:29 UTC (permalink / raw)
To: rui.zhang, linux-pm; +Cc: linux-kernel, hongbo.zhang, wni, Durgadoss R
In-Reply-To: <1355822977-4804-1-git-send-email-durgadoss.r@intel.com>
This patch creates sensor level APIs, in the
generic thermal framework.
A Thermal sensor is a piece of hardware that can report
temperature of the spot in which it is placed. A thermal
sensor driver reads the temperature from this sensor
and reports it out. This kind of driver can be in
any subsystem. If the sensor needs to participate
in platform thermal management, the corresponding
driver can use the APIs introduced in this patch, to
register(or unregister) with the thermal framework.
Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
---
drivers/thermal/thermal_sys.c | 280 +++++++++++++++++++++++++++++++++++++++++
include/linux/thermal.h | 29 +++++
2 files changed, 309 insertions(+)
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 8f0f37b..b2becb9 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -45,13 +45,16 @@ MODULE_LICENSE("GPL");
static DEFINE_IDR(thermal_tz_idr);
static DEFINE_IDR(thermal_cdev_idr);
+static DEFINE_IDR(thermal_sensor_idr);
static DEFINE_MUTEX(thermal_idr_lock);
static LIST_HEAD(thermal_tz_list);
+static LIST_HEAD(thermal_sensor_list);
static LIST_HEAD(thermal_cdev_list);
static LIST_HEAD(thermal_governor_list);
static DEFINE_MUTEX(thermal_list_lock);
+static DEFINE_MUTEX(sensor_list_lock);
static DEFINE_MUTEX(thermal_governor_lock);
static struct thermal_governor *__find_governor(const char *name)
@@ -421,6 +424,103 @@ static void thermal_zone_device_check(struct work_struct *work)
#define to_thermal_zone(_dev) \
container_of(_dev, struct thermal_zone_device, device)
+#define to_thermal_sensor(_dev) \
+ container_of(_dev, struct thermal_sensor, device)
+
+static ssize_t
+sensor_name_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct thermal_sensor *ts = to_thermal_sensor(dev);
+
+ return sprintf(buf, "%s\n", ts->name);
+}
+
+static ssize_t
+sensor_temp_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ int ret;
+ long val;
+ struct thermal_sensor *ts = to_thermal_sensor(dev);
+
+ ret = ts->ops->get_temp(ts, &val);
+
+ return ret ? ret : sprintf(buf, "%ld\n", val);
+}
+
+static ssize_t
+hyst_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ int indx, ret;
+ long val;
+ struct thermal_sensor *ts = to_thermal_sensor(dev);
+
+ if (!sscanf(attr->attr.name, "threshold%d_hyst", &indx))
+ return -EINVAL;
+
+ ret = ts->ops->get_hyst(ts, indx, &val);
+
+ return ret ? ret : sprintf(buf, "%ld\n", val);
+}
+
+static ssize_t
+hyst_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int indx, ret;
+ long val;
+ struct thermal_sensor *ts = to_thermal_sensor(dev);
+
+ if (!ts->ops->set_hyst)
+ return -EPERM;
+
+ if (!sscanf(attr->attr.name, "threshold%d_hyst", &indx))
+ return -EINVAL;
+
+ if (kstrtol(buf, 10, &val))
+ return -EINVAL;
+
+ ret = ts->ops->set_hyst(ts, indx, val);
+
+ return ret ? ret : count;
+}
+
+static ssize_t
+threshold_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ int indx, ret;
+ long val;
+ struct thermal_sensor *ts = to_thermal_sensor(dev);
+
+ if (!sscanf(attr->attr.name, "threshold%d", &indx))
+ return -EINVAL;
+
+ ret = ts->ops->get_threshold(ts, indx, &val);
+
+ return ret ? ret : sprintf(buf, "%ld\n", val);
+}
+
+static ssize_t
+threshold_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int indx, ret;
+ long val;
+ struct thermal_sensor *ts = to_thermal_sensor(dev);
+
+ if (!ts->ops->set_threshold)
+ return -EPERM;
+
+ if (!sscanf(attr->attr.name, "threshold%d", &indx))
+ return -EINVAL;
+
+ if (kstrtol(buf, 10, &val))
+ return -EINVAL;
+
+ ret = ts->ops->set_threshold(ts, indx, val);
+
+ return ret ? ret : count;
+}
+
static ssize_t
type_show(struct device *dev, struct device_attribute *attr, char *buf)
{
@@ -705,6 +805,10 @@ static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
static DEVICE_ATTR(passive, S_IRUGO | S_IWUSR, passive_show, passive_store);
static DEVICE_ATTR(policy, S_IRUGO | S_IWUSR, policy_show, policy_store);
+/* Thermal sensor attributes */
+static DEVICE_ATTR(sensor_name, 0444, sensor_name_show, NULL);
+static DEVICE_ATTR(temp_input, 0444, sensor_temp_show, NULL);
+
/* sys I/F for cooling device */
#define to_cooling_device(_dev) \
container_of(_dev, struct thermal_cooling_device, device)
@@ -1491,6 +1595,182 @@ static void remove_trip_attrs(struct thermal_zone_device *tz)
}
/**
+ * enable_sensor_thresholds - create sysfs nodes for thresholdX
+ * @ts: the thermal sensor
+ * @count: Number of thresholds supported by sensor hardware
+ *
+ * 'Thresholds' are temperatures programmed into the sensor hardware,
+ * on crossing which the sensor generates an interrupt. 'Trip points'
+ * are temperatures which the thermal manager/governor thinks, some
+ * action should be taken when the sensor reaches the value.
+ */
+static int enable_sensor_thresholds(struct thermal_sensor *ts, int count)
+{
+ int i;
+ int size = sizeof(struct thermal_attr) * count;
+
+ ts->thresh_attrs = kzalloc(size, GFP_KERNEL);
+ if (!ts->thresh_attrs)
+ return -ENOMEM;
+
+ if (ts->ops->get_hyst) {
+ ts->hyst_attrs = kzalloc(size, GFP_KERNEL);
+ if (!ts->hyst_attrs) {
+ kfree(ts->thresh_attrs);
+ return -ENOMEM;
+ }
+ }
+
+ ts->thresholds = count;
+
+ /* Create threshold attributes */
+ for (i = 0; i < count; i++) {
+ snprintf(ts->thresh_attrs[i].name, THERMAL_NAME_LENGTH,
+ "threshold%d", i);
+
+ sysfs_attr_init(&ts->thresh_attrs[i].attr.attr);
+ ts->thresh_attrs[i].attr.attr.name = ts->thresh_attrs[i].name;
+ ts->thresh_attrs[i].attr.attr.mode = S_IRUGO | S_IWUSR;
+ ts->thresh_attrs[i].attr.show = threshold_show;
+ ts->thresh_attrs[i].attr.store = threshold_store;
+
+ device_create_file(&ts->device, &ts->thresh_attrs[i].attr);
+
+ /* Create threshold_hyst attributes */
+ if (!ts->ops->get_hyst)
+ continue;
+
+ snprintf(ts->hyst_attrs[i].name, THERMAL_NAME_LENGTH,
+ "threshold%d_hyst", i);
+
+ sysfs_attr_init(&ts->hyst_attrs[i].attr.attr);
+ ts->hyst_attrs[i].attr.attr.name = ts->hyst_attrs[i].name;
+ ts->hyst_attrs[i].attr.attr.mode = S_IRUGO | S_IWUSR;
+ ts->hyst_attrs[i].attr.show = hyst_show;
+ ts->hyst_attrs[i].attr.store = hyst_store;
+
+ device_create_file(&ts->device, &ts->hyst_attrs[i].attr);
+ }
+ return 0;
+}
+
+/**
+ * thermal_sensor_register - register a new thermal sensor
+ * @name: name of the thermal sensor
+ * @count: Number of thresholds supported by hardware
+ * @ops: standard thermal sensor callbacks
+ * @devdata: private device data
+ */
+struct thermal_sensor *thermal_sensor_register(const char *name, int count,
+ struct thermal_sensor_ops *ops, void *devdata)
+{
+ struct thermal_sensor *ts;
+ int ret;
+
+ if (!name || (name && strlen(name) >= THERMAL_NAME_LENGTH))
+ return ERR_PTR(-EINVAL);
+
+ if (!ops || !ops->get_temp)
+ return ERR_PTR(-EINVAL);
+
+ ts = kzalloc(sizeof(*ts), GFP_KERNEL);
+ if (!ts)
+ return ERR_PTR(-ENOMEM);
+
+ idr_init(&ts->idr);
+ ret = get_idr(&thermal_sensor_idr, &thermal_idr_lock, &ts->id);
+ if (ret)
+ goto exit_free;
+
+ strcpy(ts->name, name);
+ ts->ops = ops;
+ ts->devdata = devdata;
+ ts->device.class = &thermal_class;
+
+ dev_set_name(&ts->device, "sensor%d", ts->id);
+ ret = device_register(&ts->device);
+ if (ret)
+ goto exit_idr;
+
+ ret = device_create_file(&ts->device, &dev_attr_sensor_name);
+ if (ret)
+ goto exit_unregister;
+
+ ret = device_create_file(&ts->device, &dev_attr_temp_input);
+ if (ret)
+ goto exit_name;
+
+ if (count > 0 && ts->ops->get_threshold) {
+ ret = enable_sensor_thresholds(ts, count);
+ if (ret)
+ goto exit_temp;
+ }
+
+ /* Add this sensor to the global list of sensors */
+ mutex_lock(&sensor_list_lock);
+ list_add_tail(&ts->node, &thermal_sensor_list);
+ mutex_unlock(&sensor_list_lock);
+
+ return ts;
+
+exit_temp:
+ device_remove_file(&ts->device, &dev_attr_temp_input);
+exit_name:
+ device_remove_file(&ts->device, &dev_attr_sensor_name);
+exit_unregister:
+ device_unregister(&ts->device);
+exit_idr:
+ release_idr(&thermal_sensor_idr, &thermal_idr_lock, ts->id);
+exit_free:
+ kfree(ts);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(thermal_sensor_register);
+
+void thermal_sensor_unregister(struct thermal_sensor *ts)
+{
+ int i;
+ struct thermal_sensor *pos, *next;
+ bool found = false;
+
+ if (!ts)
+ return;
+
+ mutex_lock(&sensor_list_lock);
+ list_for_each_entry_safe(pos, next, &thermal_sensor_list, node) {
+ if (pos == ts) {
+ list_del(&ts->node);
+ found = true;
+ break;
+ }
+ }
+ mutex_unlock(&sensor_list_lock);
+
+ if (!found)
+ return;
+
+ for (i = 0; i < ts->thresholds; i++) {
+ device_remove_file(&ts->device, &ts->thresh_attrs[i].attr);
+ if (ts->ops->get_hyst) {
+ device_remove_file(&ts->device,
+ &ts->hyst_attrs[i].attr);
+ }
+ }
+
+ device_remove_file(&ts->device, &dev_attr_sensor_name);
+ device_remove_file(&ts->device, &dev_attr_temp_input);
+
+ release_idr(&thermal_sensor_idr, &thermal_idr_lock, ts->id);
+ idr_destroy(&ts->idr);
+
+ device_unregister(&ts->device);
+
+ kfree(ts);
+ return;
+}
+EXPORT_SYMBOL(thermal_sensor_unregister);
+
+/**
* thermal_zone_device_register - register a new thermal zone device
* @type: the thermal zone device type
* @trips: the number of trip points the thermal zone support
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 807f214..a49cb38 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -49,6 +49,7 @@
/* Default Thermal Governor: Does Linear Throttling */
#define DEFAULT_THERMAL_GOVERNOR "step_wise"
+struct thermal_sensor;
struct thermal_zone_device;
struct thermal_cooling_device;
@@ -127,6 +128,15 @@ struct thermal_cooling_device_ops {
int (*set_cur_state) (struct thermal_cooling_device *, unsigned long);
};
+struct thermal_sensor_ops {
+ int (*get_temp) (struct thermal_sensor *, long *);
+ int (*get_trend) (struct thermal_sensor *, int, enum thermal_trend *);
+ int (*set_threshold) (struct thermal_sensor *, int, long);
+ int (*get_threshold) (struct thermal_sensor *, int, long *);
+ int (*set_hyst) (struct thermal_sensor *, int, long);
+ int (*get_hyst) (struct thermal_sensor *, int, long *);
+};
+
struct thermal_cooling_device {
int id;
char type[THERMAL_NAME_LENGTH];
@@ -144,6 +154,21 @@ struct thermal_attr {
char name[THERMAL_NAME_LENGTH];
};
+struct thermal_sensor {
+ char name[THERMAL_NAME_LENGTH];
+ int id;
+ int temp;
+ int prev_temp;
+ int thresholds;
+ void *devdata;
+ struct idr idr;
+ struct device device;
+ struct list_head node;
+ struct thermal_sensor_ops *ops;
+ struct thermal_attr *thresh_attrs;
+ struct thermal_attr *hyst_attrs;
+};
+
struct thermal_zone_device {
int id;
char type[THERMAL_NAME_LENGTH];
@@ -237,6 +262,10 @@ void notify_thermal_framework(struct thermal_zone_device *, int);
int thermal_register_governor(struct thermal_governor *);
void thermal_unregister_governor(struct thermal_governor *);
+struct thermal_sensor *thermal_sensor_register(const char *, int,
+ struct thermal_sensor_ops *, void *);
+void thermal_sensor_unregister(struct thermal_sensor *);
+
#ifdef CONFIG_NET
extern int thermal_generate_netlink_event(u32 orig, enum events event);
#else
--
1.7.9.5
^ permalink raw reply related
* [PATCH 0/8] Thermal Framework Enhancements
From: Durgadoss R @ 2012-12-18 9:29 UTC (permalink / raw)
To: rui.zhang, linux-pm; +Cc: linux-kernel, hongbo.zhang, wni, Durgadoss R
This patch is a v1 based on the RFC submitted here:
https://patchwork.kernel.org/patch/1758921/
This patch set is based on Rui's -thermal tree, and is
tested on a Core-i5 and an Atom netbook.
This series contains 8 patches:
Patch 1/8: Creates new sensor level APIs
Patch 2/8: Creates new zone level APIs. The existing tzd structure is
kept as such for clarity and compatibility purposes.
Patch 3/8: Creates functions to add/remove a cdev to/from a zone. The
existing tcd structure need not be modified.
Patch 4/8: Adds a thermal_trip sysfs node, which exposes various trip
points for all sensors present in a zone.
Patch 5/8: Adds a thermal_map sysfs node. It is a compact representation
of the binding relationship between a sensor and a cdev,
within a zone.
Patch 6/8: Creates Documentation for the new APIs. A new file is
created for clarity. Final goal is to merge with the existing
file or refactor the files, as whatever seems appropriate.
Patch 7/8: Make PER ZONE values configurable through Kconfig
Patch 8/8: A dummy driver that can be used for testing. This is not for merge.
Thanks to Rui Zhang, Honghbo Zhang, Wei Ni for their feedback on the
RFC version.
Durgadoss R (8):
Thermal: Create sensor level APIs
Thermal: Create zone level APIs
Thermal: Add APIs to bind cdev to new zone structure
Thermal: Add Thermal_trip sysfs node
Thermal: Add 'thermal_map' sysfs node
Thermal: Add Documentation to new APIs
Thermal: Make PER_ZONE values configurable
Thermal: Dummy driver used for testing
Documentation/thermal/sysfs-api2.txt | 248 +++++++++
drivers/thermal/Kconfig | 19 +
drivers/thermal/Makefile | 3 +
drivers/thermal/thermal_sys.c | 932 ++++++++++++++++++++++++++++++++++
drivers/thermal/thermal_test.c | 315 ++++++++++++
include/linux/thermal.h | 124 +++++
6 files changed, 1641 insertions(+)
create mode 100644 Documentation/thermal/sysfs-api2.txt
create mode 100644 drivers/thermal/thermal_test.c
--
1.7.9.5
^ permalink raw reply
* [PATCH 8/8] Thermal: Dummy driver used for testing
From: Durgadoss R @ 2012-12-18 9:29 UTC (permalink / raw)
To: rui.zhang, linux-pm; +Cc: linux-kernel, hongbo.zhang, wni, Durgadoss R
In-Reply-To: <1355822977-4804-1-git-send-email-durgadoss.r@intel.com>
This patch has a dummy driver that can be used for
testing purposes. This patch is not for merge.
Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
---
drivers/thermal/Kconfig | 5 +
drivers/thermal/Makefile | 3 +
drivers/thermal/thermal_test.c | 315 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 323 insertions(+)
create mode 100644 drivers/thermal/thermal_test.c
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index c5ba3340..3b92a76 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -136,4 +136,9 @@ config DB8500_CPUFREQ_COOLING
bound cpufreq cooling device turns active to set CPU frequency low to
cool down the CPU.
+config THERMAL_TEST
+ tristate "test driver"
+ help
+ Enable this to test the thermal framework.
+
endif
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index d8da683..02c3edb 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -18,3 +18,6 @@ obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o
obj-$(CONFIG_EXYNOS_THERMAL) += exynos_thermal.o
obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o
obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o
+
+# dummy driver for testing
+obj-$(CONFIG_THERMAL_TEST) += thermal_test.o
diff --git a/drivers/thermal/thermal_test.c b/drivers/thermal/thermal_test.c
new file mode 100644
index 0000000..5a11e34
--- /dev/null
+++ b/drivers/thermal/thermal_test.c
@@ -0,0 +1,315 @@
+/*
+ * thermal_test.c - This driver can be used to test Thermal
+ * Framework changes. Not specific to any
+ * platform. Fills the log buffer generously ;)
+ *
+ * Copyright (C) 2012 Intel Corporation
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Author: Durgadoss R <durgadoss.r@intel.com>
+ */
+
+#define pr_fmt(fmt) "thermal_test: " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/param.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/pm.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#define MAX_THERMAL_ZONES 2
+#define MAX_THERMAL_SENSORS 2
+#define MAX_COOLING_DEVS 4
+#define NUM_THRESHOLDS 3
+
+static struct ts_data {
+ int curr_temp;
+ int flag;
+} ts_data;
+
+int active_trips[10] = {100, 90, 80, 70, 60, 50, 40, 30, 20, 10};
+int passive_trips[5] = {100, 90, 60, 50, 40};
+
+static struct platform_device *pdev;
+static unsigned long cur_cdev_state = 2;
+static struct thermal_sensor *ts, *ts1;
+static struct thermal_zone *tz;
+static struct thermal_cooling_device *cdev;
+
+static long thermal_thresholds[NUM_THRESHOLDS] = {30000, 40000, 50000};
+
+static struct thermal_trip_point trip = {
+ .hot = 90,
+ .crit = 100,
+ .num_passive_trips = 5,
+ .passive_trips = passive_trips,
+ .num_active_trips = 10,
+ .active_trips = active_trips,
+ .active_trip_mask = 0xCFF,
+};
+
+static struct thermal_trip_point trip1 = {
+ .hot = 95,
+ .crit = 125,
+ .num_passive_trips = 0,
+ .passive_trips = passive_trips,
+ .num_active_trips = 6,
+ .active_trips = active_trips,
+ .active_trip_mask = 0xFF,
+};
+
+static int read_cur_state(struct thermal_cooling_device *cdev,
+ unsigned long *state)
+{
+ *state = cur_cdev_state;
+ return 0;
+}
+
+static int write_cur_state(struct thermal_cooling_device *cdev,
+ unsigned long state)
+{
+ cur_cdev_state = state;
+ return 0;
+}
+
+static int read_max_state(struct thermal_cooling_device *cdev,
+ unsigned long *state)
+{
+ *state = 5;
+ return 0;
+}
+
+static int read_curr_temp(struct thermal_sensor *ts, long *temp)
+{
+ *temp = ts_data.curr_temp;
+ return 0;
+}
+
+static ssize_t
+flag_show(struct device *dev, struct device_attribute *devattr, char *buf)
+{
+ return sprintf(buf, "%d\n", ts_data.flag);
+}
+
+static ssize_t
+flag_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ long flag;
+
+ if (kstrtol(buf, 10, &flag))
+ return -EINVAL;
+
+ ts_data.flag = flag;
+
+ if (flag == 0) {
+ thermal_sensor_unregister(ts);
+ ts = NULL;
+ pr_err("thermal_sensor_unregister (ts) done\n");
+ } else if (flag == 1) {
+ thermal_sensor_unregister(ts1);
+ ts1 = NULL;
+ pr_err("thermal_sensor_unregister (ts1) done\n");
+ } else if (flag == 2) {
+ thermal_cooling_device_unregister(cdev);
+ cdev = NULL;
+ pr_err("cdev unregister (cdev) done\n");
+ } else if (flag == 3) {
+ if (tz)
+ remove_thermal_zone(tz);
+ tz = NULL;
+ pr_err("removed thermal zone\n");
+ }
+
+ return count;
+}
+
+static ssize_t
+temp_show(struct device *dev, struct device_attribute *devattr, char *buf)
+{
+ return sprintf(buf, "%d\n", ts_data.curr_temp);
+}
+
+static int read_threshold(struct thermal_sensor *ts, int indx, long *val)
+{
+ if (indx < 0 || indx >= NUM_THRESHOLDS)
+ return -EINVAL;
+
+ *val = thermal_thresholds[indx];
+ return 0;
+}
+
+static int write_threshold(struct thermal_sensor *ts, int indx, long val)
+{
+ if (indx < 0 || indx >= NUM_THRESHOLDS)
+ return -EINVAL;
+
+ thermal_thresholds[indx] = val;
+ return 0;
+}
+
+static ssize_t
+temp_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ long temp;
+
+ if (kstrtol(buf, 10, &temp))
+ return -EINVAL;
+
+ ts_data.curr_temp = temp;
+ return count;
+}
+
+static struct thermal_sensor_ops ts_ops = {
+ .get_temp = read_curr_temp,
+ .get_threshold = read_threshold,
+ .set_threshold = write_threshold,
+};
+
+static struct thermal_sensor_ops ts1_ops = {
+ .get_temp = read_curr_temp,
+ .get_threshold = read_threshold,
+ .set_threshold = write_threshold,
+};
+
+static struct thermal_cooling_device_ops cdev_ops = {
+ .get_cur_state = read_cur_state,
+ .set_cur_state = write_cur_state,
+ .get_max_state = read_max_state,
+};
+
+static DEVICE_ATTR(test_temp, S_IRUGO | S_IWUSR, temp_show, temp_store);
+static DEVICE_ATTR(sensor_enable, S_IRUGO | S_IWUSR, flag_show, flag_store);
+
+static int thermal_test_probe(struct platform_device *pdev)
+{
+ int ret;
+
+ ts_data.curr_temp = 30000;
+ ts_data.flag = 1;
+
+ ts = thermal_sensor_register("ts", NUM_THRESHOLDS, &ts_ops, &ts_data);
+ if (!ts) {
+ pr_err("thermal_sensor_register failed:\n");
+ return -EINVAL;
+ }
+
+ ts1 = thermal_sensor_register("ts1", NUM_THRESHOLDS, &ts1_ops, NULL);
+
+ cdev = thermal_cooling_device_register("cdev", NULL, &cdev_ops);
+ if (!cdev) {
+ pr_err("cdev_register failed:\n");
+ return -EINVAL;
+ }
+
+ device_create_file(&pdev->dev, &dev_attr_test_temp);
+ device_create_file(&pdev->dev, &dev_attr_sensor_enable);
+
+ /* Create a zone */
+ tz = create_thermal_zone("myZone", NULL);
+ if (!tz) {
+ pr_err("create_thermal_zone failed:\n");
+ return -EINVAL;
+ }
+
+ pr_err("Zone created successfully..\n");
+
+ ret = add_sensor_to_zone(tz, ts);
+ if (ret) {
+ pr_err("add_sensor_to_zone failed:%d\n", ret);
+ return ret;
+ }
+
+ ret = add_sensor_to_zone(tz, ts1);
+ pr_err("add_sensor (ts1) ret_val: %d\n", ret);
+
+ ret = add_cdev_to_zone(tz, cdev);
+ pr_err("add_cdev_to_zone (cdev) ret_val: %d\n", ret);
+
+ ret = add_sensor_trip_info(tz, ts, &trip);
+ ret = add_sensor_trip_info(tz, ts1, &trip1);
+ pr_err("add_sensor_trip_info (ts) ret_val: %d\n", ret);
+ return 0;
+}
+
+static int thermal_test_remove(struct platform_device *pdev)
+{
+ device_remove_file(&pdev->dev, &dev_attr_test_temp);
+ device_remove_file(&pdev->dev, &dev_attr_sensor_enable);
+
+ return 0;
+}
+
+/*********************************************************************
+ * Driver initialization and finalization
+ *********************************************************************/
+
+#define DRIVER_NAME "thermal_test"
+
+static const struct platform_device_id therm_id_table[] = {
+ { DRIVER_NAME, 1 },
+};
+
+static struct platform_driver thermal_test_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ .owner = THIS_MODULE,
+ },
+ .probe = thermal_test_probe,
+ .remove = __devexit_p(thermal_test_remove),
+ .id_table = therm_id_table,
+};
+
+static int __init thermal_test_init(void)
+{
+ int ret;
+
+ ret = platform_driver_register(&thermal_test_driver);
+ if (ret) {
+ pr_err("platform driver register failed:%d\n", ret);
+ return ret;
+ }
+
+ pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0);
+ if (IS_ERR(pdev)) {
+ ret = PTR_ERR(pdev);
+ pr_err("platform device register failed:%d\n", ret);
+ platform_driver_unregister(&thermal_test_driver);
+ }
+
+ return ret;
+}
+
+static void __exit thermal_test_exit(void)
+{
+ pr_err("in thermal_test_exit\n");
+ platform_device_unregister(pdev);
+ platform_driver_unregister(&thermal_test_driver);
+}
+
+module_init(thermal_test_init);
+module_exit(thermal_test_exit);
+
+MODULE_AUTHOR("Durgadoss R <durgadoss.r@intel.com>");
+MODULE_DESCRIPTION("A dummy driver to test Thermal Framework");
+MODULE_LICENSE("GPL");
--
1.7.9.5
^ permalink raw reply related
* [PATCH 7/8] Thermal: Make PER_ZONE values configurable
From: Durgadoss R @ 2012-12-18 9:29 UTC (permalink / raw)
To: rui.zhang, linux-pm; +Cc: linux-kernel, hongbo.zhang, wni, Durgadoss R
In-Reply-To: <1355822977-4804-1-git-send-email-durgadoss.r@intel.com>
This patch makes MAX_SENSORS_PER_ZONE and
MAX_CDEVS_PER_ZONE values configurable. The
default value is 1, and range is 1-12.
Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
---
No great reason for using 12.
---
drivers/thermal/Kconfig | 14 ++++++++++++++
include/linux/thermal.h | 6 +++---
2 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index d96da07..c5ba3340 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -15,6 +15,20 @@ menuconfig THERMAL
if THERMAL
+config THERMAL_MAX_SENSORS_PER_ZONE
+ int "Maximum number of sensors allowed per thermal zone"
+ default 1
+ range 1 12
+ ---help---
+ Specify the number of sensors allowed per zone
+
+config THERMAL_MAX_CDEVS_PER_ZONE
+ int "Maximum number of cooling devices allowed per thermal zone"
+ default 1
+ range 1 12
+ ---help---
+ Specify the number of cooling devices allowed per zone
+
config THERMAL_HWMON
bool
depends on HWMON=y || HWMON=THERMAL
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 581dc87..7b0359b 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -49,9 +49,9 @@
/* Default Thermal Governor: Does Linear Throttling */
#define DEFAULT_THERMAL_GOVERNOR "step_wise"
-#define MAX_SENSORS_PER_ZONE 5
-
-#define MAX_CDEVS_PER_ZONE 5
+/* Maximum number of sensors/cdevs per zone, defined through Kconfig */
+#define MAX_SENSORS_PER_ZONE CONFIG_THERMAL_MAX_SENSORS_PER_ZONE
+#define MAX_CDEVS_PER_ZONE CONFIG_THERMAL_MAX_CDEVS_PER_ZONE
/* If we map each sensor with every possible cdev for a zone */
#define MAX_MAPS_PER_ZONE (MAX_SENSORS_PER_ZONE * MAX_CDEVS_PER_ZONE)
--
1.7.9.5
^ permalink raw reply related
* [PATCH 6/8] Thermal: Add Documentation to new APIs
From: Durgadoss R @ 2012-12-18 9:29 UTC (permalink / raw)
To: rui.zhang, linux-pm; +Cc: linux-kernel, hongbo.zhang, wni, Durgadoss R
In-Reply-To: <1355822977-4804-1-git-send-email-durgadoss.r@intel.com>
This patch adds Documentation for the new APIs
introduced in this patch set. The documentation
also has a model sysfs structure for reference.
Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
---
Documentation/thermal/sysfs-api2.txt | 248 ++++++++++++++++++++++++++++++++++
1 file changed, 248 insertions(+)
create mode 100644 Documentation/thermal/sysfs-api2.txt
diff --git a/Documentation/thermal/sysfs-api2.txt b/Documentation/thermal/sysfs-api2.txt
new file mode 100644
index 0000000..ffd0402
--- /dev/null
+++ b/Documentation/thermal/sysfs-api2.txt
@@ -0,0 +1,248 @@
+Thermal Framework
+-----------------
+
+Written by Durgadoss R <durgadoss.r@intel.com>
+Copyright (c) 2012 Intel Corporation
+
+Created on: 4 November 2012
+Updated on: 18 December 2012
+
+0. Introduction
+---------------
+The Linux thermal framework provides a set of interfaces for thermal
+sensors and thermal cooling devices (fan, processor...) to register
+with the thermal management solution and to be a part of it.
+
+This document focuses on how to enable new thermal sensors and cooling
+devices to participate in thermal management. This solution is intended
+to be 'light-weight' and platform/architecture independent. Any thermal
+sensor/cooling device should be able to use the infrastructure easily.
+
+The goal of thermal framework is to expose the thermal sensor/zone and
+cooling device attributes in a consistent way. This will help the
+thermal governors to make use of the information to manage platform
+thermals efficiently.
+
+The thermal sensor source file can be generic (can be any sensor driver,
+in any subsystem). This driver will use the sensor APIs and register with
+thermal framework to participate in platform Thermal management. This
+does not (and should not) know about which zone it belongs to, or any
+other information about platform thermals. A sensor driver is a standalone
+piece of code, which can optionally register with thermal framework.
+
+However, for any platform, there should be a platformX_thermal.c file,
+which will know about the platform thermal characteristics (like how many
+sensors, zones, cooling devices, etc.. And how they are related to each other
+i.e the mapping information). Only in this file, the zone level APIs should
+be used, in which case the file will have all information required to attach
+various sensors to a particular zone.
+
+This way, we can have one platform level thermal file, which can support
+multiple platforms (may be)using the same set of sensors (but)binded in
+a different way. This file can get the platform thermal information
+through Firmware, ACPI tables, device tree etc.
+
+Unfortunately, today we don't have many drivers that can be clearly
+differentiated as 'sensor_file.c' and 'platform_thermal_file.c'.
+But very soon we will need/have. The reason I am saying this is because
+we are seeing a lot of chip drivers, starting to use thermal framework,
+and we should keep it really light-weight for them to do so.
+
+An Example: drivers/hwmon/emc1403.c - a generic thermal chip driver
+In one platform this sensor can belong to 'ZoneA' and in another the
+same can belong to 'ZoneB'. But, emc1403.c does not really care about
+where does it belong. It just reports temperature.
+
+1. Terminology
+--------------
+This section describes the terminology used in the rest of this
+document as well as the thermal framework code.
+
+thermal_sensor: Hardware that can report temperature of a particular
+ spot in the platform, where it is placed. The temperature
+ reported by the sensor is the 'real' temperature reported
+ by the hardware.
+thermal_zone: A virtual area on the device, that gets heated up. It may
+ have one or more thermal sensors attached to it.
+cooling_device: Any component that can help in reducing the temperature of
+ a 'hot spot' either by reducing its performance (passive
+ cooling) or by other means(Active cooling E.g. Fan)
+
+trip_points: Various temperature levels for each sensor. As of now, we
+ have four levels namely active, passive, hot and critical.
+ Hot and critical trip point support only one value whereas
+ active and passive can have any number of values. These
+ temperature values can come from platform data, and are
+ exposed through sysfs in a consistent manner. Stand-alone
+ thermal sensor drivers are not expected to know these values.
+ These values are RO.
+thresholds: These are programmable temperature limits, on reaching which
+ the thermal sensor generates an interrupt. The framework is
+ notified about this interrupt to take appropriate action.
+ There can be as many number of thresholds as that of the
+ hardware supports. These values are RW.
+
+thermal_map: This provides the mapping (aka binding) information between
+ various sensors and cooling devices in a particular zone.
+ Typically, this also comes from platform data; Stand-alone
+ sensor drivers or cooling device drivers are not expected
+ to know these mapping information.
+
+2. Thermal framework APIs
+-------------------------
+2.1: For Thermal Sensors
+2.1.1 thermal_sensor_register:
+ This function creates a new sensor directory under /sys/class/thermal/
+ as sensor[0-*]. This API is expected to be called by thermal sensor
+ drivers. These drivers may or may not be in thermal subsystem. This
+ function returns a thermal_sensor structure on success and appropriate
+ error on failure.
+
+ name: Name of the sensor
+ count: Number of programmable thresholds associated with this sensor
+ devdata: Device private data
+ ops: Thermal sensor callbacks
+ .get_temp: obtain the current temperature of the sensor
+ .get_trend: obtain the trend of the sensor
+ .get_threshold: get a particular threshold temperature
+ .set_threshold: set a particular threshold temperature
+ .get_hyst: get hysteresis value associated with a threshold
+ .set_hyst: set hysteresis value associated with a threshold
+
+2.1.2 thermal_sensor_unregister:
+ This function deletes the sensor directory under /sys/class/thermal/
+ for the given sensor. Thermal sensor drivers may call this API
+ during the driver's 'exit' routine.
+
+ ts: Thermal sensor that has to be unregistered
+
+2.1.3 enable_sensor_thresholds:
+ This function creates 'threshold[0-*]' attributes under a particular
+ sensorX directory. These values are RW. This function is called by
+ the sensr driver only if the sensor supports interrupt mechanism.
+
+ ts: Thermal sensor for which thresholds have to be enabled
+ num_thresholds: Number of thresholds supported by the sensor
+
+2.2: For Cooling Devices
+2.2.1 thermal_cooling_device_register:
+ This function adds a new thermal cooling device (fan/processor/...)
+ to /sys/class/thermal/ folder as cooling_device[0-*]. This function
+ is expected to be called by cooling device drivers that may be
+ present in other subsystems also.
+
+ name: the cooling device name
+ devdata: device private data
+ ops: thermal cooling devices callbacks
+ .get_max_state: get the Maximum throttle state of the cooling device
+ .get_cur_state: get the Current throttle state of the cooling device
+ .set_cur_state: set the Current throttle state of the cooling device
+
+2.2.2 thermal_cooling_device_unregister:
+ This function deletes the given cdev entry form /sys/class/thermal;
+ and also cleans all the symlinks referred from various zones.
+
+ cdev: Cooling device to be unregistered
+
+2.3: For Thermal Zones
+2.3.1 create_thermal_zone:
+ This function adds a new 'zone' under /sys/class/thermal/
+ directory as zone[0-*]. This zone has at least one thermal
+ sensor and at most MAX_SENSORS_PER_ZONE number of sensors
+ attached to it. Similarly, this zone has at least one cdev
+ and at most MAX_CDEVS_PER_ZONE number of cdevs attached to it.
+ Both the MAX_*_PER_ZONE values are configurable, through
+ Kconfig option(during 'menuconfig').
+
+ name: Name of the thermal zone
+ devdata: Device private data
+
+2.3.2 add_sensor_to_zone
+ This function adds a 'sensorX' entry under /sys/class/thermal/
+ zoneY/ directory. This 'sensorX' is a symlink to the actual
+ sensor entry under /sys/class/thermal/. Correspondingly, the
+ method remove_sensor_from_zone deletes the symlink.
+
+ tz: thermal zone structure
+ ts: thermal sensor structure
+
+2.3.3 add_cdev_to_zone
+ This function adds a 'cdevX' entry under /sys/class/thermal/
+ zoneY/ directory. This 'cdevX' is a symlink to the actual
+ cdev entry under /sys/class/thermal/. Correspondingly, the
+ method remove_cdev_from_zone deletes the symlink.
+
+ tz: thermal zone structure
+ cdev: thermal cooling device structure
+
+2.4 For Thermal Trip
+2.4.1 add_sensor_trip_info
+ This function adds trip point information for the given sensor,
+ (under a given zone) under /sys/class/thermal/zoneX/thermal_trip/
+ This API creates 4 sysfs attributes namely active, passive, hot,
+ and critical. Each of these hold one or more trip point temperature
+ values, as provided from platform data.
+
+ tz: thermal zone structure
+ ts: thermal sensor to which the trip points are attached
+ trip: trip point structure. Usually obtained from platform data
+
+2.5 For Thermal Map
+2.5.1 add_map_entry
+ This function adds a 'map[0-*]' sysfs attribute under
+ /sys/class/thermal/zoneX/thermal_map/. Each map holds a space
+ separated list of values, that specify the binding relationship
+ between a sensor and a cdev in the given zone. The map structure
+ is typically obtained as platform data. For example, through
+ ACPI tables, SFI tables, Device tree etc.
+
+ tz: thermal zone to which a 'map' is being added
+ map: thermal_map structure
+
+3. Sysfs attributes structure
+-----------------------------
+Thermal sysfs attributes will be represented under /sys/class/thermal.
+
+3.1: For Thermal Sensors
+ /sys/class/thermal/sensor[0-*]:
+ |---type: Name of the thermal sensor
+ |---temp_input: Current temperature in mC
+ |---threshold[0-*]: Threshold temperature in mC
+ |---threshold[0-*]_hyst:Optional hysteresis value in mC
+
+3.2: For Thermal Cooling Devices
+ /sys/class/thermal/cooling_device[0-*]:
+ |---type: Type of the cooling device
+ |---max_state: Maximum throttle state of the cdev
+ |---cur_state: Current throttle state of the cdev
+
+3.3: For Thermal Zones
+ /sys/class/thermal/zone[0-*]:
+ |---name: Name of the thermal
+ |---sensorX: Symlink to ../sensorX
+ |---cdevY: Symlink to ../cdevY
+ |---thermal_trip: trip point values for sensors
+ |---thermal_map: mapping info between sensors and cdevs
+
+3.4: For Thermal Trip
+ This attribute represents the trip point values for all sensors
+ present in the thermal zone. All values are in mC.
+ /sys/class/thermal/zoneX/thermal_trip/sensorY:
+ |---hot: hot trip point value
+ |---critical: critical trip point value
+ |---passive: list of passive trip point values
+ |---active: list of active trip point values
+
+3.5: For Thermal Map
+ Each attribute represents the mapping/binding information between
+ a sensor and a cdev, together with a trip type.
+ /sys/class/thermal/zoneX/thermal_map/:
+ |---mapX: mapping information
+ A typical map entry is like below:
+
+ trip_type sensor cdev trip_mask weight(s)
+ passive cpu proc 0x03 50 30
+ active cpu fan0 0x03 50 70
+
+ The trip mask is a bit string; if 'n' th bit is set, then for
+ trip point 'n' this cdev is throttled with the given weight[n].
--
1.7.9.5
^ permalink raw reply related
* [PATCH 5/8] Thermal: Add 'thermal_map' sysfs node
From: Durgadoss R @ 2012-12-18 9:29 UTC (permalink / raw)
To: rui.zhang, linux-pm; +Cc: linux-kernel, hongbo.zhang, wni, Durgadoss R
In-Reply-To: <1355822977-4804-1-git-send-email-durgadoss.r@intel.com>
This patch creates a thermal map sysfs node under
/sys/class/thermal/thermal_zoneX/. This contains
entries named map0, map1 .. mapN. Each map has the
following space separated values:
trip_type sensor_name cdev_name trip_mask weights
Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
---
drivers/thermal/thermal_sys.c | 149 ++++++++++++++++++++++++++++++++++++++++-
include/linux/thermal.h | 29 ++++++++
2 files changed, 176 insertions(+), 2 deletions(-)
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 29ec073..a3adc00 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -506,6 +506,41 @@ static void remove_cdev_from_zone(struct thermal_zone *tz,
tz->cdev_indx--;
}
+static void __clean_map_entry(struct thermal_zone *tz, int i)
+{
+ tz->map[i] = NULL;
+ sysfs_remove_file(tz->kobj_thermal_map, &tz->map_attr[i]->attr.attr);
+ /* Free map attributes */
+ kfree(tz->map_attr[i]);
+ tz->map_attr[i] = NULL;
+}
+
+static void remove_sensor_map_entry(struct thermal_zone *tz,
+ struct thermal_sensor *ts)
+{
+ int i;
+
+ for (i = 0; i < MAX_MAPS_PER_ZONE; i++) {
+ if (tz->map[i] && !strnicmp(ts->name, tz->map[i]->sensor_name,
+ THERMAL_NAME_LENGTH)) {
+ __clean_map_entry(tz, i);
+ }
+ }
+}
+
+static void remove_cdev_map_entry(struct thermal_zone *tz,
+ struct thermal_cooling_device *cdev)
+{
+ int i;
+
+ for (i = 0; i < MAX_MAPS_PER_ZONE; i++) {
+ if (tz->map[i] && !strnicmp(cdev->type, tz->map[i]->cdev_name,
+ THERMAL_NAME_LENGTH)) {
+ __clean_map_entry(tz, i);
+ }
+ }
+}
+
/* sys I/F for thermal zone */
#define to_thermal_zone(_dev) \
@@ -898,6 +933,52 @@ policy_show(struct device *dev, struct device_attribute *devattr, char *buf)
}
static ssize_t
+map_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ int i, indx, ret = 0;
+ struct thermal_zone *tz;
+ struct thermal_map *map;
+ struct device *dev;
+ char *trip;
+
+ /*
+ * For maps under /sys/class/thermal/zoneX/thermal_map/mapY:
+ * attr points to mapY
+ * kobj points to thermal_map
+ * kobj->parent points to zoneX
+ */
+
+ /* Get zone pointer */
+ dev = container_of(kobj->parent, struct device, kobj);
+ tz = to_zone(dev);
+ if (!tz)
+ return -EINVAL;
+
+ sscanf(attr->attr.name, "map%d", &indx);
+
+ if (indx < 0 || indx >= MAX_MAPS_PER_ZONE)
+ return -EINVAL;
+
+ if (!tz->map[indx])
+ return sprintf(buf, "<Unavailable>\n");
+
+ map = tz->map[indx];
+
+ trip = (map->trip_type == THERMAL_TRIP_ACTIVE) ?
+ "active" : "passive";
+ ret += sprintf(buf, "%s", trip);
+ ret += sprintf(buf + ret, " %s", map->sensor_name);
+ ret += sprintf(buf + ret, " %s", map->cdev_name);
+ ret += sprintf(buf + ret, " 0x%x", map->trip_mask);
+
+ for (i = 0; i < map->num_weights; i++)
+ ret += sprintf(buf + ret, " %d", map->weights[i]);
+
+ ret += sprintf(buf + ret, "\n");
+ return ret;
+}
+
+static ssize_t
active_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
int i, indx, ret = 0;
@@ -1676,8 +1757,10 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
mutex_lock(&zone_list_lock);
- for_each_thermal_zone(tmp_tz)
+ for_each_thermal_zone(tmp_tz) {
remove_cdev_from_zone(tmp_tz, cdev);
+ remove_cdev_map_entry(tmp_tz, cdev);
+ }
mutex_unlock(&zone_list_lock);
@@ -1931,12 +2014,19 @@ struct thermal_zone *create_thermal_zone(const char *name, void *devdata)
if (!tz->kobj_thermal_trip)
goto exit_name;
+ tz->kobj_thermal_map = kobject_create_and_add("thermal_map",
+ &tz->device.kobj);
+ if (!tz->kobj_thermal_map)
+ goto exit_trip;
+
/* Add this zone to the global list of thermal zones */
mutex_lock(&zone_list_lock);
list_add_tail(&tz->node, &thermal_zone_list);
mutex_unlock(&zone_list_lock);
return tz;
+exit_trip:
+ kobject_del(tz->kobj_thermal_trip);
exit_name:
device_remove_file(&tz->device, &dev_attr_zone_name);
exit_unregister:
@@ -2000,6 +2090,12 @@ void remove_thermal_zone(struct thermal_zone *tz)
kobject_name(&tz->cdevs[i]->device.kobj));
}
+ for (i = 0; i < MAX_MAPS_PER_ZONE; i++)
+ __clean_map_entry(tz, i);
+
+ /* Remove /sys/class/thermal/zoneX/thermal_map */
+ kobject_del(tz->kobj_thermal_map);
+
release_idr(&thermal_zone_idr, &thermal_idr_lock, tz->id);
idr_destroy(&tz->idr);
@@ -2045,6 +2141,53 @@ struct thermal_sensor *get_sensor_by_name(const char *name)
}
EXPORT_SYMBOL(get_sensor_by_name);
+int add_map_entry(struct thermal_zone *tz, struct thermal_map *map)
+{
+ int ret, indx;
+
+ if (!tz || !map)
+ return -EINVAL;
+
+ mutex_lock(&zone_list_lock);
+
+ for (indx = 0; indx < MAX_MAPS_PER_ZONE; indx++) {
+ if (tz->map[indx] == NULL)
+ break;
+ }
+
+ if (indx >= MAX_MAPS_PER_ZONE) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ tz->map_attr[indx] = kzalloc(sizeof(struct thermal_map_attr),
+ GFP_KERNEL);
+ if (!tz->map_attr[indx]) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ sprintf(tz->map_attr[indx]->name, "map%d", indx);
+
+ tz->map_attr[indx]->attr.attr.name = tz->map_attr[indx]->name;
+ tz->map_attr[indx]->attr.attr.mode = S_IRUGO;
+ tz->map_attr[indx]->attr.show = map_show;
+
+ sysfs_attr_init(&tz->map_attr[indx]->attr.attr);
+ ret = sysfs_create_file(tz->kobj_thermal_map,
+ &tz->map_attr[indx]->attr.attr);
+ if (ret) {
+ kfree(tz->map_attr[indx]);
+ goto exit;
+ }
+
+ tz->map[indx] = map;
+exit:
+ mutex_unlock(&zone_list_lock);
+ return ret;
+}
+EXPORT_SYMBOL(add_map_entry);
+
int add_sensor_to_zone(struct thermal_zone *tz, struct thermal_sensor *ts)
{
int ret;
@@ -2251,8 +2394,10 @@ void thermal_sensor_unregister(struct thermal_sensor *ts)
mutex_lock(&zone_list_lock);
- for_each_thermal_zone(tz)
+ for_each_thermal_zone(tz) {
remove_sensor_from_zone(tz, ts);
+ remove_sensor_map_entry(tz, ts);
+ }
mutex_unlock(&zone_list_lock);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 8372f05..581dc87 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -53,6 +53,9 @@
#define MAX_CDEVS_PER_ZONE 5
+/* If we map each sensor with every possible cdev for a zone */
+#define MAX_MAPS_PER_ZONE (MAX_SENSORS_PER_ZONE * MAX_CDEVS_PER_ZONE)
+
struct thermal_sensor;
struct thermal_zone_device;
struct thermal_cooling_device;
@@ -158,6 +161,21 @@ struct thermal_attr {
char name[THERMAL_NAME_LENGTH];
};
+struct thermal_map_attr {
+ struct kobj_attribute attr;
+ char name[THERMAL_NAME_LENGTH];
+};
+
+struct thermal_map {
+ enum thermal_trip_type trip_type;
+ char cdev_name[THERMAL_NAME_LENGTH];
+ char sensor_name[THERMAL_NAME_LENGTH];
+
+ int trip_mask;
+ int num_weights;
+ int *weights;
+};
+
/*
* This structure defines the trip points for a sensor.
* The actual values for these trip points come from
@@ -249,6 +267,15 @@ struct thermal_zone {
struct kobject *kobj_thermal_trip;
struct kobject *kobj_trip[MAX_SENSORS_PER_ZONE];
struct thermal_trip_point *sensor_trip[MAX_SENSORS_PER_ZONE];
+
+ /*
+ * Thermal map information:
+ * kobj_thermal_map: /sys/class/thermal/zoneX/thermal_map
+ * map_attr: /sys/class/thermal/zoneX/thermal_map/mapY
+ */
+ struct kobject *kobj_thermal_map;
+ struct thermal_map_attr *map_attr[MAX_MAPS_PER_ZONE];
+ struct thermal_map *map[MAX_MAPS_PER_ZONE];
};
/* Structure that holds thermal governor information */
@@ -332,6 +359,8 @@ int add_cdev_to_zone(struct thermal_zone *, struct thermal_cooling_device *);
int add_sensor_trip_info(struct thermal_zone *, struct thermal_sensor *,
struct thermal_trip_point *);
+int add_map_entry(struct thermal_zone *, struct thermal_map *);
+
#ifdef CONFIG_NET
extern int thermal_generate_netlink_event(u32 orig, enum events event);
#else
--
1.7.9.5
^ permalink raw reply related
* [PATCH 3/8] Thermal: Add APIs to bind cdev to new zone structure
From: Durgadoss R @ 2012-12-18 9:29 UTC (permalink / raw)
To: rui.zhang, linux-pm; +Cc: linux-kernel, hongbo.zhang, wni, Durgadoss R
In-Reply-To: <1355822977-4804-1-git-send-email-durgadoss.r@intel.com>
This patch creates new APIs to add/remove a
cdev to/from a zone. This patch does not change
the old cooling device implementation.
Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
---
drivers/thermal/thermal_sys.c | 80 +++++++++++++++++++++++++++++++++++++++++
include/linux/thermal.h | 8 +++++
2 files changed, 88 insertions(+)
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 06d5a12..b39bf97 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -58,6 +58,7 @@ static LIST_HEAD(thermal_governor_list);
static DEFINE_MUTEX(thermal_list_lock);
static DEFINE_MUTEX(sensor_list_lock);
static DEFINE_MUTEX(zone_list_lock);
+static DEFINE_MUTEX(cdev_list_lock);
static DEFINE_MUTEX(thermal_governor_lock);
#define for_each_thermal_sensor(pos) \
@@ -82,6 +83,9 @@ static DEFINE_MUTEX(thermal_governor_lock);
mutex_unlock(&type##_list_lock); \
} while (0)
+#define for_each_cdev(pos) \
+ list_for_each_entry(pos, &thermal_cdev_list, node)
+
static struct thermal_governor *__find_governor(const char *name)
{
struct thermal_governor *pos;
@@ -462,6 +466,24 @@ static void remove_sensor_from_zone(struct thermal_zone *tz,
tz->sensor_indx--;
}
+static void remove_cdev_from_zone(struct thermal_zone *tz,
+ struct thermal_cooling_device *cdev)
+{
+ int j, indx;
+
+ GET_INDEX(tz, cdev, indx, cdev);
+ if (indx < 0)
+ return;
+
+ sysfs_remove_link(&tz->device.kobj, kobject_name(&cdev->device.kobj));
+
+ /* Shift the entries in the tz->cdevs array */
+ for (j = indx; j < MAX_CDEVS_PER_ZONE - 1; j++)
+ tz->cdevs[j] = tz->cdevs[j + 1];
+
+ tz->cdev_indx--;
+}
+
/* sys I/F for thermal zone */
#define to_thermal_zone(_dev) \
@@ -1458,6 +1480,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
int i;
const struct thermal_zone_params *tzp;
struct thermal_zone_device *tz;
+ struct thermal_zone *tmp_tz;
struct thermal_cooling_device *pos = NULL;
if (!cdev)
@@ -1495,6 +1518,13 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
mutex_unlock(&thermal_list_lock);
+ mutex_lock(&zone_list_lock);
+
+ for_each_thermal_zone(tmp_tz)
+ remove_cdev_from_zone(tmp_tz, cdev);
+
+ mutex_unlock(&zone_list_lock);
+
if (cdev->type[0])
device_remove_file(&cdev->device, &dev_attr_cdev_type);
device_remove_file(&cdev->device, &dev_attr_max_state);
@@ -1790,6 +1820,23 @@ exit:
}
EXPORT_SYMBOL(remove_thermal_zone);
+struct thermal_cooling_device *get_cdev_by_name(const char *name)
+{
+ struct thermal_cooling_device *pos;
+ struct thermal_cooling_device *cdev = NULL;
+
+ mutex_lock(&cdev_list_lock);
+ for_each_cdev(pos) {
+ if (!strnicmp(pos->type, name, THERMAL_NAME_LENGTH)) {
+ cdev = pos;
+ break;
+ }
+ }
+ mutex_unlock(&cdev_list_lock);
+ return cdev;
+}
+EXPORT_SYMBOL(get_cdev_by_name);
+
struct thermal_sensor *get_sensor_by_name(const char *name)
{
struct thermal_sensor *pos;
@@ -1840,6 +1887,39 @@ exit_zone:
}
EXPORT_SYMBOL(add_sensor_to_zone);
+int add_cdev_to_zone(struct thermal_zone *tz,
+ struct thermal_cooling_device *cdev)
+{
+ int ret;
+
+ if (!tz || !cdev)
+ return -EINVAL;
+
+ mutex_lock(&zone_list_lock);
+
+ /* Ensure we are not adding the same cdev again!! */
+ GET_INDEX(tz, cdev, ret, cdev);
+ if (ret >= 0) {
+ ret = -EEXIST;
+ goto exit_zone;
+ }
+
+ mutex_lock(&cdev_list_lock);
+ ret = sysfs_create_link(&tz->device.kobj, &cdev->device.kobj,
+ kobject_name(&cdev->device.kobj));
+ if (ret)
+ goto exit_cdev;
+
+ tz->cdevs[tz->cdev_indx++] = cdev;
+
+exit_cdev:
+ mutex_unlock(&cdev_list_lock);
+exit_zone:
+ mutex_unlock(&zone_list_lock);
+ return ret;
+}
+EXPORT_SYMBOL(add_cdev_to_zone);
+
/**
* thermal_sensor_register - register a new thermal sensor
* @name: name of the thermal sensor
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index f08f774..c4e45c7 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -51,6 +51,8 @@
#define MAX_SENSORS_PER_ZONE 5
+#define MAX_CDEVS_PER_ZONE 5
+
struct thermal_sensor;
struct thermal_zone_device;
struct thermal_cooling_device;
@@ -209,6 +211,10 @@ struct thermal_zone {
/* Sensor level information */
int sensor_indx; /* index into 'sensors' array */
struct thermal_sensor *sensors[MAX_SENSORS_PER_ZONE];
+
+ /* cdev level information */
+ int cdev_indx; /* index into 'cdevs' array */
+ struct thermal_cooling_device *cdevs[MAX_CDEVS_PER_ZONE];
};
/* Structure that holds thermal governor information */
@@ -287,6 +293,8 @@ struct thermal_zone *create_thermal_zone(const char *, void *);
void remove_thermal_zone(struct thermal_zone *);
int add_sensor_to_zone(struct thermal_zone *, struct thermal_sensor *);
+int add_cdev_to_zone(struct thermal_zone *, struct thermal_cooling_device *);
+
#ifdef CONFIG_NET
extern int thermal_generate_netlink_event(u32 orig, enum events event);
#else
--
1.7.9.5
^ permalink raw reply related
* [PATCH 2/8] Thermal: Create zone level APIs
From: Durgadoss R @ 2012-12-18 9:29 UTC (permalink / raw)
To: rui.zhang, linux-pm; +Cc: linux-kernel, hongbo.zhang, wni, Durgadoss R
In-Reply-To: <1355822977-4804-1-git-send-email-durgadoss.r@intel.com>
This patch adds a new thermal_zone structure to
thermal.h. Also, adds zone level APIs to the thermal
framework.
A thermal zone is a hot spot on the platform, which
can have one or more sensors and cooling devices attached
to it. These sensors can be mapped to a set of cooling
devices, which when throttled, can help to bring down
the temperature of the hot spot.
Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
---
drivers/thermal/thermal_sys.c | 194 +++++++++++++++++++++++++++++++++++++++++
include/linux/thermal.h | 21 +++++
2 files changed, 215 insertions(+)
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index b2becb9..06d5a12 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -44,19 +44,44 @@ MODULE_DESCRIPTION("Generic thermal management sysfs support");
MODULE_LICENSE("GPL");
static DEFINE_IDR(thermal_tz_idr);
+static DEFINE_IDR(thermal_zone_idr);
static DEFINE_IDR(thermal_cdev_idr);
static DEFINE_IDR(thermal_sensor_idr);
static DEFINE_MUTEX(thermal_idr_lock);
static LIST_HEAD(thermal_tz_list);
static LIST_HEAD(thermal_sensor_list);
+static LIST_HEAD(thermal_zone_list);
static LIST_HEAD(thermal_cdev_list);
static LIST_HEAD(thermal_governor_list);
static DEFINE_MUTEX(thermal_list_lock);
static DEFINE_MUTEX(sensor_list_lock);
+static DEFINE_MUTEX(zone_list_lock);
static DEFINE_MUTEX(thermal_governor_lock);
+#define for_each_thermal_sensor(pos) \
+ list_for_each_entry(pos, &thermal_sensor_list, node)
+
+#define for_each_thermal_zone(pos) \
+ list_for_each_entry(pos, &thermal_zone_list, node)
+
+#define GET_INDEX(tz, ptr, indx, type) \
+ do { \
+ int i; \
+ indx = -EINVAL; \
+ if (!tz || !ptr) \
+ break; \
+ mutex_lock(&type##_list_lock); \
+ for (i = 0; i < tz->type##_indx; i++) { \
+ if (tz->type##s[i] == ptr) { \
+ indx = i; \
+ break; \
+ } \
+ } \
+ mutex_unlock(&type##_list_lock); \
+ } while (0)
+
static struct thermal_governor *__find_governor(const char *name)
{
struct thermal_governor *pos;
@@ -419,15 +444,44 @@ static void thermal_zone_device_check(struct work_struct *work)
thermal_zone_device_update(tz);
}
+static void remove_sensor_from_zone(struct thermal_zone *tz,
+ struct thermal_sensor *ts)
+{
+ int j, indx;
+
+ GET_INDEX(tz, ts, indx, sensor);
+ if (indx < 0)
+ return;
+
+ sysfs_remove_link(&tz->device.kobj, kobject_name(&ts->device.kobj));
+
+ /* Shift the entries in the tz->sensors array */
+ for (j = indx; j < MAX_SENSORS_PER_ZONE - 1; j++)
+ tz->sensors[j] = tz->sensors[j + 1];
+
+ tz->sensor_indx--;
+}
+
/* sys I/F for thermal zone */
#define to_thermal_zone(_dev) \
container_of(_dev, struct thermal_zone_device, device)
+#define to_zone(_dev) \
+ container_of(_dev, struct thermal_zone, device)
+
#define to_thermal_sensor(_dev) \
container_of(_dev, struct thermal_sensor, device)
static ssize_t
+zone_name_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct thermal_zone *tz = to_zone(dev);
+
+ return sprintf(buf, "%s\n", tz->name);
+}
+
+static ssize_t
sensor_name_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct thermal_sensor *ts = to_thermal_sensor(dev);
@@ -809,6 +863,8 @@ static DEVICE_ATTR(policy, S_IRUGO | S_IWUSR, policy_show, policy_store);
static DEVICE_ATTR(sensor_name, 0444, sensor_name_show, NULL);
static DEVICE_ATTR(temp_input, 0444, sensor_temp_show, NULL);
+static DEVICE_ATTR(zone_name, 0444, zone_name_show, NULL);
+
/* sys I/F for cooling device */
#define to_cooling_device(_dev) \
container_of(_dev, struct thermal_cooling_device, device)
@@ -1654,6 +1710,136 @@ static int enable_sensor_thresholds(struct thermal_sensor *ts, int count)
return 0;
}
+struct thermal_zone *create_thermal_zone(const char *name, void *devdata)
+{
+ struct thermal_zone *tz;
+ int ret;
+
+ if (!name || (name && strlen(name) >= THERMAL_NAME_LENGTH))
+ return ERR_PTR(-EINVAL);
+
+ tz = kzalloc(sizeof(*tz), GFP_KERNEL);
+ if (!tz)
+ return ERR_PTR(-ENOMEM);
+
+ idr_init(&tz->idr);
+ ret = get_idr(&thermal_zone_idr, &thermal_idr_lock, &tz->id);
+ if (ret)
+ goto exit_free;
+
+ strcpy(tz->name, name);
+ tz->devdata = devdata;
+ tz->device.class = &thermal_class;
+
+ dev_set_name(&tz->device, "zone%d", tz->id);
+ ret = device_register(&tz->device);
+ if (ret)
+ goto exit_idr;
+
+ ret = device_create_file(&tz->device, &dev_attr_zone_name);
+ if (ret)
+ goto exit_unregister;
+
+ /* Add this zone to the global list of thermal zones */
+ mutex_lock(&zone_list_lock);
+ list_add_tail(&tz->node, &thermal_zone_list);
+ mutex_unlock(&zone_list_lock);
+ return tz;
+
+exit_unregister:
+ device_unregister(&tz->device);
+exit_idr:
+ release_idr(&thermal_zone_idr, &thermal_idr_lock, tz->id);
+exit_free:
+ kfree(tz);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(create_thermal_zone);
+
+void remove_thermal_zone(struct thermal_zone *tz)
+{
+ struct thermal_zone *pos, *next;
+ bool found = false;
+
+ if (!tz)
+ return;
+
+ mutex_lock(&zone_list_lock);
+
+ list_for_each_entry_safe(pos, next, &thermal_zone_list, node) {
+ if (pos == tz) {
+ list_del(&tz->node);
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ goto exit;
+
+ device_remove_file(&tz->device, &dev_attr_zone_name);
+
+ release_idr(&thermal_zone_idr, &thermal_idr_lock, tz->id);
+ idr_destroy(&tz->idr);
+
+ device_unregister(&tz->device);
+ kfree(tz);
+exit:
+ mutex_unlock(&zone_list_lock);
+ return;
+}
+EXPORT_SYMBOL(remove_thermal_zone);
+
+struct thermal_sensor *get_sensor_by_name(const char *name)
+{
+ struct thermal_sensor *pos;
+ struct thermal_sensor *ts = NULL;
+
+ mutex_lock(&sensor_list_lock);
+ for_each_thermal_sensor(pos) {
+ if (!strnicmp(pos->name, name, THERMAL_NAME_LENGTH)) {
+ ts = pos;
+ break;
+ }
+ }
+ mutex_unlock(&sensor_list_lock);
+ return ts;
+}
+EXPORT_SYMBOL(get_sensor_by_name);
+
+int add_sensor_to_zone(struct thermal_zone *tz, struct thermal_sensor *ts)
+{
+ int ret;
+
+ if (!tz || !ts)
+ return -EINVAL;
+
+ mutex_lock(&zone_list_lock);
+
+ /* Ensure we are not adding the same sensor again!! */
+ GET_INDEX(tz, ts, ret, sensor);
+ if (ret >= 0) {
+ ret = -EEXIST;
+ goto exit_zone;
+ }
+
+ mutex_lock(&sensor_list_lock);
+
+ ret = sysfs_create_link(&tz->device.kobj, &ts->device.kobj,
+ kobject_name(&ts->device.kobj));
+ if (ret)
+ goto exit_sensor;
+
+ tz->sensors[tz->sensor_indx++] = ts;
+
+exit_sensor:
+ mutex_unlock(&sensor_list_lock);
+exit_zone:
+ mutex_unlock(&zone_list_lock);
+ return ret;
+}
+EXPORT_SYMBOL(add_sensor_to_zone);
+
/**
* thermal_sensor_register - register a new thermal sensor
* @name: name of the thermal sensor
@@ -1730,6 +1916,7 @@ EXPORT_SYMBOL(thermal_sensor_register);
void thermal_sensor_unregister(struct thermal_sensor *ts)
{
int i;
+ struct thermal_zone *tz;
struct thermal_sensor *pos, *next;
bool found = false;
@@ -1749,6 +1936,13 @@ void thermal_sensor_unregister(struct thermal_sensor *ts)
if (!found)
return;
+ mutex_lock(&zone_list_lock);
+
+ for_each_thermal_zone(tz)
+ remove_sensor_from_zone(tz, ts);
+
+ mutex_unlock(&zone_list_lock);
+
for (i = 0; i < ts->thresholds; i++) {
device_remove_file(&ts->device, &ts->thresh_attrs[i].attr);
if (ts->ops->get_hyst) {
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index a49cb38..f08f774 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -49,6 +49,8 @@
/* Default Thermal Governor: Does Linear Throttling */
#define DEFAULT_THERMAL_GOVERNOR "step_wise"
+#define MAX_SENSORS_PER_ZONE 5
+
struct thermal_sensor;
struct thermal_zone_device;
struct thermal_cooling_device;
@@ -194,6 +196,21 @@ struct thermal_zone_device {
struct delayed_work poll_queue;
};
+struct thermal_zone {
+ char name[THERMAL_NAME_LENGTH];
+ int id;
+ void *devdata;
+ struct thermal_zone *ops;
+ struct thermal_governor *governor;
+ struct idr idr;
+ struct device device;
+ struct list_head node;
+
+ /* Sensor level information */
+ int sensor_indx; /* index into 'sensors' array */
+ struct thermal_sensor *sensors[MAX_SENSORS_PER_ZONE];
+};
+
/* Structure that holds thermal governor information */
struct thermal_governor {
char name[THERMAL_NAME_LENGTH];
@@ -266,6 +283,10 @@ struct thermal_sensor *thermal_sensor_register(const char *, int,
struct thermal_sensor_ops *, void *);
void thermal_sensor_unregister(struct thermal_sensor *);
+struct thermal_zone *create_thermal_zone(const char *, void *);
+void remove_thermal_zone(struct thermal_zone *);
+int add_sensor_to_zone(struct thermal_zone *, struct thermal_sensor *);
+
#ifdef CONFIG_NET
extern int thermal_generate_netlink_event(u32 orig, enum events event);
#else
--
1.7.9.5
^ permalink raw reply related
* Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
From: Aaron Lu @ 2012-12-18 8:30 UTC (permalink / raw)
To: Tejun Heo
Cc: James Bottomley, Rafael J. Wysocki, linux-pm, Jeff Garzik,
Alan Stern, Jeff Wu, linux-ide, linux-scsi, linux-acpi
In-Reply-To: <20121211051027.GB1932@mtj.dyndns.org>
Hi Everyone,
Due to lack of information, I think I'll go ahead and pick up a simple
solution for this(i.e. the code I attached previously to set a flag
event_driven in scsi_device to let sr skip events poll) and send a new
series out for you to review. After all, I can't wait forever...
Please feel free to let me know if you don't think I should send a new
series with the above mentioned solution.
Thanks a lot for your(all of you) kind help and comments.
-Aaron
On 12/11/2012 01:10 PM, Tejun Heo wrote:
> Hello, guys.
>
> On Mon, Dec 10, 2012 at 11:26:07AM +0800, Aaron Lu wrote:
>>>>> The problem here is there's no easy to reach genhd from libata (or the
>>>>> other way around) without going through sr. I think we're gonna have
>>>>> to have something in sr one way or the other.
>>>>
>>>> Can't we do that via an event? It's a bit clunky because we need the
>>>> callback in the layer that sees the sdev, which is libata-scsi, we just
>>>> need an analogue of ata_scsi_media_change_notify, but ignoring and
>>>> allowing polling is essentially event driven as well, so it should all
>>>> work. We'll need a listener in genhd, which might be trickier.
>
> I'm not really following what you mean. Can you please elaborate?
> One way or the other, doesn't the notification have to bubble up
> through SCSI?
>
>> A colleague of mine reminded me that it's impolite to write something
>> like this, and here is my apology. I didn't mean to be rude, I'm sorry
>> for this if it made you feel uncomfortable.
>
> Oh, no, it's not rude at all. I was just being distracted w/ other
> stuff and lazy. Sorry about that.
>
> Thanks.
>
^ permalink raw reply
* [PATCH] [RFC] cpufreq: can't raise max frequency with cpu_thermal
From: Sonny Rao @ 2012-12-18 8:29 UTC (permalink / raw)
To: linux-pm
Cc: linux-kernel, Zhang Rui, Amit Daniel Kachhap, Doug Anderson,
Sameer Nanda, Sonny Rao
The cpu_thermal generic thermal management code has a bug where once
max cpu frequency has been lowered in sysfs (scaling_max_freq) it is
not possible to raise the max back up later. The bug is that the
notifer gets called by __cpufreq_set_policy() before the user policy
max is raised, and is incorrectly trying to enforce the max frequency
policy even when we are trying to change the policy. It is also not
clear why this driver is looking at the user policy since it is
primarily supposed to enforce thermal policy, not user set policy.
Signed-off-by: Sonny Rao <sonnyrao@chromium.org>
---
drivers/thermal/cpu_cooling.c | 4 ----
1 files changed, 0 insertions(+), 4 deletions(-)
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 836828e..63bc708 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -219,10 +219,6 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
if (cpumask_test_cpu(policy->cpu, ¬ify_device->allowed_cpus))
max_freq = notify_device->cpufreq_val;
- /* Never exceed user_policy.max*/
- if (max_freq > policy->user_policy.max)
- max_freq = policy->user_policy.max;
-
if (policy->max != max_freq)
cpufreq_verify_within_limits(policy, 0, max_freq);
--
1.7.7.3
^ permalink raw reply related
* Re: [Resend][PATCH] PM: Move disabling/enabling runtime PM to late suspend/early resume
From: Rafael J. Wysocki @ 2012-12-17 23:35 UTC (permalink / raw)
To: Ulf Hansson
Cc: Alan Stern, Jiri Kosina, Linux PM list, LKML, Jan-Matthias Braun
In-Reply-To: <CAPDyKFpXAokEr-ktU9=R+SWnfJvhRNhYPbmz-pw8-YpJiuNSSA@mail.gmail.com>
On Monday, December 17, 2012 10:18:14 PM Ulf Hansson wrote:
> On 16 December 2012 16:29, Alan Stern <stern@rowland.harvard.edu> wrote:
> > On Sun, 16 Dec 2012, Rafael J. Wysocki wrote:
> >
> >> On Saturday, December 15, 2012 10:16:29 PM Jiri Kosina wrote:
> >> > On Sat, 15 Dec 2012, Rafael J. Wysocki wrote:
> >> >
> >> > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >> > >
> >> > > Currently, the PM core disables runtime PM for all devices right
> >> > > after executing subsystem/driver .suspend() callbacks for them
> >> > > and re-enables it right before executing subsystem/driver .resume()
> >> > > callbacks for them. This may lead to problems when there are
> >> > > two devices such that the .suspend() callback executed for one of
> >> > > them depends on runtime PM working for the other. In that case,
> >> > > if runtime PM has already been disabled for the second device,
> >> > > the first one's .suspend() won't work correctly (and analogously
> >> > > for resume).
> >> > >
> >> > > To make those issues go away, make the PM core disable runtime PM
> >> > > for devices right before executing subsystem/driver .suspend_late()
> >> > > callbacks for them and enable runtime PM for them right after
> >> > > executing subsystem/driver .resume_early() callbacks for them. This
> >> > > way the potential conflitcs between .suspend_late()/.resume_early()
> >> > > and their runtime PM counterparts are still prevented from happening,
> >> > > but the subtle ordering issues related to disabling/enabling runtime
> >> > > PM for devices during system suspend/resume are much easier to avoid.
> >> > >
> >> > > Reported-and-tested-by: Jan-Matthias Braun <jan_braun@gmx.net>
> >> > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >> >
> >> > Hi Rafael,
> >> >
> >> > just curious what is the reason for resend? Do you want to gather more
> >> > Acks before pushing this upstream?
> >>
> >> Well, I thought that some people might actually look at it when they found it
> >> again in their mailboxes. :-)
> >
> > I did look at it the first time it appeared. It seemed to be okay, but
> > I haven't tried any testing.
> >
> > Alan Stern
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
>
> I believe this should work fine for ux500 platforms, so:
>
> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Thanks!
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
^ permalink raw reply
* Re: [regression] cpuidle_get_cpu_driver livelocks idle system
From: Rafael J. Wysocki @ 2012-12-17 23:33 UTC (permalink / raw)
To: Daniel Lezcano, Sivaram Nair
Cc: Russ Anderson, Peter De Schrijver, akpm@linux-foundation.org,
shuox.liu@intel.com, yanmin_zhang@intel.com,
linux-pm@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <50CF95AD.8030406@linaro.org>
On Monday, December 17, 2012 10:59:09 PM Daniel Lezcano wrote:
> On 12/17/2012 08:36 PM, Russ Anderson wrote:
> > The 3.7 kernel grinds to a halt on boot of a system with
> > 2048 cpus. NMI showed most of the cpus in
> > _raw_spin_lock in cpuidle_get_cpu_driver(). (backtrace below)
> >
> > A quick look at cpuidle_get_cpu_driver() shows the hot lock.
> >
> > In drivers/cpuidle/driver.c:
> > --------------------------------------------------------
> > /**
> > * cpuidle_get_cpu_driver - return the driver tied with a cpu
> > */
> > struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
> > {
> > struct cpuidle_driver *drv;
> >
> > if (!dev)
> > return NULL;
> >
> > spin_lock(&cpuidle_driver_lock);
> > drv = __cpuidle_get_cpu_driver(dev->cpu);
> > spin_unlock(&cpuidle_driver_lock);
> >
> > return drv;
> > }
> > --------------------------------------------------------
>
> Hi Russ,
>
> thanks for investigating the problem. You are right, there is a
> bottleneck here.
>
> Regarding how is used the cpuidle code, I think it is safe to remove the
> locks.
OK, a patch would be appreciated. :-)
If you prepare one, please explain in the changelog why it is safe to drop the
locks.
Thanks,
Rafael
> > This change was added in on Nov 14th, 2012.
> > http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commit;h=bf4d1b5ddb78f86078ac6ae0415802d5f0c68f92
> >
> > The patch says it adds support for cpus with different characteristics,
> > but adds a big global lock. The comment claims "no impact for the other
> > platforms if the option is disabled", which leads me to believe the
> > spin_lock was added inadvertently. CPU_IDLE_MULTIPLE_DRIVERS is off
> > in my config file.
> >
> > linux$ grep CPU_IDLE_MULTIPLE_DRIVERS .config
> > # CONFIG_CPU_IDLE_MULTIPLE_DRIVERS is not set
> >
> > As more cpus become idle, more cpus fight over the lock until
> > the system livelocks on the crushing weight of idle.
> >
> > The fix may be to move the spin_lock into __cpuidle_get_cpu_driver,
> > which has different versions for CONFIG_CPU_IDLE_MULTIPLE_DRIVERS,
> > to avoid impacting the disabled case, or get rid of the spin_lock
> > all together.
> >
> >
> > --------------------------------------------------------
> > == UV NMI process trace cpu 12: ==
> > CPU 12
> > Pid: 0, comm: swapper/12 Tainted: G O 3.7.0.rja-sgi+ #38
> > RIP: 0010:[<ffffffff81614e45>] [<ffffffff81614e45>] _raw_spin_lock+0x25/0x30
> > [...]
> > Call Trace:
> > [<ffffffff814c891c>] cpuidle_get_cpu_driver+0x1c/0x30
> > [<ffffffff814c871d>] cpuidle_idle_call+0x7d/0x1b0
> > [<ffffffff8101d08d>] cpu_idle+0xdd/0x130
> > [<ffffffff8160a3ea>] start_secondary+0xc6/0xcc
> > --------------------------------------------------------
> >
>
>
>
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
^ permalink raw reply
* Re: [PATCH 2/2] cpufreq: db8500: set CPUFREQ_CONST_LOOPS
From: Fabio Baltieri @ 2012-12-17 22:03 UTC (permalink / raw)
To: Stephen Boyd
Cc: John Stultz, Arnd Bergmann, Thomas Gleixner, Rafael J. Wysocki,
linux-pm, Linus Walleij, linux-kernel, cpufreq, linux-arm-kernel
In-Reply-To: <50CF634F.1040708@codeaurora.org>
On Mon, Dec 17, 2012 at 10:24:15AM -0800, Stephen Boyd wrote:
> On 12/04/12 02:10, Fabio Baltieri wrote:
> > static struct cpufreq_driver db8500_cpufreq_driver = {
> > - .flags = CPUFREQ_STICKY,
> > + .flags = CPUFREQ_STICKY | CPUFREQ_CONST_LOOPS,
>
> Wouldn't you want to fold this into the previous patch so bisection is
> not broken? Otherwise you have a place where lpj is all screwed up when
> cpufreq mucks with it.
You are basically right... but that's not going to be as screwed up as
it's now, so I'd say that it's not worth the rebase unless the
maintainer says so. :-)
Thanks!
Fabio
--
Fabio Baltieri
^ permalink raw reply
* Re: [regression] cpuidle_get_cpu_driver livelocks idle system
From: Daniel Lezcano @ 2012-12-17 21:59 UTC (permalink / raw)
To: Russ Anderson
Cc: rafael.j.wysocki@intel.com, Sivaram Nair, Peter De Schrijver,
akpm@linux-foundation.org, shuox.liu@intel.com,
yanmin_zhang@intel.com, linux-pm@vger.kernel.org,
linux-kernel@vger.kernel.org
In-Reply-To: <20121217193612.GA28600@sgi.com>
On 12/17/2012 08:36 PM, Russ Anderson wrote:
> The 3.7 kernel grinds to a halt on boot of a system with
> 2048 cpus. NMI showed most of the cpus in
> _raw_spin_lock in cpuidle_get_cpu_driver(). (backtrace below)
>
> A quick look at cpuidle_get_cpu_driver() shows the hot lock.
>
> In drivers/cpuidle/driver.c:
> --------------------------------------------------------
> /**
> * cpuidle_get_cpu_driver - return the driver tied with a cpu
> */
> struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
> {
> struct cpuidle_driver *drv;
>
> if (!dev)
> return NULL;
>
> spin_lock(&cpuidle_driver_lock);
> drv = __cpuidle_get_cpu_driver(dev->cpu);
> spin_unlock(&cpuidle_driver_lock);
>
> return drv;
> }
> --------------------------------------------------------
Hi Russ,
thanks for investigating the problem. You are right, there is a
bottleneck here.
Regarding how is used the cpuidle code, I think it is safe to remove the
locks.
> This change was added in on Nov 14th, 2012.
> http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commit;h=bf4d1b5ddb78f86078ac6ae0415802d5f0c68f92
>
> The patch says it adds support for cpus with different characteristics,
> but adds a big global lock. The comment claims "no impact for the other
> platforms if the option is disabled", which leads me to believe the
> spin_lock was added inadvertently. CPU_IDLE_MULTIPLE_DRIVERS is off
> in my config file.
>
> linux$ grep CPU_IDLE_MULTIPLE_DRIVERS .config
> # CONFIG_CPU_IDLE_MULTIPLE_DRIVERS is not set
>
> As more cpus become idle, more cpus fight over the lock until
> the system livelocks on the crushing weight of idle.
>
> The fix may be to move the spin_lock into __cpuidle_get_cpu_driver,
> which has different versions for CONFIG_CPU_IDLE_MULTIPLE_DRIVERS,
> to avoid impacting the disabled case, or get rid of the spin_lock
> all together.
>
>
> --------------------------------------------------------
> == UV NMI process trace cpu 12: ==
> CPU 12
> Pid: 0, comm: swapper/12 Tainted: G O 3.7.0.rja-sgi+ #38
> RIP: 0010:[<ffffffff81614e45>] [<ffffffff81614e45>] _raw_spin_lock+0x25/0x30
> [...]
> Call Trace:
> [<ffffffff814c891c>] cpuidle_get_cpu_driver+0x1c/0x30
> [<ffffffff814c871d>] cpuidle_idle_call+0x7d/0x1b0
> [<ffffffff8101d08d>] cpu_idle+0xdd/0x130
> [<ffffffff8160a3ea>] start_secondary+0xc6/0xcc
> --------------------------------------------------------
>
--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
^ permalink raw reply
* Re: [Resend][PATCH] PM: Move disabling/enabling runtime PM to late suspend/early resume
From: Ulf Hansson @ 2012-12-17 21:18 UTC (permalink / raw)
To: Alan Stern
Cc: Rafael J. Wysocki, Jiri Kosina, Linux PM list, LKML,
Jan-Matthias Braun
In-Reply-To: <Pine.LNX.4.44L0.1212161028330.13850-100000@netrider.rowland.org>
On 16 December 2012 16:29, Alan Stern <stern@rowland.harvard.edu> wrote:
> On Sun, 16 Dec 2012, Rafael J. Wysocki wrote:
>
>> On Saturday, December 15, 2012 10:16:29 PM Jiri Kosina wrote:
>> > On Sat, 15 Dec 2012, Rafael J. Wysocki wrote:
>> >
>> > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>> > >
>> > > Currently, the PM core disables runtime PM for all devices right
>> > > after executing subsystem/driver .suspend() callbacks for them
>> > > and re-enables it right before executing subsystem/driver .resume()
>> > > callbacks for them. This may lead to problems when there are
>> > > two devices such that the .suspend() callback executed for one of
>> > > them depends on runtime PM working for the other. In that case,
>> > > if runtime PM has already been disabled for the second device,
>> > > the first one's .suspend() won't work correctly (and analogously
>> > > for resume).
>> > >
>> > > To make those issues go away, make the PM core disable runtime PM
>> > > for devices right before executing subsystem/driver .suspend_late()
>> > > callbacks for them and enable runtime PM for them right after
>> > > executing subsystem/driver .resume_early() callbacks for them. This
>> > > way the potential conflitcs between .suspend_late()/.resume_early()
>> > > and their runtime PM counterparts are still prevented from happening,
>> > > but the subtle ordering issues related to disabling/enabling runtime
>> > > PM for devices during system suspend/resume are much easier to avoid.
>> > >
>> > > Reported-and-tested-by: Jan-Matthias Braun <jan_braun@gmx.net>
>> > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>> >
>> > Hi Rafael,
>> >
>> > just curious what is the reason for resend? Do you want to gather more
>> > Acks before pushing this upstream?
>>
>> Well, I thought that some people might actually look at it when they found it
>> again in their mailboxes. :-)
>
> I did look at it the first time it appeared. It seemed to be okay, but
> I haven't tried any testing.
>
> Alan Stern
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
I believe this should work fine for ux500 platforms, so:
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Kind regards
Ulf Hansson
^ permalink raw reply
* [regression] cpuidle_get_cpu_driver livelocks idle system
From: Russ Anderson @ 2012-12-17 19:36 UTC (permalink / raw)
To: daniel.lezcano@linaro.org, rafael.j.wysocki@intel.com
Cc: Sivaram Nair, Peter De Schrijver, akpm@linux-foundation.org,
shuox.liu@intel.com, yanmin_zhang@intel.com,
linux-pm@vger.kernel.org, linux-kernel@vger.kernel.org,
Russ Anderson
The 3.7 kernel grinds to a halt on boot of a system with
2048 cpus. NMI showed most of the cpus in
_raw_spin_lock in cpuidle_get_cpu_driver(). (backtrace below)
A quick look at cpuidle_get_cpu_driver() shows the hot lock.
In drivers/cpuidle/driver.c:
--------------------------------------------------------
/**
* cpuidle_get_cpu_driver - return the driver tied with a cpu
*/
struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
{
struct cpuidle_driver *drv;
if (!dev)
return NULL;
spin_lock(&cpuidle_driver_lock);
drv = __cpuidle_get_cpu_driver(dev->cpu);
spin_unlock(&cpuidle_driver_lock);
return drv;
}
--------------------------------------------------------
This change was added in on Nov 14th, 2012.
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commit;h=bf4d1b5ddb78f86078ac6ae0415802d5f0c68f92
The patch says it adds support for cpus with different characteristics,
but adds a big global lock. The comment claims "no impact for the other
platforms if the option is disabled", which leads me to believe the
spin_lock was added inadvertently. CPU_IDLE_MULTIPLE_DRIVERS is off
in my config file.
linux$ grep CPU_IDLE_MULTIPLE_DRIVERS .config
# CONFIG_CPU_IDLE_MULTIPLE_DRIVERS is not set
As more cpus become idle, more cpus fight over the lock until
the system livelocks on the crushing weight of idle.
The fix may be to move the spin_lock into __cpuidle_get_cpu_driver,
which has different versions for CONFIG_CPU_IDLE_MULTIPLE_DRIVERS,
to avoid impacting the disabled case, or get rid of the spin_lock
all together.
--------------------------------------------------------
== UV NMI process trace cpu 12: ==
CPU 12
Pid: 0, comm: swapper/12 Tainted: G O 3.7.0.rja-sgi+ #38
RIP: 0010:[<ffffffff81614e45>] [<ffffffff81614e45>] _raw_spin_lock+0x25/0x30
[...]
Call Trace:
[<ffffffff814c891c>] cpuidle_get_cpu_driver+0x1c/0x30
[<ffffffff814c871d>] cpuidle_idle_call+0x7d/0x1b0
[<ffffffff8101d08d>] cpu_idle+0xdd/0x130
[<ffffffff8160a3ea>] start_secondary+0xc6/0xcc
--------------------------------------------------------
--
Russ Anderson, OS RAS/Partitioning Project Lead
SGI - Silicon Graphics Inc rja@sgi.com
^ permalink raw reply
* [GIT PULL] power tools for Linux-3.8
From: Len Brown @ 2012-12-17 19:17 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Linux PM list, linux-kernel
Hi Linus,
Please pull these power tools patches.
This updates the tree w/ the latest version of turbostat,
which reports temperature and - on SNB and later - Watts.
There will be a logical conflict with the uapi changes
which breaks the turbostat build. It can be fixed this way:
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index e48ef07..f09641d 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -5,7 +5,7 @@ DESTDIR :=
turbostat : turbostat.c
CFLAGS += -Wall
-CFLAGS += -I../../../../arch/x86/include/
+CFLAGS += -I../../../../arch/x86/include/uapi/
thanks!
Len Brown, Intel Open Source Technology Center
The following changes since commit d91bb17c2a874493603c4d99db305d8caf2d180c:
tools/power turbostat: graceful fail on garbage input (2012-11-01 00:22:00 -0400)
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git next
for you to fetch changes up to 55f1f545f709a6023371848028a3029118855576:
tools: Allow tools to be installed in a user specified location (2012-11-30 01:09:45 -0500)
----------------------------------------------------------------
Colin Ian King (1):
tools/power x86_energy_perf_policy: close /proc/stat in for_every_cpu()
Josh Boyer (1):
tools: Allow tools to be installed in a user specified location
Len Brown (5):
tools/power/x86/turbostat: share kernel MSR #defines
x86 power: define RAPL MSRs
tools/power turbostat: prevent infinite loop on migration error path
tools/power turbostat: fix output buffering issue
tools/power turbostat: v3.0: monitor Watts and Temperature
Mark Asselstine (1):
tools/power: turbostat: make Makefile a bit more capable
arch/x86/include/asm/msr-index.h | 37 ++
tools/power/x86/turbostat/Makefile | 21 +-
tools/power/x86/turbostat/turbostat.8 | 103 ++--
tools/power/x86/turbostat/turbostat.c | 677 +++++++++++++++++++--
tools/power/x86/x86_energy_perf_policy/Makefile | 6 +-
.../x86_energy_perf_policy.c | 2 +-
6 files changed, 765 insertions(+), 81 deletions(-)
^ permalink raw reply related
* Re: [PATCH 0/2] clocksource: nomadik-mtu: support timer-based delay
From: John Stultz @ 2012-12-17 18:59 UTC (permalink / raw)
To: Linus Walleij
Cc: Fabio Baltieri, Arnd Bergmann, Thomas Gleixner, Rafael J. Wysocki,
cpufreq, linux-pm, linux-arm-kernel, linux-kernel
In-Reply-To: <CACRpkdYRJvFEUvB++fKgVWEXZrTQ7aob0MPMwW4TBs_VRNuzsA@mail.gmail.com>
On 12/17/2012 10:38 AM, Linus Walleij wrote:
> On Mon, Dec 17, 2012 at 12:36 PM, Fabio Baltieri
> <fabio.baltieri@linaro.org> wrote:
>> On Tue, Dec 04, 2012 at 11:10:43AM +0100, Fabio Baltieri wrote:
>>> this implements timer-based delay support for nomadik and ux500
>>> platforms, using the MTU as time source, and marks the u8500 cpufreq
>>> driver as CPUFREQ_CONST_LOOPS accordingly.
>>>
>>> The patches are based on Arnd's arm-soc/ux500/mtu-clk branch, as that
>>> contains latest MTU driver developments, including a driver move/rename,
>>> but I can rebase if necessary.
>> The patches applies cleanly on current mainline now, as all dependencies
>> has already been merged, can this be considered for for merging too?
> If no clocksource maintainer steps up in a week then notify me and
> I'll simply stick these into the ux500 tree.
Yea, I'd actually prefer these go through the arch tree where they can
get testing.
thanks
-john
^ permalink raw reply
* Re: [PATCH 0/2] clocksource: nomadik-mtu: support timer-based delay
From: Linus Walleij @ 2012-12-17 18:38 UTC (permalink / raw)
To: Fabio Baltieri, John Stultz, Arnd Bergmann, Thomas Gleixner,
Rafael J. Wysocki, cpufreq, linux-pm, Linus Walleij,
linux-arm-kernel, linux-kernel
In-Reply-To: <20121217113611.GA31470@balto.lan>
On Mon, Dec 17, 2012 at 12:36 PM, Fabio Baltieri
<fabio.baltieri@linaro.org> wrote:
> On Tue, Dec 04, 2012 at 11:10:43AM +0100, Fabio Baltieri wrote:
>> this implements timer-based delay support for nomadik and ux500
>> platforms, using the MTU as time source, and marks the u8500 cpufreq
>> driver as CPUFREQ_CONST_LOOPS accordingly.
>>
>> The patches are based on Arnd's arm-soc/ux500/mtu-clk branch, as that
>> contains latest MTU driver developments, including a driver move/rename,
>> but I can rebase if necessary.
>
> The patches applies cleanly on current mainline now, as all dependencies
> has already been merged, can this be considered for for merging too?
If no clocksource maintainer steps up in a week then notify me and
I'll simply stick these into the ux500 tree.
Yours,
Linus Walleij
^ permalink raw reply
* Re: turbostat tool update for Linux-3.8
From: Len Brown @ 2012-12-17 18:34 UTC (permalink / raw)
To: Betty Dall; +Cc: linux-pm, linux-kernel
In-Reply-To: <1353011874.3055.22.camel@ejdallLaptop>
On 11/15/2012 03:37 PM, Betty Dall wrote:
> On Wed, 2012-11-14 at 15:43 -0500, Len Brown wrote:
>> Here are some turbostat patches I have staged.
>> The 1st two I've requested be pulled into 3.7,
>> the rest are for 3.8
>>
>> The final patch allows turbostat to print Watts
>> as measured by hardware RAPL counters -- something
>> that people have been asking for.
>>
>> Please let me know if you see troubles with any of these patches.
>
> Hi Len,
>
> I tested out these patches on an IvyBridge system and see the new Watts
> fields. They look like reasonable numbers to me. I ran with the system
> idle and then with lookbusy -c 50 and saw the Watts increase. Is there
> anything else to do to validate the numbers? In one case I saw that the
> Pkg_W for the system was off by .01, e.g. socket 0 was 28.33 and socket
> 1 was 29.74 and the system total was 58.08 instead of 58.07. That is
> probably fine and just rounding up.
This is likely a result of printf truncation.
note that these numbers are added internally w/o that truncation,
and then their sum is printed.
If the total system power is what you're looking for, note that
you need to calibrate these numbers vs an external A/C watt meter.
eg. add a base Watts to idle -- which covers things like fans
and fixed power supply loss, then use a coefficient on the counter
to handle factors such as power conversion loss, which tend to be
somewhat linear with load.
With constant temperature, I've found this trivial curve fitting
to be remarkably accurate.
eg. on a dual SNB Xeon box I have...
45 Watts + 1.25 * RAPL-Watt-Meter = System A/C watts
within a couple %.
cheers,
-Len
^ permalink raw reply
* Re: [PATCH 2/2] cpufreq: db8500: set CPUFREQ_CONST_LOOPS
From: Stephen Boyd @ 2012-12-17 18:24 UTC (permalink / raw)
To: Fabio Baltieri
Cc: Arnd Bergmann, linux-pm, John Stultz, Linus Walleij, linux-kernel,
cpufreq, Rafael J. Wysocki, Thomas Gleixner, linux-arm-kernel
In-Reply-To: <1354615845-2758-3-git-send-email-fabio.baltieri@linaro.org>
On 12/04/12 02:10, Fabio Baltieri wrote:
> As ux500 is being converted to timer based delay loops, and the timer
> used is not depending on CPUs clock frequency, set cpufreq_driver flag
> CPUFREQ_CONST_LOOPS to prevent cpufreq rescaling loops_for_jiffies.
>
> Acked-by: Linus Walleij <linus.walleij@linaro.org>
> Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
> ---
> drivers/cpufreq/db8500-cpufreq.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/cpufreq/db8500-cpufreq.c b/drivers/cpufreq/db8500-cpufreq.c
> index 74b830b..e758891 100644
> --- a/drivers/cpufreq/db8500-cpufreq.c
> +++ b/drivers/cpufreq/db8500-cpufreq.c
> @@ -150,7 +150,7 @@ static int __cpuinit db8500_cpufreq_init(struct cpufreq_policy *policy)
> }
>
> static struct cpufreq_driver db8500_cpufreq_driver = {
> - .flags = CPUFREQ_STICKY,
> + .flags = CPUFREQ_STICKY | CPUFREQ_CONST_LOOPS,
> .verify = db8500_cpufreq_verify_speed,
> .target = db8500_cpufreq_target,
> .get = db8500_cpufreq_getspeed,
Wouldn't you want to fold this into the previous patch so bisection is
not broken? Otherwise you have a place where lpj is all screwed up when
cpufreq mucks with it.
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation
^ permalink raw reply
* Re: [PATCH 2/2] cpuidle: fix sysfs output for power_usage
From: Rafael J. Wysocki @ 2012-12-17 12:09 UTC (permalink / raw)
To: Sivaram Nair
Cc: rafael.j.wysocki@intel.com, daniel.lezcano@linaro.org,
shuox.liu@intel.com, akpm@linux-foundation.org,
yanmin_zhang@intel.com, linux-pm@vger.kernel.org,
linux-kernel@vger.kernel.org
In-Reply-To: <20121217084607.GN10090@sivaramn-lnx>
On Monday, December 17, 2012 10:46:07 AM Sivaram Nair wrote:
> On Mon, Dec 17, 2012 at 08:56:45AM +0100, Rafael J. Wysocki wrote:
> > On Monday, December 17, 2012 09:38:15 AM Sivaram Nair wrote:
> > > On Sat, Dec 15, 2012 at 01:03:02AM +0100, Rafael J. Wysocki wrote:
> > > > On Friday, December 14, 2012 03:17:37 PM Sivaram Nair wrote:
> > > > > cpuidle_state->power_usage is signed; so change the corresponding sysfs
> > > > > ops to output signed value instead of unsigned.
> > > >
> > > > What's actually wrong with printing it as an unsigned int?
> > >
> > > power_usage could have negative values (for example cpuidle/driver.c
> > > inits this value to -1, -2 etc. when drv->power_specified is not set) and
> > > these shows up badly in the sysfs output.
> >
> > Does "badly" mean "as big positive numbers"?
>
> Yes (sorry for not being clearer).
>
> >
> > Should we actually print them at all in those case? Perhaps it'll be better to
> > make the file appear empty then?
>
> May be, but why is power_usage signed in the first place?
Please read the comment in driver.c:set_power_states() for the answer. :-)
> I also noticed
> Daniel Lezcano's patches that reduces the scope of this variable. So,
> perhaps we can just ignore this change.
OK
Rafael
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
^ permalink raw reply
* Re: [PATCH 0/2] clocksource: nomadik-mtu: support timer-based delay
From: Fabio Baltieri @ 2012-12-17 11:36 UTC (permalink / raw)
To: John Stultz, Arnd Bergmann, Thomas Gleixner
Cc: Rafael J. Wysocki, cpufreq, linux-pm, Linus Walleij,
linux-arm-kernel, linux-kernel
In-Reply-To: <1354615845-2758-1-git-send-email-fabio.baltieri@linaro.org>
Hi all,
On Tue, Dec 04, 2012 at 11:10:43AM +0100, Fabio Baltieri wrote:
> this implements timer-based delay support for nomadik and ux500
> platforms, using the MTU as time source, and marks the u8500 cpufreq
> driver as CPUFREQ_CONST_LOOPS accordingly.
>
> The patches are based on Arnd's arm-soc/ux500/mtu-clk branch, as that
> contains latest MTU driver developments, including a driver move/rename,
> but I can rebase if necessary.
The patches applies cleanly on current mainline now, as all dependencies
has already been merged, can this be considered for for merging too?
Also, the nomadik-mtu driver has been moved into drivers/clocksource so
do anybody knows if this this should end up into arm-soc or in the
clocksource tree?
Thanks,
Fabio
> Fabio Baltieri (2):
> clocksource: nomadik-mtu: support timer-based delay
> cpufreq: db8500: set CPUFREQ_CONST_LOOPS
>
> drivers/clocksource/nomadik-mtu.c | 11 +++++++++++
> drivers/cpufreq/db8500-cpufreq.c | 2 +-
> 2 files changed, 12 insertions(+), 1 deletion(-)
--
Fabio Baltieri
^ permalink raw reply
* Re: [PATCH 2/2] cpuidle: fix sysfs output for power_usage
From: Sivaram Nair @ 2012-12-17 8:46 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: rafael.j.wysocki@intel.com, daniel.lezcano@linaro.org,
shuox.liu@intel.com, akpm@linux-foundation.org,
yanmin_zhang@intel.com, linux-pm@vger.kernel.org,
linux-kernel@vger.kernel.org
In-Reply-To: <4160971.sv9ruOS3QC@vostro.rjw.lan>
On Mon, Dec 17, 2012 at 08:56:45AM +0100, Rafael J. Wysocki wrote:
> On Monday, December 17, 2012 09:38:15 AM Sivaram Nair wrote:
> > On Sat, Dec 15, 2012 at 01:03:02AM +0100, Rafael J. Wysocki wrote:
> > > On Friday, December 14, 2012 03:17:37 PM Sivaram Nair wrote:
> > > > cpuidle_state->power_usage is signed; so change the corresponding sysfs
> > > > ops to output signed value instead of unsigned.
> > >
> > > What's actually wrong with printing it as an unsigned int?
> >
> > power_usage could have negative values (for example cpuidle/driver.c
> > inits this value to -1, -2 etc. when drv->power_specified is not set) and
> > these shows up badly in the sysfs output.
>
> Does "badly" mean "as big positive numbers"?
Yes (sorry for not being clearer).
>
> Should we actually print them at all in those case? Perhaps it'll be better to
> make the file appear empty then?
May be, but why is power_usage signed in the first place? I also noticed
Daniel Lezcano's patches that reduces the scope of this variable. So,
perhaps we can just ignore this change.
-Sivaram
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox