* [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold Support to
@ 2011-07-05 11:38 Durgadoss R
2011-07-05 19:58 ` [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold Guenter Roeck
` (5 more replies)
0 siblings, 6 replies; 7+ messages in thread
From: Durgadoss R @ 2011-07-05 11:38 UTC (permalink / raw)
To: lm-sensors
This patch adds the core and pkg support to coretemp.
These thresholds can be configured via the sysfs interfaces tempX_max
and tempX_max_hyst. An interrupt is generated when CPU temperature reaches
or crosses above tempX_max OR drops below tempX_max_hyst.
This patch is based on the documentation in IA Manual vol 3A, that can be
downloaded from here:
http://download.intel.com/design/processor/manuals/253668.pdf
Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
---
Documentation/hwmon/coretemp | 7 ++
drivers/hwmon/coretemp.c | 171 +++++++++++++++++++++++++++++++-----------
2 files changed, 133 insertions(+), 45 deletions(-)
diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp
index f85e913..fa8776a 100644
--- a/Documentation/hwmon/coretemp
+++ b/Documentation/hwmon/coretemp
@@ -35,6 +35,13 @@ the Out-Of-Spec bit. Following table summarizes the exported sysfs files:
All Sysfs entries are named with their core_id (represented here by 'X').
tempX_input - Core temperature (in millidegrees Celsius).
tempX_max - All cooling devices should be turned on (on Core2).
+ Initialized with IA32_THERM_INTERRUPT. When the CPU
+ temperature reaches this temperature, an interrupt is
+ generated and tempX_max_alarm is set.
+tempX_max_hyst - If the CPU temperature falls below than temperature,
+ an interrupt is generated and tempX_max_alarm is reset.
+tempX_max_alarm - Set if the temperature reaches or exceeds tempX_max.
+ Reset if the temperature drops to or below tempX_max_hyst.
tempX_crit - Maximum junction temperature (in millidegrees Celsius).
tempX_crit_alarm - Set when Out-of-spec bit is set, never clears.
Correct CPU operation is no longer guaranteed.
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index de3d246..4aec5f3 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -44,7 +44,9 @@
#define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
#define NUM_REAL_CORES 16 /* Number of Real cores per cpu */
#define CORETEMP_NAME_LENGTH 17 /* String Length of attrs */
-#define MAX_ATTRS 5 /* Maximum no of per-core attrs */
+#define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */
+#define MAX_THRESH_ATTRS 3 /* Maximum no of Threshold attrs */
+#define TOTAL_ATTRS (MAX_CORE_ATTRS + MAX_THRESH_ATTRS)
#define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
#ifdef CONFIG_SMP
@@ -67,6 +69,9 @@
* This value is passed as "id" field to rdmsr/wrmsr functions.
* @status_reg: One of IA32_THERM_STATUS or IA32_PACKAGE_THERM_STATUS,
* from where the temperature values should be read.
+ * @intrpt_reg: One of IA32_THERM_INTERRUPT or IA32_PACKAGE_THERM_INTERRUPT,
+ * from where the thresholds are read.
+ * @attr_size: Total number of pre-core attrs displayed in the sysfs.
* @is_pkg_data: If this is 1, the temp_data holds pkgtemp data.
* Otherwise, temp_data holds coretemp data.
* @valid: If this is 1, the current temperature is valid.
@@ -74,15 +79,18 @@
struct temp_data {
int temp;
int ttarget;
+ int tmin;
int tjmax;
unsigned long last_updated;
unsigned int cpu;
u32 cpu_core_id;
u32 status_reg;
+ u32 intrpt_reg;
+ int attr_size;
bool is_pkg_data;
bool valid;
- struct sensor_device_attribute sd_attrs[MAX_ATTRS];
- char attr_name[MAX_ATTRS][CORETEMP_NAME_LENGTH];
+ struct sensor_device_attribute sd_attrs[TOTAL_ATTRS];
+ char attr_name[TOTAL_ATTRS][CORETEMP_NAME_LENGTH];
struct mutex update_lock;
};
@@ -137,6 +145,19 @@ static ssize_t show_crit_alarm(struct device *dev,
return sprintf(buf, "%d\n", (eax >> 5) & 1);
}
+static ssize_t show_max_alarm(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ u32 eax, edx;
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+ struct platform_data *pdata = dev_get_drvdata(dev);
+ struct temp_data *tdata = pdata->core_data[attr->index];
+
+ rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
+
+ return sprintf(buf, "%d\n", !!(eax & THERM_LOG_THRESHOLD1));
+}
+
static ssize_t show_tjmax(struct device *dev,
struct device_attribute *devattr, char *buf)
{
@@ -155,6 +176,77 @@ static ssize_t show_ttarget(struct device *dev,
return sprintf(buf, "%d\n", pdata->core_data[attr->index]->ttarget);
}
+static ssize_t store_ttarget(struct device *dev,
+ struct device_attribute *devattr,
+ const char *buf, size_t count)
+{
+ struct platform_data *pdata = dev_get_drvdata(dev);
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+ struct temp_data *tdata = pdata->core_data[attr->index];
+ u32 eax, edx;
+ unsigned long val;
+ int diff;
+
+ if (strict_strtoul(buf, 10, &val))
+ return -EINVAL;
+
+ /* THERM_MASK_THRESHOLD1 is 7 bits wide. So don't accept val > 127 */
+ if (val > tdata->tjmax || val > 127)
+ return -EINVAL;
+
+ diff = (tdata->tjmax - val) / 1000;
+
+ mutex_lock(&tdata->update_lock);
+ rdmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, &eax, &edx);
+ eax = (eax & ~THERM_MASK_THRESHOLD1) |
+ (diff << THERM_SHIFT_THRESHOLD1);
+ wrmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, eax, edx);
+ tdata->ttarget = val;
+ mutex_unlock(&tdata->update_lock);
+
+ return count;
+}
+
+static ssize_t show_tmin(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+ struct platform_data *pdata = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", pdata->core_data[attr->index]->tmin);
+}
+
+static ssize_t store_tmin(struct device *dev,
+ struct device_attribute *devattr,
+ const char *buf, size_t count)
+{
+ struct platform_data *pdata = dev_get_drvdata(dev);
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+ struct temp_data *tdata = pdata->core_data[attr->index];
+ u32 eax, edx;
+ unsigned long val;
+ int diff;
+
+ if (strict_strtoul(buf, 10, &val))
+ return -EINVAL;
+
+ /* THERM_MASK_THRESHOLD0 is 7 bits wide. So don't accept val > 127 */
+ if (val > tdata->tjmax || val > 127)
+ return -EINVAL;
+
+ diff = (tdata->tjmax - val) / 1000;
+
+ mutex_lock(&tdata->update_lock);
+ rdmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, &eax, &edx);
+ eax = (eax & ~THERM_MASK_THRESHOLD0) |
+ (diff << THERM_SHIFT_THRESHOLD0);
+ wrmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, eax, edx);
+ tdata->tmin = val;
+ mutex_unlock(&tdata->update_lock);
+
+ return count;
+}
+
static ssize_t show_temp(struct device *dev,
struct device_attribute *devattr, char *buf)
{
@@ -361,23 +453,31 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev,
int attr_no)
{
int err, i;
- static ssize_t (*rd_ptr[MAX_ATTRS]) (struct device *dev,
+ static ssize_t (*rd_ptr[TOTAL_ATTRS]) (struct device *dev,
struct device_attribute *devattr, char *buf) = {
- show_label, show_crit_alarm, show_ttarget,
- show_temp, show_tjmax };
- static const char *names[MAX_ATTRS] = {
+ show_label, show_crit_alarm, show_temp, show_tjmax,
+ show_max_alarm, show_ttarget, show_tmin };
+ static ssize_t (*rw_ptr[TOTAL_ATTRS]) (struct device *dev,
+ struct device_attribute *devattr, const char *buf,
+ size_t count) = { NULL, NULL, NULL, NULL, NULL,
+ store_ttarget, store_tmin };
+ static const char *names[TOTAL_ATTRS] = {
"temp%d_label", "temp%d_crit_alarm",
- "temp%d_max", "temp%d_input",
- "temp%d_crit" };
+ "temp%d_input", "temp%d_crit",
+ "temp%d_max_alarm", "temp%d_max",
+ "temp%d_max_hyst" };
- for (i = 0; i < MAX_ATTRS; i++) {
+ for (i = 0; i < tdata->attr_size; i++) {
snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH, names[i],
attr_no);
sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr);
tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i];
tdata->sd_attrs[i].dev_attr.attr.mode = S_IRUGO;
+ if (rw_ptr[i]) {
+ tdata->sd_attrs[i].dev_attr.attr.mode |= S_IWUSR;
+ tdata->sd_attrs[i].dev_attr.store = rw_ptr[i];
+ }
tdata->sd_attrs[i].dev_attr.show = rd_ptr[i];
- tdata->sd_attrs[i].dev_attr.store = NULL;
tdata->sd_attrs[i].index = attr_no;
err = device_create_file(dev, &tdata->sd_attrs[i].dev_attr);
if (err)
@@ -391,38 +491,6 @@ exit_free:
return err;
}
-static void update_ttarget(__u8 cpu_model, struct temp_data *tdata,
- struct device *dev)
-{
- int err;
- u32 eax, edx;
-
- /*
- * Initialize ttarget value. Eventually this will be
- * initialized with the value from MSR_IA32_THERM_INTERRUPT
- * register. If IA32_TEMPERATURE_TARGET is supported, this
- * value will be over written below.
- * To Do: Patch to initialize ttarget from MSR_IA32_THERM_INTERRUPT
- */
- tdata->ttarget = tdata->tjmax - 20000;
-
- /*
- * Read the still undocumented IA32_TEMPERATURE_TARGET. It exists
- * on older CPUs but not in this register,
- * Atoms don't have it either.
- */
- if (cpu_model > 0xe && cpu_model != 0x1c) {
- err = rdmsr_safe_on_cpu(tdata->cpu,
- MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
- if (err) {
- dev_warn(dev,
- "Unable to read IA32_TEMPERATURE_TARGET MSR\n");
- } else {
- tdata->ttarget = tdata->tjmax -
- ((eax >> 8) & 0xff) * 1000;
- }
- }
-}
static int __devinit chk_ucode_version(struct platform_device *pdev)
{
@@ -481,9 +549,12 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
tdata->status_reg = pkg_flag ? MSR_IA32_PACKAGE_THERM_STATUS :
MSR_IA32_THERM_STATUS;
+ tdata->intrpt_reg = pkg_flag ? MSR_IA32_PACKAGE_THERM_INTERRUPT :
+ MSR_IA32_THERM_INTERRUPT;
tdata->is_pkg_data = pkg_flag;
tdata->cpu = cpu;
tdata->cpu_core_id = TO_CORE_ID(cpu);
+ tdata->attr_size = MAX_CORE_ATTRS;
mutex_init(&tdata->update_lock);
return tdata;
}
@@ -533,7 +604,17 @@ static int create_core_data(struct platform_data *pdata,
else
tdata->tjmax = get_tjmax(c, cpu, &pdev->dev);
- update_ttarget(c->x86_model, tdata, &pdev->dev);
+ /*
+ * Test if we can access the intrpt register. If so, increase the
+ * 'size' enough to have ttarget/tmin/max_alarm interfaces.
+ * Initialize ttarget with bits 16:22 of MSR_IA32_THERM_INTERRUPT
+ */
+ err = rdmsr_safe_on_cpu(cpu, tdata->intrpt_reg, &eax, &edx);
+ if (!err) {
+ tdata->attr_size += MAX_THRESH_ATTRS;
+ tdata->ttarget = tdata->tjmax - ((eax >> 16) & 0x7f) * 1000;
+ }
+
pdata->core_data[attr_no] = tdata;
/* Create sysfs interfaces */
@@ -570,7 +651,7 @@ static void coretemp_remove_core(struct platform_data *pdata,
struct temp_data *tdata = pdata->core_data[indx];
/* Remove the sysfs attributes */
- for (i = 0; i < MAX_ATTRS; i++)
+ for (i = 0; i < tdata->attr_size; i++)
device_remove_file(dev, &tdata->sd_attrs[i].dev_attr);
kfree(pdata->core_data[indx]);
--
1.6.1
_______________________________________________
lm-sensors mailing list
lm-sensors@lm-sensors.org
http://lists.lm-sensors.org/mailman/listinfo/lm-sensors
^ permalink raw reply related [flat|nested] 7+ messages in thread* Re: [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold
2011-07-05 11:38 [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold Support to Durgadoss R
@ 2011-07-05 19:58 ` Guenter Roeck
2011-07-06 7:57 ` R, Durgadoss
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Guenter Roeck @ 2011-07-05 19:58 UTC (permalink / raw)
To: lm-sensors
Hi Durgadoss,
On Tue, Jul 05, 2011 at 01:02:14PM -0400, Durgadoss R wrote:
> This patch adds the core and pkg support to coretemp.
> These thresholds can be configured via the sysfs interfaces tempX_max
> and tempX_max_hyst. An interrupt is generated when CPU temperature reaches
> or crosses above tempX_max OR drops below tempX_max_hyst.
>
> This patch is based on the documentation in IA Manual vol 3A, that can be
> downloaded from here:
> http://download.intel.com/design/processor/manuals/253668.pdf
>
> Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
> ---
> Documentation/hwmon/coretemp | 7 ++
> drivers/hwmon/coretemp.c | 171 +++++++++++++++++++++++++++++++-----------
> 2 files changed, 133 insertions(+), 45 deletions(-)
>
> diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp
> index f85e913..fa8776a 100644
> --- a/Documentation/hwmon/coretemp
> +++ b/Documentation/hwmon/coretemp
> @@ -35,6 +35,13 @@ the Out-Of-Spec bit. Following table summarizes the exported sysfs files:
> All Sysfs entries are named with their core_id (represented here by 'X').
> tempX_input - Core temperature (in millidegrees Celsius).
> tempX_max - All cooling devices should be turned on (on Core2).
> + Initialized with IA32_THERM_INTERRUPT. When the CPU
> + temperature reaches this temperature, an interrupt is
> + generated and tempX_max_alarm is set.
> +tempX_max_hyst - If the CPU temperature falls below than temperature,
> + an interrupt is generated and tempX_max_alarm is reset.
> +tempX_max_alarm - Set if the temperature reaches or exceeds tempX_max.
> + Reset if the temperature drops to or below tempX_max_hyst.
> tempX_crit - Maximum junction temperature (in millidegrees Celsius).
> tempX_crit_alarm - Set when Out-of-spec bit is set, never clears.
> Correct CPU operation is no longer guaranteed.
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index de3d246..4aec5f3 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -44,7 +44,9 @@
> #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
> #define NUM_REAL_CORES 16 /* Number of Real cores per cpu */
> #define CORETEMP_NAME_LENGTH 17 /* String Length of attrs */
> -#define MAX_ATTRS 5 /* Maximum no of per-core attrs */
> +#define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */
> +#define MAX_THRESH_ATTRS 3 /* Maximum no of Threshold attrs */
> +#define TOTAL_ATTRS (MAX_CORE_ATTRS + MAX_THRESH_ATTRS)
> #define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
>
> #ifdef CONFIG_SMP
> @@ -67,6 +69,9 @@
> * This value is passed as "id" field to rdmsr/wrmsr functions.
> * @status_reg: One of IA32_THERM_STATUS or IA32_PACKAGE_THERM_STATUS,
> * from where the temperature values should be read.
> + * @intrpt_reg: One of IA32_THERM_INTERRUPT or IA32_PACKAGE_THERM_INTERRUPT,
> + * from where the thresholds are read.
> + * @attr_size: Total number of pre-core attrs displayed in the sysfs.
> * @is_pkg_data: If this is 1, the temp_data holds pkgtemp data.
> * Otherwise, temp_data holds coretemp data.
> * @valid: If this is 1, the current temperature is valid.
> @@ -74,15 +79,18 @@
> struct temp_data {
> int temp;
> int ttarget;
> + int tmin;
> int tjmax;
> unsigned long last_updated;
> unsigned int cpu;
> u32 cpu_core_id;
> u32 status_reg;
> + u32 intrpt_reg;
> + int attr_size;
> bool is_pkg_data;
> bool valid;
> - struct sensor_device_attribute sd_attrs[MAX_ATTRS];
> - char attr_name[MAX_ATTRS][CORETEMP_NAME_LENGTH];
> + struct sensor_device_attribute sd_attrs[TOTAL_ATTRS];
> + char attr_name[TOTAL_ATTRS][CORETEMP_NAME_LENGTH];
> struct mutex update_lock;
> };
>
> @@ -137,6 +145,19 @@ static ssize_t show_crit_alarm(struct device *dev,
> return sprintf(buf, "%d\n", (eax >> 5) & 1);
> }
>
> +static ssize_t show_max_alarm(struct device *dev,
> + struct device_attribute *devattr, char *buf)
> +{
> + u32 eax, edx;
> + struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> + struct platform_data *pdata = dev_get_drvdata(dev);
> + struct temp_data *tdata = pdata->core_data[attr->index];
> +
> + rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
> +
> + return sprintf(buf, "%d\n", !!(eax & THERM_LOG_THRESHOLD1));
> +}
> +
> static ssize_t show_tjmax(struct device *dev,
> struct device_attribute *devattr, char *buf)
> {
> @@ -155,6 +176,77 @@ static ssize_t show_ttarget(struct device *dev,
> return sprintf(buf, "%d\n", pdata->core_data[attr->index]->ttarget);
> }
>
> +static ssize_t store_ttarget(struct device *dev,
> + struct device_attribute *devattr,
> + const char *buf, size_t count)
> +{
> + struct platform_data *pdata = dev_get_drvdata(dev);
> + struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> + struct temp_data *tdata = pdata->core_data[attr->index];
> + u32 eax, edx;
> + unsigned long val;
> + int diff;
> +
> + if (strict_strtoul(buf, 10, &val))
> + return -EINVAL;
> +
> + /* THERM_MASK_THRESHOLD1 is 7 bits wide. So don't accept val > 127 */
> + if (val > tdata->tjmax || val > 127)
This needs to be 127000, since val is in mill-degrees C.
> + return -EINVAL;
> +
> + diff = (tdata->tjmax - val) / 1000;
> +
> + mutex_lock(&tdata->update_lock);
> + rdmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, &eax, &edx);
> + eax = (eax & ~THERM_MASK_THRESHOLD1) |
> + (diff << THERM_SHIFT_THRESHOLD1);
> + wrmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, eax, edx);
> + tdata->ttarget = val;
> + mutex_unlock(&tdata->update_lock);
> +
> + return count;
> +}
> +
> +static ssize_t show_tmin(struct device *dev,
> + struct device_attribute *devattr, char *buf)
> +{
> + struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> + struct platform_data *pdata = dev_get_drvdata(dev);
> +
> + return sprintf(buf, "%d\n", pdata->core_data[attr->index]->tmin);
> +}
> +
> +static ssize_t store_tmin(struct device *dev,
> + struct device_attribute *devattr,
> + const char *buf, size_t count)
> +{
> + struct platform_data *pdata = dev_get_drvdata(dev);
> + struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> + struct temp_data *tdata = pdata->core_data[attr->index];
> + u32 eax, edx;
> + unsigned long val;
> + int diff;
> +
> + if (strict_strtoul(buf, 10, &val))
> + return -EINVAL;
> +
> + /* THERM_MASK_THRESHOLD0 is 7 bits wide. So don't accept val > 127 */
> + if (val > tdata->tjmax || val > 127)
Same here.
Other observations:
I tested the patch on two systems (Sandy Bridge and Xeon C5528). On both, critical
temperature and max temperatures are the same. tempX_max_hyst is 0 in both cases.
Is this as expected ?
It is possible to set tempX_max_hyst to a value >= tempX_max. Not sure if this makes sense.
I can trigger max_alarm by setting the max temperature below the current temperature.
However, I seem to be unable to reset the alarm flag; it looks like once it is set,
it stays set forever. That may be because I am triggering the alarm by reducing
the max limit, but one should assume that the flag is reset once all limits
are set to values above the current temperature. What is the expected behavior,
and how can the alarm flag be reset ?
Thanks,
Guenter
_______________________________________________
lm-sensors mailing list
lm-sensors@lm-sensors.org
http://lists.lm-sensors.org/mailman/listinfo/lm-sensors
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold
2011-07-05 11:38 [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold Support to Durgadoss R
2011-07-05 19:58 ` [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold Guenter Roeck
@ 2011-07-06 7:57 ` R, Durgadoss
2011-07-06 8:30 ` Jean Delvare
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: R, Durgadoss @ 2011-07-06 7:57 UTC (permalink / raw)
To: lm-sensors
Hi Guenter,
> This needs to be 127000, since val is in mill-degrees C.
Oops..I will fix this disaster :(
> I tested the patch on two systems (Sandy Bridge and Xeon C5528). On both,
> critical
> temperature and max temperatures are the same. tempX_max_hyst is 0 in both
> cases.
> Is this as expected ?
Yes. Initially, TempX_max is TjMax and TempX_max_hyst is 0.
i.e the lowest and highest temperatures. But the these tempX_max and max_hyst
can be programmed to reasonable values.
For example,
I am using an Atom based device.
Initially, max will be 90 and max_hyst will be 0.
Assume, my temp1_input is 65.
Then, I set the max to 70 and max_hyst to 60.
Then the HW will trigger an interrupt when the temperature drops below 60
Or crosses above 70. I forward it to some user space app, which takes some
actions. Thus, the whole idea of max and max_hyst is to 'do something'
so that CPU will not reach TjMax.
> It is possible to set tempX_max_hyst to a value >= tempX_max. Not sure if this
> makes sense.
Yes..The H/W will allow that. Even the manual says we can program like this,
but is not recommended. I can put a small check inside the store methods of
tempX_max and tempX_max_hyst. Is that Ok with you ?
>
> I can trigger max_alarm by setting the max temperature below the current
> temperature.
> However, I seem to be unable to reset the alarm flag; it looks like once it is
> set,
> it stays set forever. That may be because I am triggering the alarm by reducing
With the current implementation, it will stay set forever.
It's a sticky bit.
> the max limit, but one should assume that the flag is reset once all limits
> are set to values above the current temperature. What is the expected behavior,
> and how can the alarm flag be reset ?
This is how it works:
[Bits 6,7,8,9 in 0x19C THERM_STATUS MSR]
Bits 6 and 8: Threshold 1 and 2 status bits
Bits 7 and 9: Threshold 1 and 2 log bits
Assuming, T1 is lower threshold and T2 is higher threshold
The Hardware generates interrupt on four conditions:
1) temp1_input goes below T1
2) temp1_input goes above T1
3) temp1_input goes below T2
4) temp1_input goes above T2
When the temp1_input goes above Threshold 1, the corresponding
log bit(7) and the status bit(6) gets set(and an interrupt is generated).
When the temp1_input drops below Threshold 1, the status bit is reset to 0.
But the log bit is not(and an interrupt is generated again). So, We are supposed
to catch this interrupt and in the Interrupt handler 'set the log bit to 0'
manually.
And This Interrupt is forwarded to 'coretemp' from the MCheck Code(therm_throt.c)
We can catch this interrupt inside our code and change the max_alarm.
But, more than that, what do we do with the interrupt ?
Thanks,
Durga
_______________________________________________
lm-sensors mailing list
lm-sensors@lm-sensors.org
http://lists.lm-sensors.org/mailman/listinfo/lm-sensors
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold
2011-07-05 11:38 [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold Support to Durgadoss R
2011-07-05 19:58 ` [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold Guenter Roeck
2011-07-06 7:57 ` R, Durgadoss
@ 2011-07-06 8:30 ` Jean Delvare
2011-07-06 16:41 ` Guenter Roeck
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Jean Delvare @ 2011-07-06 8:30 UTC (permalink / raw)
To: lm-sensors
On Wed, 6 Jul 2011 13:23:52 +0530, R, Durgadoss wrote:
> Hi Guenter,
>
> > This needs to be 127000, since val is in mill-degrees C.
>
> Oops..I will fix this disaster :(
>
> > I tested the patch on two systems (Sandy Bridge and Xeon C5528). On both,
> > critical
> > temperature and max temperatures are the same. tempX_max_hyst is 0 in both
> > cases.
> > Is this as expected ?
>
> Yes. Initially, TempX_max is TjMax and TempX_max_hyst is 0.
> i.e the lowest and highest temperatures. But the these tempX_max and max_hyst
> can be programmed to reasonable values.
>
> For example,
> I am using an Atom based device.
> Initially, max will be 90 and max_hyst will be 0.
> Assume, my temp1_input is 65.
> Then, I set the max to 70 and max_hyst to 60.
> Then the HW will trigger an interrupt when the temperature drops below 60
> Or crosses above 70. I forward it to some user space app, which takes some
> actions. Thus, the whole idea of max and max_hyst is to 'do something'
> so that CPU will not reach TjMax.
>
> > It is possible to set tempX_max_hyst to a value >= tempX_max. Not sure if this
> > makes sense.
>
> Yes..The H/W will allow that. Even the manual says we can program like this,
> but is not recommended. I can put a small check inside the store methods of
> tempX_max and tempX_max_hyst. Is that Ok with you ?
Please don't. Other hwmon drivers don't have such checks, and they
would prevent users from setting the limits in the order they want,
depending on the initial values.
> > I can trigger max_alarm by setting the max temperature below the current
> > temperature.
> > However, I seem to be unable to reset the alarm flag; it looks like once it is
> > set,
> > it stays set forever. That may be because I am triggering the alarm by reducing
>
> With the current implementation, it will stay set forever.
> It's a sticky bit.
Then the driver has to clear the alarm bit after reporting it to
user-space. Hopefully it will reassert immediately when cleared if the
out-of-limit conditions still exist?
> > the max limit, but one should assume that the flag is reset once all limits
> > are set to values above the current temperature. What is the expected behavior,
> > and how can the alarm flag be reset ?
>
> This is how it works:
> [Bits 6,7,8,9 in 0x19C THERM_STATUS MSR]
> Bits 6 and 8: Threshold 1 and 2 status bits
> Bits 7 and 9: Threshold 1 and 2 log bits
> Assuming, T1 is lower threshold and T2 is higher threshold
> The Hardware generates interrupt on four conditions:
> 1) temp1_input goes below T1
> 2) temp1_input goes above T1
> 3) temp1_input goes below T2
> 4) temp1_input goes above T2
>
> When the temp1_input goes above Threshold 1, the corresponding
> log bit(7) and the status bit(6) gets set(and an interrupt is generated).
> When the temp1_input drops below Threshold 1, the status bit is reset to 0.
> But the log bit is not(and an interrupt is generated again). So, We are supposed
> to catch this interrupt and in the Interrupt handler 'set the log bit to 0'
> manually.
It is OK for the coretemp driver to report once an alarm which happened
in the past and is no longer relevant. But it is also OK to not do
that. We have drivers doing both, depending on the underlying hardware
implementation. But alarms should be cleared when read by the user, in
both cases. If the hardware does it (most frequent case) then alright.
If not, the driver should do it.
> And This Interrupt is forwarded to 'coretemp' from the MCheck Code(therm_throt.c)
Actually it's not. therm_throt indeed exports a platform_thermal_notify
function pointer, but the coretemp driver never sets it.
The whole thing looks pretty clumsy to me anyway. Firtsly, I fail to
see how this mechanism would be safe on removal of the coretemp driver.
Secondly I don't see how you'll handle multi-core setups, as
platform_thermal_notify doesn't pass the CPU as a parameter. And
lastly, this seems to delegate thermal management to the coretemp
driver, which is wrong by design, as the coretemp module might as well
not be loaded.
> We can catch this interrupt inside our code and change the max_alarm.
> But, more than that, what do we do with the interrupt ?
I don't think we want to deal with this interrupt in coretemp at all.
This is all therm_throt's job.
--
Jean Delvare
_______________________________________________
lm-sensors mailing list
lm-sensors@lm-sensors.org
http://lists.lm-sensors.org/mailman/listinfo/lm-sensors
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold
2011-07-05 11:38 [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold Support to Durgadoss R
` (2 preceding siblings ...)
2011-07-06 8:30 ` Jean Delvare
@ 2011-07-06 16:41 ` Guenter Roeck
2011-07-07 11:20 ` R, Durgadoss
2011-07-07 18:06 ` Guenter Roeck
5 siblings, 0 replies; 7+ messages in thread
From: Guenter Roeck @ 2011-07-06 16:41 UTC (permalink / raw)
To: lm-sensors
Hi Jean,
On Wed, Jul 06, 2011 at 04:30:20AM -0400, Jean Delvare wrote:
> On Wed, 6 Jul 2011 13:23:52 +0530, R, Durgadoss wrote:
> > Hi Guenter,
> >
> > > This needs to be 127000, since val is in mill-degrees C.
> >
> > Oops..I will fix this disaster :(
> >
> > > I tested the patch on two systems (Sandy Bridge and Xeon C5528). On both,
> > > critical
> > > temperature and max temperatures are the same. tempX_max_hyst is 0 in both
> > > cases.
> > > Is this as expected ?
> >
> > Yes. Initially, TempX_max is TjMax and TempX_max_hyst is 0.
> > i.e the lowest and highest temperatures. But the these tempX_max and max_hyst
> > can be programmed to reasonable values.
> >
> > For example,
> > I am using an Atom based device.
> > Initially, max will be 90 and max_hyst will be 0.
> > Assume, my temp1_input is 65.
> > Then, I set the max to 70 and max_hyst to 60.
> > Then the HW will trigger an interrupt when the temperature drops below 60
> > Or crosses above 70. I forward it to some user space app, which takes some
> > actions. Thus, the whole idea of max and max_hyst is to 'do something'
> > so that CPU will not reach TjMax.
> >
> > > It is possible to set tempX_max_hyst to a value >= tempX_max. Not sure if this
> > > makes sense.
> >
> > Yes..The H/W will allow that. Even the manual says we can program like this,
> > but is not recommended. I can put a small check inside the store methods of
> > tempX_max and tempX_max_hyst. Is that Ok with you ?
>
> Please don't. Other hwmon drivers don't have such checks, and they
> would prevent users from setting the limits in the order they want,
> depending on the initial values.
>
On the other side, hysteresis is typically or at least often handled as offset,
meaning it is adjusted automatically if the associated limit changes. So I would
have thought it to be ok to validate that max_hyst < max, as long as max_hyst is
adjusted automatically if max is changed.
> > > I can trigger max_alarm by setting the max temperature below the current
> > > temperature.
> > > However, I seem to be unable to reset the alarm flag; it looks like once it is
> > > set,
> > > it stays set forever. That may be because I am triggering the alarm by reducing
> >
> > With the current implementation, it will stay set forever.
> > It's a sticky bit.
>
> Then the driver has to clear the alarm bit after reporting it to
> user-space. Hopefully it will reassert immediately when cleared if the
> out-of-limit conditions still exist?
>
> > > the max limit, but one should assume that the flag is reset once all limits
> > > are set to values above the current temperature. What is the expected behavior,
> > > and how can the alarm flag be reset ?
> >
> > This is how it works:
> > [Bits 6,7,8,9 in 0x19C THERM_STATUS MSR]
> > Bits 6 and 8: Threshold 1 and 2 status bits
> > Bits 7 and 9: Threshold 1 and 2 log bits
> > Assuming, T1 is lower threshold and T2 is higher threshold
> > The Hardware generates interrupt on four conditions:
> > 1) temp1_input goes below T1
> > 2) temp1_input goes above T1
> > 3) temp1_input goes below T2
> > 4) temp1_input goes above T2
> >
> > When the temp1_input goes above Threshold 1, the corresponding
> > log bit(7) and the status bit(6) gets set(and an interrupt is generated).
> > When the temp1_input drops below Threshold 1, the status bit is reset to 0.
> > But the log bit is not(and an interrupt is generated again). So, We are supposed
> > to catch this interrupt and in the Interrupt handler 'set the log bit to 0'
> > manually.
>
> It is OK for the coretemp driver to report once an alarm which happened
> in the past and is no longer relevant. But it is also OK to not do
> that. We have drivers doing both, depending on the underlying hardware
> implementation. But alarms should be cleared when read by the user, in
> both cases. If the hardware does it (most frequent case) then alright.
> If not, the driver should do it.
>
Definitely. Wonder if it might be better to use bit 6 instead of bit 7.
> > And This Interrupt is forwarded to 'coretemp' from the MCheck Code(therm_throt.c)
>
> Actually it's not. therm_throt indeed exports a platform_thermal_notify
> function pointer, but the coretemp driver never sets it.
>
> The whole thing looks pretty clumsy to me anyway. Firtsly, I fail to
> see how this mechanism would be safe on removal of the coretemp driver.
> Secondly I don't see how you'll handle multi-core setups, as
> platform_thermal_notify doesn't pass the CPU as a parameter. And
> lastly, this seems to delegate thermal management to the coretemp
> driver, which is wrong by design, as the coretemp module might as well
> not be loaded.
>
Yes, I start wondering about this as well. Does this functionality belong into
coretemp to start with, or should it be implemented in the thermal driver instead ?
> > We can catch this interrupt inside our code and change the max_alarm.
> > But, more than that, what do we do with the interrupt ?
>
> I don't think we want to deal with this interrupt in coretemp at all.
> This is all therm_throt's job.
>
Agreed.
Thanks,
Guenter
_______________________________________________
lm-sensors mailing list
lm-sensors@lm-sensors.org
http://lists.lm-sensors.org/mailman/listinfo/lm-sensors
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold
2011-07-05 11:38 [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold Support to Durgadoss R
` (3 preceding siblings ...)
2011-07-06 16:41 ` Guenter Roeck
@ 2011-07-07 11:20 ` R, Durgadoss
2011-07-07 18:06 ` Guenter Roeck
5 siblings, 0 replies; 7+ messages in thread
From: R, Durgadoss @ 2011-07-07 11:20 UTC (permalink / raw)
To: lm-sensors
Hi Guenter/Jean,
> > I don't think we want to deal with this interrupt in coretemp at all.
> > This is all therm_throt's job.
> >
> Agreed.
From the earlier mails, what I clearly get is that we don't want 'coretemp'
to deal with the Threshold Violation Interrupts.
In this case, I would submit the next version of the patch doing:
1) Update the max_alarm by reading 0x19C and checking the status bits
2) Since I see different opinions on max and max_hyst comparisons,
Should I do a check inside the store methods ?
Thanks,
Durga
_______________________________________________
lm-sensors mailing list
lm-sensors@lm-sensors.org
http://lists.lm-sensors.org/mailman/listinfo/lm-sensors
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold
2011-07-05 11:38 [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold Support to Durgadoss R
` (4 preceding siblings ...)
2011-07-07 11:20 ` R, Durgadoss
@ 2011-07-07 18:06 ` Guenter Roeck
5 siblings, 0 replies; 7+ messages in thread
From: Guenter Roeck @ 2011-07-07 18:06 UTC (permalink / raw)
To: lm-sensors
On Thu, 2011-07-07 at 07:08 -0400, R, Durgadoss wrote:
> Hi Guenter/Jean,
>
> > > I don't think we want to deal with this interrupt in coretemp at all.
> > > This is all therm_throt's job.
> > >
> > Agreed.
>
> From the earlier mails, what I clearly get is that we don't want 'coretemp'
> to deal with the Threshold Violation Interrupts.
> In this case, I would submit the next version of the patch doing:
> 1) Update the max_alarm by reading 0x19C and checking the status bits
> 2) Since I see different opinions on max and max_hyst comparisons,
> Should I do a check inside the store methods ?
>
No agreement -> do nothing, ie no check.
Thanks,
Guenter
_______________________________________________
lm-sensors mailing list
lm-sensors@lm-sensors.org
http://lists.lm-sensors.org/mailman/listinfo/lm-sensors
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2011-07-07 18:06 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-07-05 11:38 [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold Support to Durgadoss R
2011-07-05 19:58 ` [lm-sensors] [PATCHv3 1/1] Hwmon: Add core_pkg Threshold Guenter Roeck
2011-07-06 7:57 ` R, Durgadoss
2011-07-06 8:30 ` Jean Delvare
2011-07-06 16:41 ` Guenter Roeck
2011-07-07 11:20 ` R, Durgadoss
2011-07-07 18:06 ` Guenter Roeck
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.