[PATCH] x86_64/i386: Rework thermal throttling detection/handling code for Intel P4/Xeons.

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] x86_64/i386: Rework thermal throttling detection/handling code for Intel P4/Xeons.
@ 2006-09-15  0:42 Dima Zavin
  2006-09-15  6:59 ` Andi Kleen
  0 siblings, 1 reply; 7+ messages in thread
From: Dima Zavin @ 2006-09-15  0:42 UTC (permalink / raw)
  To: linux-kernel; +Cc: ak, akpm, zwane

- Maintain a count of thermal throttle events and export them to /sys/devices/system/cpu/cpuX/thermal_throttle/count
- Factor out the thermal throttle event handling (i.e. logging) to a separate file, therm_throt.c under
  i386/kernel/cpu/mcheck.
  - The interrupt handlers call into these functions, so that i386 and x86_64 behaviour is the same, and there is no
    code duplication (except for registering the irq, and actually handling it. It's probably better to keep that
    separate even though it is, in fact, almost identical)
  - I will see if it's plausible to refactor that as well, but for a later patch.
- These changes make it easy to add thermal throttling detection for other x86 vendors (coming in a later patch)


Signed-off-by: Dmitriy Zavin <dmitriyz@google.com>

---
 arch/i386/kernel/cpu/mcheck/Makefile      |    2 
 arch/i386/kernel/cpu/mcheck/p4.c          |   25 +--
 arch/i386/kernel/cpu/mcheck/therm_throt.c |  239 +++++++++++++++++++++++++++++
 arch/x86_64/kernel/Makefile               |    4 
 arch/x86_64/kernel/mce.c                  |   50 ++++++
 arch/x86_64/kernel/mce_intel.c            |   34 ++--
 include/asm-i386/therm_throt.h            |   17 ++
 include/asm-x86_64/mce.h                  |    4 
 include/asm-x86_64/therm_throt.h          |    1 
 9 files changed, 338 insertions(+), 38 deletions(-)

diff --git a/arch/i386/kernel/cpu/mcheck/Makefile b/arch/i386/kernel/cpu/mcheck/Makefile
index 30808f3..f1ebe1c 100644
--- a/arch/i386/kernel/cpu/mcheck/Makefile
+++ b/arch/i386/kernel/cpu/mcheck/Makefile
@@ -1,2 +1,2 @@
-obj-y	=	mce.o k7.o p4.o p5.o p6.o winchip.o
+obj-y	=	mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o
 obj-$(CONFIG_X86_MCE_NONFATAL)	+=	non-fatal.o
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index b95f1b3..4a64bb9 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -13,6 +13,8 @@ #include <asm/system.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
 
+#include <asm/therm_throt.h>
+
 #include "mce.h"
 
 /* as supported by the P4/Xeon family */
@@ -45,24 +47,14 @@ static void unexpected_thermal_interrupt
 static void intel_thermal_interrupt(struct pt_regs *regs)
 {
 	u32 l, h;
-	unsigned int cpu = smp_processor_id();
-	static unsigned long next[NR_CPUS];
 
 	ack_APIC_irq();
 
-	if (time_after(next[cpu], jiffies))
-		return;
-
-	next[cpu] = jiffies + HZ*5;
 	rdmsr(MSR_IA32_THERM_STATUS, l, h);
-	if (l & 0x1) {
-		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
-		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
-				cpu);
-		add_taint(TAINT_MACHINE_CHECK);
-	} else {
-		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
-	}
+	if (l & 0x1)
+		thermal_throttle_inc_count();
+
+	thermal_throttle_process_event(NULL, (l & 0x1));
 }
 
 /* Thermal interrupt handler for this CPU setup */
@@ -122,7 +114,10 @@ static void intel_init_thermal(struct cp
 	
 	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
 	wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
-	
+
+	/* init thermal throttle counting before re-enabling the interrupt */
+	thermal_throttle_count_init(cpu);
+
 	l = apic_read (APIC_LVTTHMR);
 	apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 	printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c
new file mode 100644
index 0000000..fd3993a
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c
@@ -0,0 +1,239 @@
+/*
+ * Thermal throttle event support code.
+ * It creates /sys variables for keeping track
+ * of events, keeps an ongoing count of events, logs to mce_log (if available).
+ *
+ * Author: Dmitriy Zavin (dmitriyz@google.com)
+ *
+ * Credits:
+ *      The mce_log stuff adapted from mce_intel.c (Zwane Mwaitambo)
+ *      The sysfs stuff blatantly copied/pasted from base/topology.c
+ *      Inspired by rossb/alb
+ */
+
+#include <linux/percpu.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <asm/cpu.h>
+#include <linux/notifier.h>
+#include <asm/therm_throt.h>
+
+/* How long to wait between reporting thermal events */
+#define CHECK_INTERVAL              (300 * HZ)
+
+static DEFINE_PER_CPU(atomic_t, tt_report_flag);
+static DEFINE_PER_CPU(struct timer_list, tt_report_timer);
+
+static DEFINE_PER_CPU(unsigned long, last_time);
+static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
+static DEFINE_PER_CPU(unsigned long, last_count);
+static DEFINE_PER_CPU(atomic_t, tt_enabled);
+
+#define define_one_ro(_name)                                                 \
+        static SYSDEV_ATTR(_name, 0444, show_##_name, NULL)
+
+#define define_show_func(name)                                               \
+static ssize_t show_##name(struct sys_device *dev, char *buf)                \
+{                                                                            \
+	unsigned int cpu = dev->id;                                          \
+	ssize_t ret;                                                         \
+                                                                             \
+	preempt_disable();              /* CPU hotplug */                    \
+	if (cpu_online(cpu))                                                 \
+		ret = sprintf(buf, "%lu\n",                                  \
+			      per_cpu(thermal_throttle_##name, cpu));        \
+	else                                                                 \
+		ret = 0;                                                     \
+	preempt_enable();                                                    \
+                                                                             \
+	return ret;                                                          \
+}
+
+define_show_func(count);
+define_one_ro(count);
+
+static struct attribute *thermal_throttle_attrs[] = {
+	&attr_count.attr,
+	NULL
+};
+
+static struct attribute_group thermal_throttle_attr_group = {
+	.attrs = thermal_throttle_attrs,
+	.name = "thermal_throttle"
+};
+
+/* tt_report_timer_tick -
+ *    This timer was started by the interrupt to prevent spamming the logs.
+ *    When it fires, we clear the flag to allow another log update.
+ */
+static void tt_report_timer_tick(unsigned long cpu)
+{
+	atomic_set(&per_cpu(tt_report_flag, cpu), 0);
+}
+
+/* thermal_throttle_count_init -
+ *    Initialize all the internal state variables. The inits to 0 are not
+ *    that important since the BSS is always 0 in kernel.
+ *
+ *    This function should be called ONLY if thermal throttle detection was
+ *    enabled on this 'cpu'.
+ */
+void __cpuinit thermal_throttle_count_init(unsigned int cpu)
+{
+	per_cpu(last_time, cpu)  = INITIAL_JIFFIES;
+	per_cpu(thermal_throttle_count, cpu) = 0;
+	per_cpu(last_count, cpu) = 0;
+
+	/* initialize the check-flag-timer */
+	init_timer(&per_cpu(tt_report_timer, cpu));
+	per_cpu(tt_report_timer, cpu).function = tt_report_timer_tick;
+	per_cpu(tt_report_timer, cpu).data = cpu;
+
+	/* mark us as enabled */
+	atomic_set(&per_cpu(tt_enabled, cpu), 1);
+	return;
+}
+
+/* thermal_throttle_inc_count -
+ *    This function is normally called by the thermal interrupt if
+ *    the code determines that we just entered into a thermal event.
+ */
+void thermal_throttle_inc_count(void)
+{
+	__get_cpu_var(thermal_throttle_count)++;
+}
+
+/* thermal_throttle_process_event -
+ *    This function is normally called by the thermal interrupt,
+ *    after the thermal event has been counted by calling
+ *    thermal_throttle_inc_count().
+ *
+ *    evinfo is a pointer to astructure where event info will be filled in.
+ *    Currently that's just the cpu#, time since last logged event, and
+ *    number of skipped (silently counted) events since last logged
+ *    event.
+ *
+ *    Since the thermal interrupt normally gets called both when the thermal
+ *    event begins and once the event has ended, the caller passes in a
+ *    boolean 'curr' to specify whether or not it is currently IN the
+ *    thermal event.
+ *
+ *  Returns: 0 : Event should NOT be further logged, i.e. still in
+ *               "timeout" from previous log message.
+ *           1 : Event should be logged, and a message has been printed.
+ */
+int thermal_throttle_process_event(struct therm_throt_event_info *evinfo,
+				   int curr)
+{
+	unsigned int cpu = smp_processor_id();
+	__u64 intvl;
+	__u64 intvl_cnt;
+
+	/* Prevent flooding the logs with thermal entries */
+	if (atomic_read(&__get_cpu_var(tt_report_flag)))
+		return 0;
+
+	/* set the flag, and add the timer that will clear it */
+	atomic_set(&__get_cpu_var(tt_report_flag), 1);
+	__get_cpu_var(tt_report_timer).expires = jiffies + CHECK_INTERVAL;
+	add_timer_on(&__get_cpu_var(tt_report_timer), cpu);
+
+	/* get the interval */
+	intvl = (jiffies - __get_cpu_var(last_time)) / HZ;
+
+	/* get the count for the interval */
+	intvl_cnt = __get_cpu_var(thermal_throttle_count) -
+		    __get_cpu_var(last_count);
+
+	/* check > 1 because the current event should not be used for
+	 * historical report */
+	if (intvl_cnt > 1) {
+		printk(KERN_CRIT "CPU%d: Temperature exceeded threshold %llu "
+		       "time(s) over last %llu second(s)\n", cpu, intvl_cnt,
+		       intvl);
+	}
+
+	/* if we just entered the thermal event */
+	if (curr) {
+		printk(KERN_CRIT "CPU%d: Temperature above threshold, "
+		       "cpu clock throttled\n", cpu);
+		add_taint(TAINT_MACHINE_CHECK);
+	} else {
+		printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
+	}
+
+	/* update the values for next time */
+	__get_cpu_var(last_count) = __get_cpu_var(thermal_throttle_count);
+	__get_cpu_var(last_time)  = jiffies;
+
+	/* fill in the info structure */
+	if (evinfo) {
+		evinfo->interval = intvl;
+		evinfo->interval_count = intvl_cnt;
+		evinfo->cpu = cpu;
+	}
+
+	return 1;
+}
+
+/* Add/Remove thermal_throttle interface for CPU device */
+static __cpuinit int thermal_throttle_add_dev(struct sys_device * sys_dev)
+{
+	sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static __cpuinit int thermal_throttle_remove_dev(struct sys_device * sys_dev)
+{
+	sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+	return 0;
+}
+
+/* Get notified when a cpu comes on/off. Be hotplug friendly. */
+static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
+						   unsigned long action,
+						   void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct sys_device *sys_dev;
+
+	sys_dev = get_cpu_sysdev(cpu);
+	switch (action) {
+		case CPU_ONLINE:
+			thermal_throttle_add_dev(sys_dev);
+			break;
+		case CPU_DEAD:
+			thermal_throttle_remove_dev(sys_dev);
+			break;
+	}ss
+	return NOTIFY_OK;
+}
+
+static struct notifier_block thermal_throttle_cpu_notifier =
+{
+	.notifier_call = thermal_throttle_cpu_callback,
+};
+#endif
+
+static __init int thermal_throttle_init_device(void)
+{
+	unsigned int cpu = 0;
+
+	/* connect to sysfs */
+	for_each_online_cpu(cpu) {
+		preempt_disable();
+		if (cpu_online(cpu)) {
+			if (atomic_read(&per_cpu(tt_enabled, cpu)))
+				thermal_throttle_add_dev(get_cpu_sysdev(cpu));
+		}
+		preempt_enable();
+	}
+
+#ifdef CONFIG_HOTPLUG_CPU
+	register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
+#endif
+	return 0;
+}
+
+device_initcall(thermal_throttle_init_device);
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index b5aaeaf..d97cec0 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -11,7 +11,7 @@ obj-y	:= process.o signal.o entry.o trap
 		pci-dma.o pci-nommu.o alternative.o
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
-obj-$(CONFIG_X86_MCE)         += mce.o
+obj-$(CONFIG_X86_MCE)		+= mce.o therm_throt.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
 obj-$(CONFIG_MTRR)		+= ../../i386/kernel/cpu/mtrr/
@@ -45,6 +45,7 @@ obj-y				+= intel_cacheinfo.o
 
 CFLAGS_vsyscall.o		:= $(PROFILING) -g0
 
+therm_throt-y                   += ../../i386/kernel/cpu/mcheck/therm_throt.o
 bootflag-y			+= ../../i386/kernel/bootflag.o
 cpuid-$(subst m,y,$(CONFIG_X86_CPUID))  += ../../i386/kernel/cpuid.o
 topology-y                     += ../../i386/kernel/topology.o
@@ -54,4 +55,3 @@ quirks-y			+= ../../i386/kernel/quirks.o
 i8237-y				+= ../../i386/kernel/i8237.o
 msr-$(subst m,y,$(CONFIG_X86_MSR))  += ../../i386/kernel/msr.o
 alternative-y			+= ../../i386/kernel/alternative.o
-
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 4e017fb..d331c85 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -22,6 +22,7 @@ #include <linux/ctype.h>
 #include <asm/processor.h> 
 #include <asm/msr.h>
 #include <asm/mce.h>
+#include <asm/therm_throt.h>
 #include <asm/kdebug.h>
 #include <asm/uaccess.h>
 #include <asm/smp.h>
@@ -500,6 +501,55 @@ static struct miscdevice mce_log_device 
 	&mce_chrdev_ops,
 };
 
+/* mce_log_thermal_throttle_event -
+ *     Add an mce_log entry for the thermal throttle event 'evinfo'. 'status'
+ *     should be whatever the user wants to put into the mce.status field,
+ *     and historically has been the register value of the
+ *     MSR_IA32_THERMAL_STATUS.
+ *
+ *     This will typically be called from a thermal throttle interrupt
+ *     if the event should be logged.
+ */
+void mce_log_thermal_throttle_event(__u64 status,
+				    struct therm_throt_event_info *evinfo)
+{
+	struct mce m;
+
+	memset(&m, 0, sizeof(m));
+	m.cpu = evinfo->cpu;
+	m.bank = MCE_THERMAL_BANK;
+	m.status = status;
+	rdtscll(m.tsc);
+
+	/* save the delta of the throttle count to let the consumer of this
+	 * information know how many throttle instances have been seen.
+	 *
+	 * 'misc' is organized below as follows:
+	 * bits 15:0  - Contains a capped value of thermal events for last
+	 *              interval.
+	 * bits 31:16 - Time in seconds to qualify the interval
+	 *              (capped at 0xffff)
+	 *
+	 * We cap events count at 16 bits to leave room in 'misc' for future
+	 * extensions to this report */
+
+	/* cap values in m.misc for mce_log purposes */
+
+	if (evinfo->interval > 0xffff)
+		m.misc = 0xffff;
+	else
+		m.misc = evinfo->interval;
+	m.misc <<= 16;
+
+	if (unlikely(evinfo->interval_count > 0xffff))
+		m.misc |= 0xffff;
+	else
+		m.misc |= evinfo->interval_count;
+
+	mce_log(&m);
+	return;
+}
+
 /* 
  * Old style boot options parsing. Only for compatibility. 
  */
diff --git a/arch/x86_64/kernel/mce_intel.c b/arch/x86_64/kernel/mce_intel.c
index 8f533d2..d658f42 100644
--- a/arch/x86_64/kernel/mce_intel.c
+++ b/arch/x86_64/kernel/mce_intel.c
@@ -11,36 +11,27 @@ #include <asm/msr.h>
 #include <asm/mce.h>
 #include <asm/hw_irq.h>
 #include <asm/idle.h>
-
-static DEFINE_PER_CPU(unsigned long, next_check);
+#include <asm/therm_throt.h>
 
 asmlinkage void smp_thermal_interrupt(void)
 {
-	struct mce m;
+	__u64 msr_val;
+	struct therm_throt_event_info event;
 
 	ack_APIC_irq();
 
 	exit_idle();
 	irq_enter();
-	if (time_before(jiffies, __get_cpu_var(next_check)))
-		goto done;
-
-	__get_cpu_var(next_check) = jiffies + HZ*300;
-	memset(&m, 0, sizeof(m));
-	m.cpu = smp_processor_id();
-	m.bank = MCE_THERMAL_BANK;
-	rdtscll(m.tsc);
-	rdmsrl(MSR_IA32_THERM_STATUS, m.status);
-	if (m.status & 0x1) {
-		printk(KERN_EMERG
-			"CPU%d: Temperature above threshold, cpu clock throttled\n", m.cpu);
-		add_taint(TAINT_MACHINE_CHECK);
-	} else {
-		printk(KERN_EMERG "CPU%d: Temperature/speed normal\n", m.cpu);
+
+	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+	if (msr_val & 0x1)
+		thermal_throttle_inc_count();
+
+	/* process the event, and log to mce_log if necessary */
+	if (thermal_throttle_process_event(&event, (msr_val & 1))) {
+		mce_log_thermal_throttle_event(msr_val, &event);
 	}
 
-	mce_log(&m);
-done:
 	irq_exit();
 }
 
@@ -88,6 +79,9 @@ static void __cpuinit intel_init_thermal
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 	wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);
 
+	/* init thermal throttle counting before re-enabling the interrupt */
+	thermal_throttle_count_init(cpu);
+
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
diff --git a/include/asm-i386/therm_throt.h b/include/asm-i386/therm_throt.h
new file mode 100644
index 0000000..228122c
--- /dev/null
+++ b/include/asm-i386/therm_throt.h
@@ -0,0 +1,17 @@
+#ifndef __ASM_I386_THERM_THROT_H__
+#define __ASM_I386_THERM_THROT_H__ 1
+
+#include <asm/types.h>
+
+struct therm_throt_event_info {
+	__u64 interval;
+	__u64 interval_count;
+	unsigned int cpu;
+};
+
+void __cpuinit thermal_throttle_count_init(unsigned int cpu);
+void thermal_throttle_inc_count(void);
+int thermal_throttle_process_event(struct therm_throt_event_info *evinfo,
+				   int curr);
+
+#endif /* __ASM_I386_THERM_THROT_H__ */
diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h
index d13687d..0ba1b4d 100644
--- a/include/asm-x86_64/mce.h
+++ b/include/asm-x86_64/mce.h
@@ -3,6 +3,7 @@ #define _ASM_MCE_H 1
 
 #include <asm/ioctls.h>
 #include <asm/types.h>
+#include <asm/therm_throt.h>
 
 /* 
  * Machine Check support for x86
@@ -99,6 +100,9 @@ static inline void mce_amd_feature_init(
 }
 #endif
 
+void mce_log_thermal_throttle_event(__u64 status,
+				    struct therm_throt_event_info *evinfo);
+
 extern atomic_t mce_entry;
 
 #endif
diff --git a/include/asm-x86_64/therm_throt.h b/include/asm-x86_64/therm_throt.h
new file mode 100644
index 0000000..5aac059
--- /dev/null
+++ b/include/asm-x86_64/therm_throt.h
@@ -0,0 +1 @@
+#include <asm-i386/therm_throt.h>
-- 
1.4.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86_64/i386: Rework thermal throttling detection/handling code for Intel P4/Xeons.
  2006-09-15  0:42 [PATCH] x86_64/i386: Rework thermal throttling detection/handling code for Intel P4/Xeons Dima Zavin
@ 2006-09-15  6:59 ` Andi Kleen
  2006-09-15 20:33   ` Dmitriy Zavin
  0 siblings, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2006-09-15  6:59 UTC (permalink / raw)
  To: Dima Zavin; +Cc: linux-kernel, akpm, zwane

On Friday 15 September 2006 02:42, Dima Zavin wrote:
> - Maintain a count of thermal throttle events and export them to
> /sys/devices/system/cpu/cpuX/thermal_throttle/count - Factor out the
> thermal throttle event handling (i.e. logging) to a separate file,
> therm_throt.c under i386/kernel/cpu/mcheck.

Please make a separate patch to do any refactoring. Otherwise
nobody can evaluate your changes. Then any other changes on top of it.


> +static DEFINE_PER_CPU(atomic_t, tt_report_flag);
> +static DEFINE_PER_CPU(struct timer_list, tt_report_timer);
> +
> +static DEFINE_PER_CPU(unsigned long, last_time);
> +static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
> +static DEFINE_PER_CPU(unsigned long, last_count);
> +static DEFINE_PER_CPU(atomic_t, tt_enabled);

Do we really need all of this per CPU? I think global variables would
be just fine.

> +#define define_one_ro(_name)                                              
>   \ +        static SYSDEV_ATTR(_name, 0444, show_##_name, NULL)
> +
> +#define define_show_func(name)                                            

The names are too generic

> +/* thermal_throttle_count_init -
> + *    Initialize all the internal state variables. The inits to 0 are not
> + *    that important since the BSS is always 0 in kernel.
> + *
> + *    This function should be called ONLY if thermal throttle detection
> was + *    enabled on this 'cpu'.

I don't think your comments are actually correct kerneldoc, so the scripts
parsing it would break.

> + */
> +void __cpuinit thermal_throttle_count_init(unsigned int cpu)
> +{
> +	per_cpu(last_time, cpu)  = INITIAL_JIFFIES;
> +	per_cpu(thermal_throttle_count, cpu) = 0;
> +	per_cpu(last_count, cpu) = 0;

Most of this can be just a normal initializer for the variable

> +	/* initialize the check-flag-timer */
> +	init_timer(&per_cpu(tt_report_timer, cpu));
> +	per_cpu(tt_report_timer, cpu).function = tt_report_timer_tick;
> +	per_cpu(tt_report_timer, cpu).data = cpu;
> +
> +	/* mark us as enabled */
> +	atomic_set(&per_cpu(tt_enabled, cpu), 1);
> +	return;

Useless return;

> +}
> +
> +/* thermal_throttle_inc_count -
> + *    This function is normally called by the thermal interrupt if
> + *    the code determines that we just entered into a thermal event.
> + */
> +void thermal_throttle_inc_count(void)
> +{
> +	__get_cpu_var(thermal_throttle_count)++;
> +}

Can you please just expand that in the caller. It looks like overabstraction.

> +
> +/* thermal_throttle_process_event -
> + *    This function is normally called by the thermal interrupt,
> + *    after the thermal event has been counted by calling
> + *    thermal_throttle_inc_count().
> + *
> + *    evinfo is a pointer to astructure where event info will be filled
> in. + *    Currently that's just the cpu#, time since last logged event,
> and + *    number of skipped (silently counted) events since last logged +
> *    event.
> + *
> + *    Since the thermal interrupt normally gets called both when the
> thermal + *    event begins and once the event has ended, the caller passes
> in a + *    boolean 'curr' to specify whether or not it is currently IN the
> + *    thermal event.
> + *
> + *  Returns: 0 : Event should NOT be further logged, i.e. still in
> + *               "timeout" from previous log message.
> + *           1 : Event should be logged, and a message has been printed.
> + */
> +int thermal_throttle_process_event(struct therm_throt_event_info *evinfo,
> +				   int curr)
> +{
> +	unsigned int cpu = smp_processor_id();
> +	__u64 intvl;
> +	__u64 intvl_cnt;
> +
> +	/* Prevent flooding the logs with thermal entries */
> +	if (atomic_read(&__get_cpu_var(tt_report_flag)))
> +		return 0;

Isn't the test the wrong way around?

> +
> +	/* set the flag, and add the timer that will clear it */
> +	atomic_set(&__get_cpu_var(tt_report_flag), 1);
> +	__get_cpu_var(tt_report_timer).expires = jiffies + CHECK_INTERVAL;
> +	add_timer_on(&__get_cpu_var(tt_report_timer), cpu);

I'm not sure what you need the timer for. Can't just just drive
that from the interrupt handlers and check time stamps to do 
rate limiting? Then another timer wouldn't be needed.

> +	/* get the interval */
> +	intvl = (jiffies - __get_cpu_var(last_time)) / HZ;

Are you sure this handles jiffies wrapping correctly?

> +#ifdef CONFIG_HOTPLUG_CPU
> +static __cpuinit int thermal_throttle_remove_dev(struct sys_device *
> sys_dev) +{
> +	sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
> +	return 0;
> +}

You don't remove the timer?

> +/* Get notified when a cpu comes on/off. Be hotplug friendly. */

And don't initialize it? Better get rid of it anyways.

> +static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block
> *nfb, +						   unsigned long action,
> +						   void *hcpu)
> +{
> +	unsigned int cpu = (unsigned long)hcpu;
> +	struct sys_device *sys_dev;
> +
> +	sys_dev = get_cpu_sysdev(cpu);
> +	switch (action) {
> +		case CPU_ONLINE:
> +			thermal_throttle_add_dev(sys_dev);
> +			break;
> +		case CPU_DEAD:
> +			thermal_throttle_remove_dev(sys_dev);
> +			break;
> +	}ss
> +	return NOTIFY_OK;
> +}
> +
> +static struct notifier_block thermal_throttle_cpu_notifier =
> +{
> +	.notifier_call = thermal_throttle_cpu_callback,
> +};
> +#endif
> +
> +static __init int thermal_throttle_init_device(void)
> +{
> +	unsigned int cpu = 0;
> +
> +	/* connect to sysfs */
> +	for_each_online_cpu(cpu) {
> +		preempt_disable();
> +		if (cpu_online(cpu)) {

If you put the preempt_disable around the loop you don't need a second
check and you even fix a race with new CPUs.

> +/* mce_log_thermal_throttle_event -
> + *     Add an mce_log entry for the thermal throttle event 'evinfo'.
> 'status' + *     should be whatever the user wants to put into the
> mce.status field, + *     and historically has been the register value of
> the
> + *     MSR_IA32_THERMAL_STATUS.
> + *
> + *     This will typically be called from a thermal throttle interrupt
> + *     if the event should be logged.
> + */
> +void mce_log_thermal_throttle_event(__u64 status,
> +				    struct therm_throt_event_info *evinfo)
> +{

Instead of having this evinfo structure you could just directly
fill in struct mces in the caller.

> +	struct mce m;
> +
> +	memset(&m, 0, sizeof(m));
> +	m.cpu = evinfo->cpu;
> +	m.bank = MCE_THERMAL_BANK;

So how should user space distingush that event from other thermal events? 

> +	m.status = status;
> +	rdtscll(m.tsc);
> +
> +	/* save the delta of the throttle count to let the consumer of this
> +	 * information know how many throttle instances have been seen.
> +	 *
> +	 * 'misc' is organized below as follows:
> +	 * bits 15:0  - Contains a capped value of thermal events for last
> +	 *              interval.
> +	 * bits 31:16 - Time in seconds to qualify the interval
> +	 *              (capped at 0xffff)
> +	 *
> +	 * We cap events count at 16 bits to leave room in 'misc' for future
> +	 * extensions to this report */

I trust you will be also submitting a mcelog patch to decode all this? 

-Andi

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86_64/i386: Rework thermal throttling detection/handling code for Intel P4/Xeons.
  2006-09-15  6:59 ` Andi Kleen
@ 2006-09-15 20:33   ` Dmitriy Zavin
  2006-09-16  4:34     ` Andi Kleen
  0 siblings, 1 reply; 7+ messages in thread
From: Dmitriy Zavin @ 2006-09-15 20:33 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel, akpm, zwane

Andi,

>
> Please make a separate patch to do any refactoring. Otherwise
> nobody can evaluate your changes. Then any other changes on top of it.

Will do.

> > +static DEFINE_PER_CPU(atomic_t, tt_report_flag);
> > +static DEFINE_PER_CPU(struct timer_list, tt_report_timer);
> > +
> > +static DEFINE_PER_CPU(unsigned long, last_time);
> > +static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
> > +static DEFINE_PER_CPU(unsigned long, last_count);
> > +static DEFINE_PER_CPU(atomic_t, tt_enabled);
>
> Do we really need all of this per CPU? I think global variables would
> be just fine.

Each cpu can throttle at different times and have different counts, so
that is necessary on a per cpu basis. Maybe the enable can be shared,
since it is highly unlikely that one cpu will have thermal throttle
enabled and others not. But there can be BIOS bugs where it might
register it's own SMIs for some cpu's and not others.

> > +/* thermal_throttle_count_init -
> > + *    Initialize all the internal state variables. The inits to 0 are not
> > + *    that important since the BSS is always 0 in kernel.
> > + *
> > + *    This function should be called ONLY if thermal throttle detection
> > was + *    enabled on this 'cpu'.
>
> I don't think your comments are actually correct kerneldoc, so the scripts
> parsing it would break.

My fault, I haven't pushed anything upstream before. Will fix.

>
> > + */
> > +void __cpuinit thermal_throttle_count_init(unsigned int cpu)
> > +{
> > +     per_cpu(last_time, cpu)  = INITIAL_JIFFIES;
> > +     per_cpu(thermal_throttle_count, cpu) = 0;
> > +     per_cpu(last_count, cpu) = 0;
>
> Most of this can be just a normal initializer for the variable

The zeros yes, I will remove that. What about the last_time to INITIAL_JIFFIES?

> > +}
> > +
> > +/* thermal_throttle_inc_count -
> > + *    This function is normally called by the thermal interrupt if
> > + *    the code determines that we just entered into a thermal event.
> > + */
> > +void thermal_throttle_inc_count(void)
> > +{
> > +     __get_cpu_var(thermal_throttle_count)++;
> > +}
>
> Can you please just expand that in the caller. It looks like overabstraction.

Then the caller has to maintain the thermal counter, which is what I
don't want. I wanted the managmente code to be separate. I can make
this an inline in the header if you want to get rid of the function
call.

> > +
> > +     /* Prevent flooding the logs with thermal entries */
> > +     if (atomic_read(&__get_cpu_var(tt_report_flag)))
> > +             return 0;
>
> Isn't the test the wrong way around?

No. When the flag is cleared, the event should be reported (this way
the flag has the right initial value when its declared). If the flag
is set, i skip the report. The timer will clear the flag.

> I'm not sure what you need the timer for. Can't just just drive
> that from the interrupt handlers and check time stamps to do
> rate limiting? Then another timer wouldn't be needed.

Problem is that the timestamps are in jiffies. You could potentially
have very long periods of time in between thermal events, assuming the
box has a long uptime. On  32bit systems, if the time delta in jiffies
between 2 events is >= 0x80000000, the time elapsed will be negative,
and will prevent you from logging events until jiffies wrap all the
way around (the counter will still increment though). The timer takes
care of clearing the flag after 5 minutes.

>
> > +     /* get the interval */
> > +     intvl = (jiffies - __get_cpu_var(last_time)) / HZ;
>
> Are you sure this handles jiffies wrapping correctly?

Not sure. Will fix.

> > +#ifdef CONFIG_HOTPLUG_CPU
> > +static __cpuinit int thermal_throttle_remove_dev(struct sys_device *
> > sys_dev) +{
> > +     sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
> > +     return 0;
> > +}
>
> You don't remove the timer?

Oops. Will fix. At this point, does the PER_CPU variable still exist?

> > +/* Get notified when a cpu comes on/off. Be hotplug friendly. */
>
> And don't initialize it? Better get rid of it anyways.

The timer is initialized in thermal_throttle_count_init.

> > +     /* connect to sysfs */
> > +     for_each_online_cpu(cpu) {
> > +             preempt_disable();
> > +             if (cpu_online(cpu)) {
>
> If you put the preempt_disable around the loop you don't need a second
> check and you even fix a race with new CPUs.

Thanks, will fix.

> > +/* mce_log_thermal_throttle_event -
> > + *     Add an mce_log entry for the thermal throttle event 'evinfo'.
> > 'status' + *     should be whatever the user wants to put into the
> > mce.status field, + *     and historically has been the register value of
> > the
> > + *     MSR_IA32_THERMAL_STATUS.
> > + *
> > + *     This will typically be called from a thermal throttle interrupt
> > + *     if the event should be logged.
> > + */
> > +void mce_log_thermal_throttle_event(__u64 status,
> > +                                 struct therm_throt_event_info *evinfo)
> > +{
>
> Instead of having this evinfo structure you could just directly
> fill in struct mces in the caller.

But the caller won't know what to fill the struct mce with since this
function does the logic of figuring out the thermal event info. I
can't have this function take "struct mce *" since that doesn't exist
on i386. I could have it accept pointers to values as arguments, but
that's messy.

> > +     struct mce m;
> > +
> > +     memset(&m, 0, sizeof(m));
> > +     m.cpu = evinfo->cpu;
> > +     m.bank = MCE_THERMAL_BANK;
>
> So how should user space distingush that event from other thermal events?

What do you mean? This is already what x86_64 code does today for
thermal throttle events.

> > +     m.status = status;
> > +     rdtscll(m.tsc);
> > +
> > +     /* save the delta of the throttle count to let the consumer of this
> > +      * information know how many throttle instances have been seen.
> > +      *
> > +      * 'misc' is organized below as follows:
> > +      * bits 15:0  - Contains a capped value of thermal events for last
> > +      *              interval.
> > +      * bits 31:16 - Time in seconds to qualify the interval
> > +      *              (capped at 0xffff)
> > +      *
> > +      * We cap events count at 16 bits to leave room in 'misc' for future
> > +      * extensions to this report */
>
> I trust you will be also submitting a mcelog patch to decode all this?

Oops. Yes, I will add a patch for that as well.

Thank you very much for your feedback.

--Dima

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86_64/i386: Rework thermal throttling detection/handling code for Intel P4/Xeons.
  2006-09-15 20:33   ` Dmitriy Zavin
@ 2006-09-16  4:34     ` Andi Kleen
  2006-09-18 22:21       ` Dmitriy Zavin
  0 siblings, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2006-09-16  4:34 UTC (permalink / raw)
  To: Dmitriy Zavin; +Cc: linux-kernel, akpm, zwane

On Friday 15 September 2006 22:33, Dmitriy Zavin wrote:

> > > +static DEFINE_PER_CPU(atomic_t, tt_report_flag);
> > > +static DEFINE_PER_CPU(struct timer_list, tt_report_timer);
> > > +
> > > +static DEFINE_PER_CPU(unsigned long, last_time);
> > > +static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
> > > +static DEFINE_PER_CPU(unsigned long, last_count);
> > > +static DEFINE_PER_CPU(atomic_t, tt_enabled);
> >
> > Do we really need all of this per CPU? I think global variables would
> > be just fine.
>
> Each cpu can throttle at different times and have different counts, so

The counts should be per CPU, but everything else (timestamp, enabled) 
etc. can be just global.  You just don't want the logs flooded with events,
but if the rate limiting is global or local doesn't make much difference
That would give much simpler code and I believe
is sufficient for basically everybody
 
Currently your patch is a bit too large for the relatively simple things
it tries to attempt, so such ways to simplify it and slim it down are needed.
If you have other ideas to make it simpler that would be appreciated too.

> > > + */
> > > +void __cpuinit thermal_throttle_count_init(unsigned int cpu)
> > > +{
> > > +     per_cpu(last_time, cpu)  = INITIAL_JIFFIES;
> > > +     per_cpu(thermal_throttle_count, cpu) = 0;
> > > +     per_cpu(last_count, cpu) = 0;
> >
> > Most of this can be just a normal initializer for the variable
>
> The zeros yes, I will remove that. What about the last_time to
> INITIAL_JIFFIES?

Should work as a initializer too.

> Then the caller has to maintain the thermal counter, which is what I
> don't want. I wanted the managmente code to be separate. I can make
> this an inline in the header if you want to get rid of the function
> call.

The function call doesn't matter, it is just that we don't want overabstracted
kernel in the Linux kernel and your patch currently definitely has too many
functions doing very simple things. That makes it harder to read etc.

> > > +
> > > +     /* Prevent flooding the logs with thermal entries */
> > > +     if (atomic_read(&__get_cpu_var(tt_report_flag)))
> > > +             return 0;
> >
> > Isn't the test the wrong way around?
>
> No. When the flag is cleared, the event should be reported (this way
> the flag has the right initial value when its declared). If the flag
> is set, i skip the report. The timer will clear the flag.

Looks like a quite strange way to do things, but ok.

> > I'm not sure what you need the timer for. Can't just just drive
> > that from the interrupt handlers and check time stamps to do
> > rate limiting? Then another timer wouldn't be needed.
>
> Problem is that the timestamps are in jiffies. You could potentially
> have very long periods of time in between thermal events, assuming the
> box has a long uptime. On  32bit systems, if the time delta in jiffies
> between 2 events is >= 0x80000000, the time elapsed will be negative,
> and will prevent you from logging events until jiffies wrap all the
> way around (the counter will still increment though). The timer takes
> care of clearing the flag after 5 minutes.

We have a 64bit jiffies for that now on 32bit too.

But Jiffies wrap is several years. Even if we get a bogus overheat event
after two years it isn't really a issue. Please just get rid of the timer

> > > +#ifdef CONFIG_HOTPLUG_CPU
> > > +static __cpuinit int thermal_throttle_remove_dev(struct sys_device *
> > > sys_dev) +{
> > > +     sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
> > > +     return 0;
> > > +}
> >
> > You don't remove the timer?
>
> Oops. Will fix. At this point, does the PER_CPU variable still exist?

Yes, they always exist for all possible CPUs.

But better just get rid of the timer completely.

> > > +/* mce_log_thermal_throttle_event -
> > > + *     Add an mce_log entry for the thermal throttle event 'evinfo'.
> > > 'status' + *     should be whatever the user wants to put into the
> > > mce.status field, + *     and historically has been the register value
> > > of the
> > > + *     MSR_IA32_THERMAL_STATUS.
> > > + *
> > > + *     This will typically be called from a thermal throttle interrupt
> > > + *     if the event should be logged.
> > > + */
> > > +void mce_log_thermal_throttle_event(__u64 status,
> > > +                                 struct therm_throt_event_info
> > > *evinfo) +{
> >
> > Instead of having this evinfo structure you could just directly
> > fill in struct mces in the caller.
>
> But the caller won't know what to fill the struct mce with since this
> function does the logic of figuring out the thermal event info. I
> can't have this function take "struct mce *" since that doesn't exist
> on i386. I could have it accept pointers to values as arguments, but
> that's messy.

Then either define struct mce for i386 or use two different functions
for i386/x86-64.

>
> > > +     struct mce m;
> > > +
> > > +     memset(&m, 0, sizeof(m));
> > > +     m.cpu = evinfo->cpu;
> > > +     m.bank = MCE_THERMAL_BANK;
> >
> > So how should user space distingush that event from other thermal events?
>
> What do you mean? This is already what x86_64 code does today for
> thermal throttle events.

It should be just clear where it came from since you added a new way
to generate them.

-Andi

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86_64/i386: Rework thermal throttling detection/handling code for Intel P4/Xeons.
  2006-09-16  4:34     ` Andi Kleen
@ 2006-09-18 22:21       ` Dmitriy Zavin
  2006-09-19  6:11         ` Andi Kleen
  0 siblings, 1 reply; 7+ messages in thread
From: Dmitriy Zavin @ 2006-09-18 22:21 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel, akpm

[dropping Zwane from CC since the address I have is wrong]

> > Each cpu can throttle at different times and have different counts, so
>
> The counts should be per CPU, but everything else (timestamp, enabled)
> etc. can be just global.  You just don't want the logs flooded with events,
> but if the rate limiting is global or local doesn't make much difference
> That would give much simpler code and I believe
> is sufficient for basically everybody

This will make you skip logging events for other CPUs once one has
throttled, which is important to me. In that case, one has to rely on
just the counts.

But I still think that it is important to report and rate limit on a
per CPU basis, which is how it is already done in the upstream kernel
(and has for a while), so it wouldn't be spamming logs any more than
it already does.

> Currently your patch is a bit too large for the relatively simple things
> it tries to attempt, so such ways to simplify it and slim it down are needed.
> If you have other ideas to make it simpler that would be appreciated too.

You are right, it was my newbie mistake. I am splitting it up into
logical, smaller chunks that are easier to review.

> > Then the caller has to maintain the thermal counter, which is what I
> > don't want. I wanted the managmente code to be separate. I can make
> > this an inline in the header if you want to get rid of the function
> > call.
>
> The function call doesn't matter, it is just that we don't want overabstracted
> kernel in the Linux kernel and your patch currently definitely has too many
> functions doing very simple things. That makes it harder to read etc.

I will roll the counter bump into process_event() so the user just calls that.

> > > I'm not sure what you need the timer for. Can't just just drive
> > > that from the interrupt handlers and check time stamps to do
> > > rate limiting? Then another timer wouldn't be needed.
> >
> > Problem is that the timestamps are in jiffies. You could potentially
> > have very long periods of time in between thermal events, assuming the
> > box has a long uptime. On  32bit systems, if the time delta in jiffies
> > between 2 events is >= 0x80000000, the time elapsed will be negative,
> > and will prevent you from logging events until jiffies wrap all the
> > way around (the counter will still increment though). The timer takes
> > care of clearing the flag after 5 minutes.
>
> But Jiffies wrap is several years. Even if we get a bogus overheat event
> after two years it isn't really a issue. Please just get rid of the timer

We won't get a bogus overheat event. The problem is that we will SKIP
reporting that event, and further events alltogether, until jiffies
pass the threshold. Then we will HAVE to rely on someone watching the
counter(s) to know that something has happened.

I understand your uneasiness with the timer, and will try to find a
different way.

> We have a 64bit jiffies for that now on 32bit too.

The problem with using jiffies_64 is that the time_before/time_after
macros in linux/jiffies.h always typecast to "long" which is not 64
bits on 32bit systems. So it will get truncated, and behave as 32bits
and won't solve the problem

> > > Instead of having this evinfo structure you could just directly
> > > fill in struct mces in the caller.
> >
> > But the caller won't know what to fill the struct mce with since this
> > function does the logic of figuring out the thermal event info. I
> > can't have this function take "struct mce *" since that doesn't exist
> > on i386. I could have it accept pointers to values as arguments, but
> > that's messy.
>
> Then either define struct mce for i386 or use two different functions
> for i386/x86-64.

I'll add a patch to pull out mcelog related stuff into mce_log.c, and
share that between i386/x86_64. Put mcelog.c to
arch/i386/kernel/cpu/mcheck, and have x86_64 just compile that in
directly. Does that sound like a workable solution? There's no need to
maintain identical code in 2 places.

> > > > +     struct mce m;
> > > > +
> > > > +     memset(&m, 0, sizeof(m));
> > > > +     m.cpu = evinfo->cpu;
> > > > +     m.bank = MCE_THERMAL_BANK;
> > >
> > > So how should user space distingush that event from other thermal events?
> >
> > What do you mean? This is already what x86_64 code does today for
> > thermal throttle events.
>
> It should be just clear where it came from since you added a new way
> to generate them.

I didn't add a new way to generate them :). Detection/generation of
the log entry is identical, just the actual code moved to a different
file.

Thanks.

--Dima

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86_64/i386: Rework thermal throttling detection/handling code for Intel P4/Xeons.
  2006-09-18 22:21       ` Dmitriy Zavin
@ 2006-09-19  6:11         ` Andi Kleen
  2006-09-19 21:14           ` Dmitriy Zavin
  0 siblings, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2006-09-19  6:11 UTC (permalink / raw)
  To: Dmitriy Zavin; +Cc: linux-kernel, akpm

On Tuesday 19 September 2006 00:21, Dmitriy Zavin wrote:
> > > Each cpu can throttle at different times and have different counts, so
> >
> > The counts should be per CPU, but everything else (timestamp, enabled)
> > etc. can be just global.  You just don't want the logs flooded with events,
> > but if the rate limiting is global or local doesn't make much difference
> > That would give much simpler code and I believe
> > is sufficient for basically everybody
> 
> This will make you skip logging events for other CPUs once one has
> throttled, which is important to me. In that case, one has to rely on
> just the counts.

Ok if you want complex features like this you'll have to get rid
of some other code in your patch. 

One possibility is to keep only some state per CPU,
but do the sys configuration globally, but that's probably not 
enough.

Currently the bloat:feature ratio is imho not good enough at least.

> > Currently your patch is a bit too large for the relatively simple things
> > it tries to attempt, so such ways to simplify it and slim it down are needed.
> > If you have other ideas to make it simpler that would be appreciated too.
> 
> You are right, it was my newbie mistake. I am splitting it up into
> logical, smaller chunks that are easier to review.

It's not just smaller chunks, the end result should be simple too


> > We have a 64bit jiffies for that now on 32bit too.
> 
> The problem with using jiffies_64 is that the time_before/time_after
> macros in linux/jiffies.h always typecast to "long" which is not 64
> bits on 32bit systems. So it will get truncated, and behave as 32bits
> and won't solve the problem

Then please submit a separate patch to add time_before/after64 instead of trying
to work around that.
 
> > > > Instead of having this evinfo structure you could just directly
> > > > fill in struct mces in the caller.
> > >
> > > But the caller won't know what to fill the struct mce with since this
> > > function does the logic of figuring out the thermal event info. I
> > > can't have this function take "struct mce *" since that doesn't exist
> > > on i386. I could have it accept pointers to values as arguments, but
> > > that's messy.
> >
> > Then either define struct mce for i386 or use two different functions
> > for i386/x86-64.
> 
> I'll add a patch to pull out mcelog related stuff into mce_log.c, and
> share that between i386/x86_64. Put mcelog.c to
> arch/i386/kernel/cpu/mcheck

Please keep it in x86-64.

> , and have x86_64 just compile that in 
> directly. Does that sound like a workable solution? There's no need to 
> maintain identical code in 2 places.

It depends on how ugly that patch is. Unification is fine as long
as it improves something, but if it requires weird hacks again it's a net loss.

-Andi

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86_64/i386: Rework thermal throttling detection/handling code for Intel P4/Xeons.
  2006-09-19  6:11         ` Andi Kleen
@ 2006-09-19 21:14           ` Dmitriy Zavin
  0 siblings, 0 replies; 7+ messages in thread
From: Dmitriy Zavin @ 2006-09-19 21:14 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel, akpm

> Ok if you want complex features like this you'll have to get rid
> of some other code in your patch.
...
> Currently the bloat:feature ratio is imho not good enough at least.

You are right. I was trying to do too much, unnecessarily. The count
is enough to be exported, and if someone cares to do interval
monitoring (etc.) they can write a daemon that watches it (since the
original patch was in second granularity, userspace can easily handle
that). If it turns out that we do need interval tracking, that can be
added at a later time. I'll have an updated series of patches to
address the issues.

>
> > > We have a 64bit jiffies for that now on 32bit too.
> >
> > The problem with using jiffies_64 is that the time_before/time_after
> > macros in linux/jiffies.h always typecast to "long" which is not 64
> > bits on 32bit systems. So it will get truncated, and behave as 32bits
> > and won't solve the problem
>
> Then please submit a separate patch to add time_before/after64 instead of trying
> to work around that.

Will do.

> > > > > Instead of having this evinfo structure you could just directly
> > > > > fill in struct mces in the caller.
> > > >
> > > > But the caller won't know what to fill the struct mce with since this
> > > > function does the logic of figuring out the thermal event info. I
> > > > can't have this function take "struct mce *" since that doesn't exist
> > > > on i386. I could have it accept pointers to values as arguments, but
> > > > that's messy.
> > >
> > > Then either define struct mce for i386 or use two different functions
> > > for i386/x86-64.
> >
> > I'll add a patch to pull out mcelog related stuff into mce_log.c, and
> > share that between i386/x86_64. Put mcelog.c to
> > arch/i386/kernel/cpu/mcheck
>
> Please keep it in x86-64.
>
> > , and have x86_64 just compile that in
> > directly. Does that sound like a workable solution? There's no need to
> > maintain identical code in 2 places.
>
> It depends on how ugly that patch is. Unification is fine as long
> as it improves something, but if it requires weird hacks again it's a net loss.

I decided against it. The patch would in fact be pretty ugly. Going
back in archives, I saw there were several attempts to do this, and it
was never successful. The pain of doing this would outweigh any use i
have for it at the moment.

Thanks for your insights.

--Dima

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2006-09-19 21:14 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-09-15  0:42 [PATCH] x86_64/i386: Rework thermal throttling detection/handling code for Intel P4/Xeons Dima Zavin
2006-09-15  6:59 ` Andi Kleen
2006-09-15 20:33   ` Dmitriy Zavin
2006-09-16  4:34     ` Andi Kleen
2006-09-18 22:21       ` Dmitriy Zavin
2006-09-19  6:11         ` Andi Kleen
2006-09-19 21:14           ` Dmitriy Zavin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox