linux-pm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/16] turbostat updates
@ 2016-02-27  9:00 Len Brown
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
  0 siblings, 1 reply; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:00 UTC (permalink / raw)
  To: linux-pm

This patch series makes turbostat useful on Xeon x200 machines,
where it previously displayed erroneous results.

This series also adds some features, as requested by users.

Let me know if you see any troubles with it.

This series is also available directly via git:

git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbosat

thanks,
-Len Brown, Intel Open Source Technology Center


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 01/16] tools/power turbostat: decode more CPUID fields
  2016-02-27  9:00 [PATCH 0/16] turbostat updates Len Brown
@ 2016-02-27  9:00 ` Len Brown
  2016-02-27  9:00   ` [PATCH 02/16] tools/power turbostat: CPUID(0x16) leaf shows base, max, and bus frequency Len Brown
                     ` (14 more replies)
  0 siblings, 15 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:00 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

for debugging, dump a few more fields:

CPUID(1): SSE3 MONITOR EIST TM2 TSC MSR ACPI-TM TM

cpu0: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST MONITOR)

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 0dac7e0..7ef8b9f 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2673,6 +2673,19 @@ guess:
 
 	return 0;
 }
+
+void decode_misc_enable_msr(void)
+{
+	unsigned long long msr;
+
+	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
+		fprintf(stderr, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n",
+			base_cpu, msr,
+			msr & (1 << 3) ? "TCC" : "",
+			msr & (1 << 16) ? "EIST" : "",
+			msr & (1 << 18) ? "MONITOR" : "");
+}
+
 void process_cpuid()
 {
 	unsigned int eax, ebx, ecx, edx, max_level;
@@ -2696,9 +2709,19 @@ void process_cpuid()
 	if (family == 6 || family == 0xf)
 		model += ((fms >> 16) & 0xf) << 4;
 
-	if (debug)
+	if (debug) {
 		fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
 			max_level, family, model, stepping, family, model, stepping);
+		fprintf(stderr, "CPUID(1): %s %s %s %s %s %s %s %s\n",
+			ecx & (1 << 0) ? "SSE3" : "-",
+			ecx & (1 << 3) ? "MONITOR" : "-",
+			ecx & (1 << 7) ? "EIST" : "-",
+			ecx & (1 << 8) ? "TM2" : "-",
+			edx & (1 << 4) ? "TSC" : "-",
+			edx & (1 << 5) ? "MSR" : "-",
+			edx & (1 << 22) ? "ACPI-TM" : "-",
+			edx & (1 << 29) ? "TM" : "-");
+	}
 
 	if (!(edx & (1 << 5)))
 		errx(1, "CPUID: no MSR");
@@ -2739,6 +2762,9 @@ void process_cpuid()
 			do_ptm ? "" : "No ",
 			has_epb ? "" : "No ");
 
+	if (debug)
+		decode_misc_enable_msr();
+
 	if (max_level > 0x15) {
 		unsigned int eax_crystal;
 		unsigned int ebx_tsc;
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 02/16] tools/power turbostat: CPUID(0x16) leaf shows base, max, and bus frequency
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
@ 2016-02-27  9:00   ` Len Brown
  2016-02-27  9:00   ` [PATCH 03/16] x86 msr-index: Simplify syntax for HWP fields Len Brown
                     ` (13 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:00 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

This CPUID leaf is available on Skylake:

CPUID(0x16): base_mhz: 1500 max_mhz: 2200 bus_mhz: 100

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 7ef8b9f..4c4470f 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2688,7 +2688,7 @@ void decode_misc_enable_msr(void)
 
 void process_cpuid()
 {
-	unsigned int eax, ebx, ecx, edx, max_level;
+	unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
 	unsigned int fms, family, model, stepping;
 
 	eax = ebx = ecx = edx = 0;
@@ -2732,9 +2732,9 @@ void process_cpuid()
 	 * This check is valid for both Intel and AMD.
 	 */
 	ebx = ecx = edx = 0;
-	__get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx);
+	__get_cpuid(0x80000000, &max_extended_level, &ebx, &ecx, &edx);
 
-	if (max_level >= 0x80000007) {
+	if (max_extended_level >= 0x80000007) {
 
 		/*
 		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
@@ -2765,7 +2765,7 @@ void process_cpuid()
 	if (debug)
 		decode_misc_enable_msr();
 
-	if (max_level > 0x15) {
+	if (max_level >= 0x15) {
 		unsigned int eax_crystal;
 		unsigned int ebx_tsc;
 
@@ -2799,6 +2799,19 @@ void process_cpuid()
 			}
 		}
 	}
+	if (max_level >= 0x16) {
+		unsigned int base_mhz, max_mhz, bus_mhz, edx;
+
+		/*
+		 * CPUID 16H Base MHz, Max MHz, Bus MHz
+		 */
+		base_mhz = max_mhz = bus_mhz = edx = 0;
+
+		__get_cpuid(0x16, &base_mhz, &max_mhz, &bus_mhz, &edx);
+		if (debug)
+			fprintf(stderr, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
+				base_mhz, max_mhz, bus_mhz);
+	}
 
 	if (has_aperf)
 		aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
@@ -3151,7 +3164,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-	fprintf(stderr, "turbostat version 4.8 26-Sep, 2015"
+	fprintf(stderr, "turbostat version 4.9 22 Nov, 2015"
 		" - Len Brown <lenb@kernel.org>\n");
 }
 
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 03/16] x86 msr-index: Simplify syntax for HWP fields
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
  2016-02-27  9:00   ` [PATCH 02/16] tools/power turbostat: CPUID(0x16) leaf shows base, max, and bus frequency Len Brown
@ 2016-02-27  9:00   ` Len Brown
  2016-02-27  9:00   ` [PATCH 04/16] tools/power turbostat: decode HWP registers Len Brown
                     ` (12 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:00 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

syntax only, no functional change

Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/include/asm/msr-index.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 690b402..5c5e7e5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -230,10 +230,10 @@
 #define HWP_PACKAGE_LEVEL_REQUEST_BIT	(1<<11)
 
 /* IA32_HWP_CAPABILITIES */
-#define HWP_HIGHEST_PERF(x)		(x & 0xff)
-#define HWP_GUARANTEED_PERF(x)		((x & (0xff << 8)) >>8)
-#define HWP_MOSTEFFICIENT_PERF(x)	((x & (0xff << 16)) >>16)
-#define HWP_LOWEST_PERF(x)		((x & (0xff << 24)) >>24)
+#define HWP_HIGHEST_PERF(x)		(((x) >> 0) & 0xff)
+#define HWP_GUARANTEED_PERF(x)		(((x) >> 8) & 0xff)
+#define HWP_MOSTEFFICIENT_PERF(x)	(((x) >> 16) & 0xff)
+#define HWP_LOWEST_PERF(x)		(((x) >> 24) & 0xff)
 
 /* IA32_HWP_REQUEST */
 #define HWP_MIN_PERF(x) 		(x & 0xff)
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 04/16] tools/power turbostat: decode HWP registers
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
  2016-02-27  9:00   ` [PATCH 02/16] tools/power turbostat: CPUID(0x16) leaf shows base, max, and bus frequency Len Brown
  2016-02-27  9:00   ` [PATCH 03/16] x86 msr-index: Simplify syntax for HWP fields Len Brown
@ 2016-02-27  9:00   ` Len Brown
  2016-02-27  9:00   ` [PATCH 05/16] tools/power turbostat: Decode MSR_MISC_PWR_MGMT Len Brown
                     ` (11 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:00 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

 # turbostat --debug
...
CPUID(6): ... HWP, HWPnotify, HWPwindow, HWPepp, HWPpkg ...
...
cpu0: MSR_PM_ENABLE: 0x00000001 (HWP)
cpu0: MSR_HWP_CAPABILITIES: 0x01050916 (high 0x16 guar 0x9 eff 0x5 low 0x1)
cpu0: MSR_HWP_REQUEST: 0x80001604 (min 0x4 max 0x16 des 0x0 epp 0x80 window 0x0 pkg 0x0)
cpu0: MSR_HWP_INTERRUPT: 0x00000001 (EN_Guaranteed_Perf_Change, Dis_Excursion_Min)
cpu0: MSR_HWP_STATUS: 0x00000000 (No-Guaranteed_Perf_Change, No-Excursion_Min)

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 124 ++++++++++++++++++++++++++++++++--
 1 file changed, 118 insertions(+), 6 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 4c4470f..af3b955 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -98,6 +98,12 @@ unsigned int crystal_hz;
 unsigned long long tsc_hz;
 int base_cpu;
 double discover_bclk(unsigned int family, unsigned int model);
+unsigned int has_hwp;	/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
+			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
+unsigned int has_hwp_notify;		/* IA32_HWP_INTERRUPT */
+unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
+unsigned int has_hwp_epp;		/* IA32_HWP_REQUEST[bits 31:24] */
+unsigned int has_hwp_pkg;		/* IA32_HWP_REQUEST_PKG */
 
 #define RAPL_PKG		(1 << 0)
 					/* 0x610 MSR_PKG_POWER_LIMIT */
@@ -2041,6 +2047,97 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
 	return 0;
 }
+/*
+ * print_hwp()
+ * Decode the MSR_HWP_CAPABILITIES
+ */
+int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+	unsigned long long msr;
+	int cpu;
+
+	if (!has_hwp)
+		return 0;
+
+	cpu = t->cpu_id;
+
+	/* MSR_HWP_CAPABILITIES is per-package */
+	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+		return 0;
+
+	if (cpu_migrate(cpu)) {
+		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+		return -1;
+	}
+
+	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
+		return 0;
+
+	fprintf(stderr, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
+		cpu, msr, (msr & (1 << 0)) ? "" : "No-");
+
+	/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
+	if ((msr & (1 << 0)) == 0)
+		return 0;
+
+	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
+		return 0;
+
+	fprintf(stderr, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
+			"(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
+			cpu, msr,
+			(unsigned int)HWP_HIGHEST_PERF(msr),
+			(unsigned int)HWP_GUARANTEED_PERF(msr),
+			(unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
+			(unsigned int)HWP_LOWEST_PERF(msr));
+
+	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
+		return 0;
+
+	fprintf(stderr, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
+			"(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
+			cpu, msr,
+			(unsigned int)(((msr) >> 0) & 0xff),
+			(unsigned int)(((msr) >> 8) & 0xff),
+			(unsigned int)(((msr) >> 16) & 0xff),
+			(unsigned int)(((msr) >> 24) & 0xff),
+			(unsigned int)(((msr) >> 32) & 0xff3),
+			(unsigned int)(((msr) >> 42) & 0x1));
+
+	if (has_hwp_pkg) {
+		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
+			return 0;
+
+		fprintf(stderr, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
+			"(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
+			cpu, msr,
+			(unsigned int)(((msr) >> 0) & 0xff),
+			(unsigned int)(((msr) >> 8) & 0xff),
+			(unsigned int)(((msr) >> 16) & 0xff),
+			(unsigned int)(((msr) >> 24) & 0xff),
+			(unsigned int)(((msr) >> 32) & 0xff3));
+	}
+	if (has_hwp_notify) {
+		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
+			return 0;
+
+		fprintf(stderr, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
+			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
+			cpu, msr,
+			((msr) & 0x1) ? "EN" : "Dis",
+			((msr) & 0x2) ? "EN" : "Dis");
+	}
+	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
+		return 0;
+
+	fprintf(stderr, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
+			"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
+			cpu, msr,
+			((msr) & 0x1) ? "" : "No-",
+			((msr) & 0x2) ? "" : "No-");
+
+	return 0;
+}
 
 /*
  * print_perf_limit()
@@ -2686,6 +2783,7 @@ void decode_misc_enable_msr(void)
 			msr & (1 << 18) ? "MONITOR" : "");
 }
 
+
 void process_cpuid()
 {
 	unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
@@ -2753,14 +2851,25 @@ void process_cpuid()
 	has_aperf = ecx & (1 << 0);
 	do_dts = eax & (1 << 0);
 	do_ptm = eax & (1 << 6);
+	has_hwp = eax & (1 << 7);
+	has_hwp_notify = eax & (1 << 8);
+	has_hwp_activity_window = eax & (1 << 9);
+	has_hwp_epp = eax & (1 << 10);
+	has_hwp_pkg = eax & (1 << 11);
 	has_epb = ecx & (1 << 3);
 
 	if (debug)
-		fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n",
-			has_aperf ? "" : "No ",
-			do_dts ? "" : "No ",
-			do_ptm ? "" : "No ",
-			has_epb ? "" : "No ");
+		fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, "
+			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
+			has_aperf ? "" : "No-",
+			do_dts ? "" : "No-",
+			do_ptm ? "" : "No-",
+			has_hwp ? "" : "No-",
+			has_hwp_notify ? "" : "No-",
+			has_hwp_activity_window ? "" : "No-",
+			has_hwp_epp ? "" : "No-",
+			has_hwp_pkg ? "" : "No-",
+			has_epb ? "" : "No-");
 
 	if (debug)
 		decode_misc_enable_msr();
@@ -3088,6 +3197,9 @@ void turbostat_init()
 
 
 	if (debug)
+		for_all_cpus(print_hwp, ODD_COUNTERS);
+
+	if (debug)
 		for_all_cpus(print_epb, ODD_COUNTERS);
 
 	if (debug)
@@ -3164,7 +3276,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-	fprintf(stderr, "turbostat version 4.9 22 Nov, 2015"
+	fprintf(stderr, "turbostat version 4.10 10 Dec, 2015"
 		" - Len Brown <lenb@kernel.org>\n");
 }
 
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 05/16] tools/power turbostat: Decode MSR_MISC_PWR_MGMT
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
                     ` (2 preceding siblings ...)
  2016-02-27  9:00   ` [PATCH 04/16] tools/power turbostat: decode HWP registers Len Brown
@ 2016-02-27  9:00   ` Len Brown
  2016-02-27  9:01   ` [PATCH 06/16] tools/power turbostat: allow sub-sec intervals Len Brown
                     ` (10 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:00 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

This MSR is helpful to show if P-state HW coordination
is enabled or disabled.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index af3b955..c600340 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2783,6 +2783,26 @@ void decode_misc_enable_msr(void)
 			msr & (1 << 18) ? "MONITOR" : "");
 }
 
+/*
+ * Decode MSR_MISC_PWR_MGMT
+ *
+ * Decode the bits according to the Nehalem documentation
+ * bit[0] seems to continue to have same meaning going forward
+ * bit[1] less so...
+ */
+void decode_misc_pwr_mgmt_msr(void)
+{
+	unsigned long long msr;
+
+	if (!do_nhm_platform_info)
+		return;
+
+	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
+		fprintf(stderr, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n",
+			base_cpu, msr,
+			msr & (1 << 0) ? "DIS" : "EN",
+			msr & (1 << 1) ? "EN" : "DIS");
+}
 
 void process_cpuid()
 {
@@ -2936,6 +2956,9 @@ void process_cpuid()
 	do_slm_cstates = is_slm(family, model);
 	do_knl_cstates  = is_knl(family, model);
 
+	if (debug)
+		decode_misc_pwr_mgmt_msr();
+
 	rapl_probe(family, model);
 	perf_limit_reasons_probe(family, model);
 
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 06/16] tools/power turbostat: allow sub-sec intervals
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
                     ` (3 preceding siblings ...)
  2016-02-27  9:00   ` [PATCH 05/16] tools/power turbostat: Decode MSR_MISC_PWR_MGMT Len Brown
@ 2016-02-27  9:01   ` Len Brown
  2016-02-28  3:43     ` Doug Smythies
  2016-02-27  9:01   ` [PATCH 07/16] tools/power turbostat: Intel Xeon x200: fix erroneous bclk value Len Brown
                     ` (9 subsequent siblings)
  14 siblings, 1 reply; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:01 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

turbostat -i interval_sec

will sample and display statistics every interval_sec.
interval_sec used to be a whole number of seconds,
but now we accept a decimal, as small as 0.001 sec (1 ms).

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.8 |  2 +-
 tools/power/x86/turbostat/turbostat.c | 20 ++++++++++++++++----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 622db68..8a9727b 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -34,7 +34,7 @@ name as necessary to disambiguate it from others is necessary.  Note that option
 \fB--debug\fP displays additional system configuration information.  Invoking this parameter
 more than once may also enable internal turbostat debug information.
 .PP
-\fB--interval seconds\fP overrides the default 5-second measurement interval.
+\fB--interval decimal_seconds\fP overrides the default 5.0 second measurement interval.
 .PP
 \fB--help\fP displays usage for the most common parameters.
 .PP
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index c600340..e411cc4 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -38,12 +38,13 @@
 #include <string.h>
 #include <ctype.h>
 #include <sched.h>
+#include <time.h>
 #include <cpuid.h>
 #include <linux/capability.h>
 #include <errno.h>
 
 char *proc_stat = "/proc/stat";
-unsigned int interval_sec = 5;
+struct timespec interval_ts = {5, 0};
 unsigned int debug;
 unsigned int rapl_joules;
 unsigned int summary_only;
@@ -1728,7 +1729,7 @@ restart:
 			re_initialize();
 			goto restart;
 		}
-		sleep(interval_sec);
+		nanosleep(&interval_ts, NULL);
 		retval = for_all_cpus(get_counters, ODD_COUNTERS);
 		if (retval < -1) {
 			exit(retval);
@@ -1742,7 +1743,7 @@ restart:
 		compute_average(EVEN_COUNTERS);
 		format_all_counters(EVEN_COUNTERS);
 		flush_stdout();
-		sleep(interval_sec);
+		nanosleep(&interval_ts, NULL);
 		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
 		if (retval < -1) {
 			exit(retval);
@@ -3347,7 +3348,18 @@ void cmdline(int argc, char **argv)
 			help();
 			exit(1);
 		case 'i':
-			interval_sec = atoi(optarg);
+			{
+				double interval = strtod(optarg, NULL);
+
+				if (interval < 0.001) {
+					fprintf(stderr, "interval %f seconds is too small\n",
+						interval);
+					exit(2);
+				}
+
+				interval_ts.tv_sec = interval;
+				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
+			}
 			break;
 		case 'J':
 			rapl_joules++;
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 07/16] tools/power turbostat: Intel Xeon x200: fix erroneous bclk value
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
                     ` (4 preceding siblings ...)
  2016-02-27  9:01   ` [PATCH 06/16] tools/power turbostat: allow sub-sec intervals Len Brown
@ 2016-02-27  9:01   ` Len Brown
  2016-02-27  9:01   ` [PATCH 08/16] tools/power turbostat: Intel Xeon x200: fix turbo-ratio decoding Len Brown
                     ` (8 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:01 UTC (permalink / raw)
  To: linux-pm; +Cc: Chrzaniuk, Hubert, Len Brown

From: "Chrzaniuk, Hubert" <hubert.chrzaniuk@intel.com>

x200 does not enable any way to programmatically obtain bus clock
speed. Bclk for the architecture has a fixed value of 100 MHz.
At the same time x200 cannot be included in has_snb_msrs since
it does not support C7 idle state.

prior to this patch, MHz values reported on this chip
were erroneously calculated using bclk of 133MHz,
causing MHz values to be reported 33% higher than actual.

Signed-off-by: Hubert Chrzaniuk <hubert.chrzaniuk@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index e411cc4..652d08d 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2697,7 +2697,7 @@ double slm_bclk(void)
 
 double discover_bclk(unsigned int family, unsigned int model)
 {
-	if (has_snb_msrs(family, model))
+	if (has_snb_msrs(family, model) || is_knl(family, model))
 		return 100.00;
 	else if (is_slm(family, model))
 		return slm_bclk();
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 08/16] tools/power turbostat: Intel Xeon x200: fix turbo-ratio decoding
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
                     ` (5 preceding siblings ...)
  2016-02-27  9:01   ` [PATCH 07/16] tools/power turbostat: Intel Xeon x200: fix erroneous bclk value Len Brown
@ 2016-02-27  9:01   ` Len Brown
  2016-02-27  9:01   ` [PATCH 09/16] tools/power turbostat: re-name "%Busy" field to "Busy%" Len Brown
                     ` (7 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:01 UTC (permalink / raw)
  To: linux-pm; +Cc: Hubert Chrzaniuk, Len Brown

From: Hubert Chrzaniuk <hubert.chrzaniuk@intel.com>

Following changes have been made:
- changed MSR_NHM_TURBO_RATIO_LIMIT to MSR_TURBO_RATIO_LIMIT in debug print
  for consistency with Developer Manual
- updated definition of bitfields in MSR_TURBO_RATIO_LIMIT and appropriate
  parsing code
- added x200 to list of architectures that do not support Nahlem compatible
  definition of MSR_TURBO_RATIO_LIMIT register (x200 has the register but
  bits definition is custom)
- fixed typo in code that parses MSR_TURBO_RATIO_LIMIT
  (logical instead of bitwise operator)
- changed MSR_TURBO_RATIO_LIMIT parsing algorithm so the print out had the
  same order as implementations for other platforms

Signed-off-by: Hubert Chrzaniuk <hubert.chrzaniuk@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 53 +++++++++++++++++------------------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 652d08d..4e5386d 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1328,21 +1328,23 @@ dump_nhm_turbo_ratio_limits(void)
 static void
 dump_knl_turbo_ratio_limits(void)
 {
-	int cores;
-	unsigned int ratio;
+	const unsigned int buckets_no = 7;
+
 	unsigned long long msr;
-	int delta_cores;
-	int delta_ratio;
-	int i;
+	int delta_cores, delta_ratio;
+	int i, b_nr;
+	unsigned int cores[buckets_no];
+	unsigned int ratio[buckets_no];
 
 	get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
 
-	fprintf(stderr, "cpu%d: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n",
+	fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
 		base_cpu, msr);
 
 	/**
 	 * Turbo encoding in KNL is as follows:
-	 * [7:0] -- Base value of number of active cores of bucket 1.
+	 * [0] -- Reserved
+	 * [7:1] -- Base value of number of active cores of bucket 1.
 	 * [15:8] -- Base value of freq ratio of bucket 1.
 	 * [20:16] -- +ve delta of number of active cores of bucket 2.
 	 * i.e. active cores of bucket 2 =
@@ -1361,29 +1363,25 @@ dump_knl_turbo_ratio_limits(void)
 	 * [60:56]-- +ve delta of number of active cores of bucket 7.
 	 * [63:61]-- -ve delta of freq ratio of bucket 7.
 	 */
-	cores = msr & 0xFF;
-	ratio = (msr >> 8) && 0xFF;
-	if (ratio > 0)
-		fprintf(stderr,
-			"%d * %.0f = %.0f MHz max turbo %d active cores\n",
-			ratio, bclk, ratio * bclk, cores);
-
-	for (i = 16; i < 64; i = i + 8) {
+
+	b_nr = 0;
+	cores[b_nr] = (msr & 0xFF) >> 1;
+	ratio[b_nr] = (msr >> 8) & 0xFF;
+
+	for (i = 16; i < 64; i += 8) {
 		delta_cores = (msr >> i) & 0x1F;
-		delta_ratio = (msr >> (i + 5)) && 0x7;
-		if (!delta_cores || !delta_ratio)
-			return;
-		cores = cores + delta_cores;
-		ratio = ratio - delta_ratio;
-
-		/** -ve ratios will make successive ratio calculations
-		 * negative. Hence return instead of carrying on.
-		 */
-		if (ratio > 0)
+		delta_ratio = (msr >> (i + 5)) & 0x7;
+
+		cores[b_nr + 1] = cores[b_nr] + delta_cores;
+		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
+		b_nr++;
+	}
+
+	for (i = buckets_no - 1; i >= 0; i--)
+		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
 			fprintf(stderr,
 				"%d * %.0f = %.0f MHz max turbo %d active cores\n",
-				ratio, bclk, ratio * bclk, cores);
-	}
+				ratio[i], bclk, ratio[i] * bclk, cores[i]);
 }
 
 static void
@@ -1896,6 +1894,7 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
 	/* Nehalem compatible, but do not include turbo-ratio limit support */
 	case 0x2E:	/* Nehalem-EX Xeon - Beckton */
 	case 0x2F:	/* Westmere-EX Xeon - Eagleton */
+	case 0x57:	/* PHI - Knights Landing (different MSR definition) */
 		return 0;
 	default:
 		return 1;
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 09/16] tools/power turbostat: re-name "%Busy" field to "Busy%"
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
                     ` (6 preceding siblings ...)
  2016-02-27  9:01   ` [PATCH 08/16] tools/power turbostat: Intel Xeon x200: fix turbo-ratio decoding Len Brown
@ 2016-02-27  9:01   ` Len Brown
  2016-02-27  9:01   ` [PATCH 10/16] tools/power turbostat: add --out option for saving output in a file Len Brown
                     ` (6 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:01 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

some tools processing turbostat output
have difficulty with items that begin with %...

Reported-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.8 | 16 ++++++++--------
 tools/power/x86/turbostat/turbostat.c |  6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 8a9727b..1104373 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -61,7 +61,7 @@ displays the statistics gathered since it was forked.
 .nf
 \fBCPU\fP Linux CPU (logical processor) number.  Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
 \fBAVG_MHz\fP number of cycles executed divided by time elapsed.
-\fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state.
+\fBBusy%\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state.
 \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state).
 \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
 .fi
@@ -89,7 +89,7 @@ Without any parameters, turbostat displays statistics ever 5 seconds.
 for turbostat to fork).
 .nf
 [root@hsw]# ./turbostat
-     CPU Avg_MHz   %Busy Bzy_MHz TSC_MHz
+     CPU Avg_MHz   Busy% Bzy_MHz TSC_MHz
        -     488   12.51    3898    3498
        0       0    0.01    3885    3498
        4    3897   99.99    3898    3498
@@ -145,7 +145,7 @@ cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1)
 cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1)
 cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1)
 cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1)
-    Core     CPU Avg_MHz   %Busy Bzy_MHz TSC_MHz     SMI  CPU%c1  CPU%c3  CPU%c6  CPU%c7 CoreTmp  PkgTmp PkgWatt CorWatt GFXWatt
+    Core     CPU Avg_MHz   Busy% Bzy_MHz TSC_MHz     SMI  CPU%c1  CPU%c3  CPU%c6  CPU%c7 CoreTmp  PkgTmp PkgWatt CorWatt GFXWatt
        -       -     493   12.64    3898    3498       0   12.64    0.00    0.00   74.72      47      47   21.62   13.74    0.00
        0       0       4    0.11    3894    3498       0   99.89    0.00    0.00    0.00      47      47   21.62   13.74    0.00
        0       4    3897   99.98    3898    3498       0    0.02
@@ -178,7 +178,7 @@ until ^C while the other CPUs are mostly idle:
 .nf
 root@hsw: turbostat cat /dev/zero > /dev/null
 ^C
-     CPU Avg_MHz   %Busy Bzy_MHz TSC_MHz
+     CPU Avg_MHz   Busy% Bzy_MHz TSC_MHz
        -     482   12.51    3854    3498
        0       0    0.01    1960    3498
        4       0    0.00    2128    3498
@@ -192,12 +192,12 @@ root@hsw: turbostat cat /dev/zero > /dev/null
 
 .fi
 Above the cycle soaker drives cpu5 up its 3.9 GHz turbo limit.
-The first row shows the average MHz and %Busy across all the processors in the system.
+The first row shows the average MHz and Busy% across all the processors in the system.
 
 Note that the Avg_MHz column reflects the total number of cycles executed
-divided by the measurement interval.  If the %Busy column is 100%,
+divided by the measurement interval.  If the Busy% column is 100%,
 then the processor was running at that speed the entire interval.
-The Avg_MHz multiplied by the %Busy results in the Bzy_MHz --
+The Avg_MHz multiplied by the Busy% results in the Bzy_MHz --
 which is the average frequency while the processor was executing --
 not including any non-busy idle time.
 
@@ -233,7 +233,7 @@ in the brand string in /proc/cpuinfo.  On a system where
 the TSC stops in idle, TSC_MHz will drop
 below the processor's base frequency.
 
-%Busy = MPERF_delta/TSC_delta
+Busy% = MPERF_delta/TSC_delta
 
 Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval
 
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 4e5386d..947239b 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -293,7 +293,7 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 /*
  * Example Format w/ field column widths:
  *
- *  Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     SMI   %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
+ *  Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     SMI   Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
  * 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
  */
 
@@ -308,7 +308,7 @@ void print_header(void)
 	if (has_aperf)
 		outp += sprintf(outp, " Avg_MHz");
 	if (has_aperf)
-		outp += sprintf(outp, "   %%Busy");
+		outp += sprintf(outp, "   Busy%%");
 	if (has_aperf)
 		outp += sprintf(outp, " Bzy_MHz");
 	outp += sprintf(outp, " TSC_MHz");
@@ -511,7 +511,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
 		outp += sprintf(outp, "%8.0f",
 			1.0 / units * t->aperf / interval_float);
 
-	/* %Busy */
+	/* Busy% */
 	if (has_aperf) {
 		if (!skip_c0)
 			outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 10/16] tools/power turbostat: add --out option for saving output in a file
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
                     ` (7 preceding siblings ...)
  2016-02-27  9:01   ` [PATCH 09/16] tools/power turbostat: re-name "%Busy" field to "Busy%" Len Brown
@ 2016-02-27  9:01   ` Len Brown
  2016-02-27  9:01   ` [PATCH 11/16] tools/power turbostat: fix compiler warnings Len Brown
                     ` (5 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:01 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

By default...

Turbostat --debug gconfiguration info goes to stderr.

In FORK mode, turbostat statistics go to stderr.

In PERIODIC mode, turbostat statistics go to stdout.

These defaults do not change, but an option "--out file"
will send all output above only to the specified file.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.8 |  14 +-
 tools/power/x86/turbostat/turbostat.c | 281 ++++++++++++++++++----------------
 2 files changed, 160 insertions(+), 135 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 1104373..e76a0133 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -36,6 +36,9 @@ more than once may also enable internal turbostat debug information.
 .PP
 \fB--interval decimal_seconds\fP overrides the default 5.0 second measurement interval.
 .PP
+\fB--out output_file\fP turbostat output is written to the specified output_file.
+The file is truncated if it already exists, and it is created if it does not exist.
+.PP
 \fB--help\fP displays usage for the most common parameters.
 .PP
 \fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts.
@@ -83,10 +86,11 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T
 \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
 .fi
 .PP
-.SH EXAMPLE
+.SH PERIODIC EXAMPLE
 Without any parameters, turbostat displays statistics ever 5 seconds.
-(override interval with "-i sec" option, or specify a command
-for turbostat to fork).
+Periodic output goes to stdout, by default, unless --out is used to specify an output file.
+The 5-second interval can be changed with th "-i sec" option.
+Or a command may be specified as in "FORK EXAMPLE" below.
 .nf
 [root@hsw]# ./turbostat
      CPU Avg_MHz   Busy% Bzy_MHz TSC_MHz
@@ -171,7 +175,9 @@ The --debug option adds additional columns to the measurement ouput, including C
 See the field definitions above.
 .SH FORK EXAMPLE
 If turbostat is invoked with a command, it will fork that command
-and output the statistics gathered when the command exits.
+and output the statistics gathered after the command exits.
+In this case, turbostat output goes to stderr, by default.
+Output can instead be saved to a file using the --out option.
 eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
 until ^C while the other CPUs are mostly idle:
 
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 947239b..5f9f41a 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -44,6 +44,7 @@
 #include <errno.h>
 
 char *proc_stat = "/proc/stat";
+FILE *outf;
 struct timespec interval_ts = {5, 0};
 unsigned int debug;
 unsigned int rapl_joules;
@@ -652,15 +653,24 @@ done:
 	return 0;
 }
 
-void flush_stdout()
+void flush_output_stdout(void)
 {
-	fputs(output_buffer, stdout);
-	fflush(stdout);
+	FILE *filep;
+
+	if (outf == stderr)
+		filep = stdout;
+	else
+		filep = outf;
+
+	fputs(output_buffer, filep);
+	fflush(filep);
+
 	outp = output_buffer;
 }
-void flush_stderr()
+void flush_output_stderr(void)
 {
-	fputs(output_buffer, stderr);
+	fputs(output_buffer, outf);
+	fflush(outf);
 	outp = output_buffer;
 }
 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
@@ -752,9 +762,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
 		} else {
 
 			if (!aperf_mperf_unstable) {
-				fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
-				fprintf(stderr, "* Frequency results do not cover entire interval *\n");
-				fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
+				fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname);
+				fprintf(outf, "* Frequency results do not cover entire interval *\n");
+				fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n");
 
 				aperf_mperf_unstable = 1;
 			}
@@ -789,7 +799,8 @@ delta_thread(struct thread_data *new, struct thread_data *old,
 	}
 
 	if (old->mperf == 0) {
-		if (debug > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id);
+		if (debug > 1)
+			fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
 		old->mperf = 1;	/* divide by 0 protection */
 	}
 
@@ -989,7 +1000,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	unsigned long long msr;
 
 	if (cpu_migrate(cpu)) {
-		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
@@ -1182,18 +1193,18 @@ dump_nhm_platform_info(void)
 
 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
 
-	fprintf(stderr, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
+	fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
 
 	ratio = (msr >> 40) & 0xFF;
-	fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n",
+	fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n",
 		ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 8) & 0xFF;
-	fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n",
+	fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n",
 		ratio, bclk, ratio * bclk);
 
 	get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
-	fprintf(stderr, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
+	fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
 		base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
 
 	return;
@@ -1207,16 +1218,16 @@ dump_hsw_turbo_ratio_limits(void)
 
 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
 
-	fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
+	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
 
 	ratio = (msr >> 8) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 0) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
 			ratio, bclk, ratio * bclk);
 	return;
 }
@@ -1229,46 +1240,46 @@ dump_ivt_turbo_ratio_limits(void)
 
 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
 
-	fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
+	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
 
 	ratio = (msr >> 56) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 48) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 40) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 32) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 24) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 16) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 8) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 0) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
 			ratio, bclk, ratio * bclk);
 	return;
 }
@@ -1281,46 +1292,46 @@ dump_nhm_turbo_ratio_limits(void)
 
 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
 
-	fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
+	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
 
 	ratio = (msr >> 56) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 48) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 40) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 32) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 24) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 16) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 8) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
 			ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 0) & 0xFF;
 	if (ratio)
-		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
+		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
 			ratio, bclk, ratio * bclk);
 	return;
 }
@@ -1338,7 +1349,7 @@ dump_knl_turbo_ratio_limits(void)
 
 	get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
 
-	fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
+	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
 		base_cpu, msr);
 
 	/**
@@ -1379,7 +1390,7 @@ dump_knl_turbo_ratio_limits(void)
 
 	for (i = buckets_no - 1; i >= 0; i--)
 		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
-			fprintf(stderr,
+			fprintf(outf,
 				"%d * %.0f = %.0f MHz max turbo %d active cores\n",
 				ratio[i], bclk, ratio[i] * bclk, cores[i]);
 }
@@ -1394,9 +1405,9 @@ dump_nhm_cst_cfg(void)
 #define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
 #define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
 
-	fprintf(stderr, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
+	fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
 
-	fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
+	fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
 		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
 		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
 		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
@@ -1413,41 +1424,41 @@ dump_config_tdp(void)
 	unsigned long long msr;
 
 	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
-	fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
-	fprintf(stderr, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF);
+	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
+	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF);
 
 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
-	fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
+	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
 	if (msr) {
-		fprintf(stderr, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF);
-		fprintf(stderr, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF);
-		fprintf(stderr, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF);
-		fprintf(stderr, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF);
+		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF);
+		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF);
+		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF);
+		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF);
 	}
-	fprintf(stderr, ")\n");
+	fprintf(outf, ")\n");
 
 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
-	fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
+	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
 	if (msr) {
-		fprintf(stderr, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF);
-		fprintf(stderr, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF);
-		fprintf(stderr, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF);
-		fprintf(stderr, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF);
+		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF);
+		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF);
+		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF);
+		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF);
 	}
-	fprintf(stderr, ")\n");
+	fprintf(outf, ")\n");
 
 	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
-	fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
+	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
 	if ((msr) & 0x3)
-		fprintf(stderr, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
-	fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1);
-	fprintf(stderr, ")\n");
+		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
+	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
+	fprintf(outf, ")\n");
 	
 	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
-	fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
-	fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F);
-	fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1);
-	fprintf(stderr, ")\n");
+	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
+	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F);
+	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
+	fprintf(outf, ")\n");
 }
 
 void free_all_buffers(void)
@@ -1486,7 +1497,7 @@ void free_all_buffers(void)
  */
 FILE *fopen_or_die(const char *path, const char *mode)
 {
-	FILE *filep = fopen(path, "r");
+	FILE *filep = fopen(path, mode);
 	if (!filep)
 		err(1, "%s: open failed", path);
 	return filep;
@@ -1740,7 +1751,7 @@ restart:
 		for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
 		compute_average(EVEN_COUNTERS);
 		format_all_counters(EVEN_COUNTERS);
-		flush_stdout();
+		flush_output_stdout();
 		nanosleep(&interval_ts, NULL);
 		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
 		if (retval < -1) {
@@ -1754,7 +1765,7 @@ restart:
 		for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS);
 		compute_average(ODD_COUNTERS);
 		format_all_counters(ODD_COUNTERS);
-		flush_stdout();
+		flush_output_stdout();
 	}
 }
 
@@ -2022,7 +2033,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		return 0;
 
 	if (cpu_migrate(cpu)) {
-		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
@@ -2043,7 +2054,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		epb_string = "custom";
 		break;
 	}
-	fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
+	fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
 
 	return 0;
 }
@@ -2066,14 +2077,14 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		return 0;
 
 	if (cpu_migrate(cpu)) {
-		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
 	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
 		return 0;
 
-	fprintf(stderr, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
+	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
 		cpu, msr, (msr & (1 << 0)) ? "" : "No-");
 
 	/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
@@ -2083,7 +2094,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
 		return 0;
 
-	fprintf(stderr, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
+	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
 			"(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
 			cpu, msr,
 			(unsigned int)HWP_HIGHEST_PERF(msr),
@@ -2094,7 +2105,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
 		return 0;
 
-	fprintf(stderr, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
+	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
 			"(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
 			cpu, msr,
 			(unsigned int)(((msr) >> 0) & 0xff),
@@ -2108,7 +2119,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
 			return 0;
 
-		fprintf(stderr, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
+		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
 			"(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
 			cpu, msr,
 			(unsigned int)(((msr) >> 0) & 0xff),
@@ -2121,7 +2132,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
 			return 0;
 
-		fprintf(stderr, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
+		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
 			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
 			cpu, msr,
 			((msr) & 0x1) ? "EN" : "Dis",
@@ -2130,7 +2141,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
 		return 0;
 
-	fprintf(stderr, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
+	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
 			"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
 			cpu, msr,
 			((msr) & 0x1) ? "" : "No-",
@@ -2154,14 +2165,14 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 		return 0;
 
 	if (cpu_migrate(cpu)) {
-		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
 	if (do_core_perf_limit_reasons) {
 		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
-		fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
-		fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
+		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
+		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
 			(msr & 1 << 15) ? "bit15, " : "",
 			(msr & 1 << 14) ? "bit14, " : "",
 			(msr & 1 << 13) ? "Transitions, " : "",
@@ -2176,7 +2187,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 			(msr & 1 << 2) ? "bit2, " : "",
 			(msr & 1 << 1) ? "ThermStatus, " : "",
 			(msr & 1 << 0) ? "PROCHOT, " : "");
-		fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
+		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
 			(msr & 1 << 31) ? "bit31, " : "",
 			(msr & 1 << 30) ? "bit30, " : "",
 			(msr & 1 << 29) ? "Transitions, " : "",
@@ -2195,8 +2206,8 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 	}
 	if (do_gfx_perf_limit_reasons) {
 		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
-		fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
-		fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)",
+		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
+		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
 			(msr & 1 << 0) ? "PROCHOT, " : "",
 			(msr & 1 << 1) ? "ThermStatus, " : "",
 			(msr & 1 << 4) ? "Graphics, " : "",
@@ -2205,7 +2216,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 			(msr & 1 << 9) ? "GFXPwr, " : "",
 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
 			(msr & 1 << 11) ? "PkgPwrL2, " : "");
-		fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n",
+		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
 			(msr & 1 << 16) ? "PROCHOT, " : "",
 			(msr & 1 << 17) ? "ThermStatus, " : "",
 			(msr & 1 << 20) ? "Graphics, " : "",
@@ -2217,15 +2228,15 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 	}
 	if (do_ring_perf_limit_reasons) {
 		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
-		fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
-		fprintf(stderr, " (Active: %s%s%s%s%s%s)",
+		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
+		fprintf(outf, " (Active: %s%s%s%s%s%s)",
 			(msr & 1 << 0) ? "PROCHOT, " : "",
 			(msr & 1 << 1) ? "ThermStatus, " : "",
 			(msr & 1 << 6) ? "VR-Therm, " : "",
 			(msr & 1 << 8) ? "Amps, " : "",
 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
 			(msr & 1 << 11) ? "PkgPwrL2, " : "");
-		fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n",
+		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
 			(msr & 1 << 16) ? "PROCHOT, " : "",
 			(msr & 1 << 17) ? "ThermStatus, " : "",
 			(msr & 1 << 22) ? "VR-Therm, " : "",
@@ -2348,7 +2359,7 @@ void rapl_probe(unsigned int family, unsigned int model)
 
 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
 	if (debug)
-		fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
+		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
 
 	return;
 }
@@ -2390,7 +2401,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 		return 0;
 
 	if (cpu_migrate(cpu)) {
-		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
@@ -2399,7 +2410,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 			return 0;
 
 		dts = (msr >> 16) & 0x7F;
-		fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
+		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
 			cpu, msr, tcc_activation_temp - dts);
 
 #ifdef	THERM_DEBUG
@@ -2408,7 +2419,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 
 		dts = (msr >> 16) & 0x7F;
 		dts2 = (msr >> 8) & 0x7F;
-		fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
 #endif
 	}
@@ -2422,7 +2433,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 
 		dts = (msr >> 16) & 0x7F;
 		resolution = (msr >> 27) & 0xF;
-		fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
+		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
 			cpu, msr, tcc_activation_temp - dts, resolution);
 
 #ifdef THERM_DEBUG
@@ -2431,7 +2442,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 
 		dts = (msr >> 16) & 0x7F;
 		dts2 = (msr >> 8) & 0x7F;
-		fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
 #endif
 	}
@@ -2441,7 +2452,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 	
 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
 {
-	fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
+	fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
 		cpu, label,
 		((msr >> 15) & 1) ? "EN" : "DIS",
 		((msr >> 0) & 0x7FFF) * rapl_power_units,
@@ -2465,7 +2476,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
 	cpu = t->cpu_id;
 	if (cpu_migrate(cpu)) {
-		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
@@ -2473,7 +2484,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		return -1;
 
 	if (debug) {
-		fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
+		fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
 			"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
 			rapl_power_units, rapl_energy_units, rapl_time_units);
 	}
@@ -2483,7 +2494,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                 	return -5;
 
 
-		fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
+		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
 			cpu, msr,
 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
@@ -2496,11 +2507,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
 			return -9;
 
-		fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
+		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
 			cpu, msr, (msr >> 63) & 1 ? "": "UN");
 
 		print_power_limit_msr(cpu, msr, "PKG Limit #1");
-		fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
+		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
 			cpu,
 			((msr >> 47) & 1) ? "EN" : "DIS",
 			((msr >> 32) & 0x7FFF) * rapl_power_units,
@@ -2512,7 +2523,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
                 	return -6;
 
-		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
+		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
 			cpu, msr,
 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
@@ -2522,7 +2533,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	if (do_rapl & RAPL_DRAM) {
 		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
 			return -9;
-		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
+		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
 				cpu, msr, (msr >> 31) & 1 ? "": "UN");
 
 		print_power_limit_msr(cpu, msr, "DRAM Limit");
@@ -2532,7 +2543,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 			if (get_msr(cpu, MSR_PP0_POLICY, &msr))
 				return -7;
 
-			fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
+			fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
 		}
 	}
 	if (do_rapl & RAPL_CORES) {
@@ -2540,7 +2551,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
 			if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
 				return -9;
-			fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
+			fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
 					cpu, msr, (msr >> 31) & 1 ? "": "UN");
 			print_power_limit_msr(cpu, msr, "Cores Limit");
 		}
@@ -2550,11 +2561,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 			if (get_msr(cpu, MSR_PP1_POLICY, &msr))
 				return -8;
 
-			fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
+			fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
 
 			if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
 				return -9;
-			fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
+			fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
 					cpu, msr, (msr >> 31) & 1 ? "": "UN");
 			print_power_limit_msr(cpu, msr, "GFX Limit");
 		}
@@ -2680,16 +2691,16 @@ double slm_bclk(void)
 	double freq;
 
 	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
-		fprintf(stderr, "SLM BCLK: unknown\n");
+		fprintf(outf, "SLM BCLK: unknown\n");
 
 	i = msr & 0xf;
 	if (i >= SLM_BCLK_FREQS) {
-		fprintf(stderr, "SLM BCLK[%d] invalid\n", i);
+		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
 		msr = 3;
 	}
 	freq = slm_freq_table[i];
 
-	fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq);
+	fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
 
 	return freq;
 }
@@ -2732,13 +2743,13 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
 
 	cpu = t->cpu_id;
 	if (cpu_migrate(cpu)) {
-		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
 	if (tcc_activation_temp_override != 0) {
 		tcc_activation_temp = tcc_activation_temp_override;
-		fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n",
+		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
 			cpu, tcc_activation_temp);
 		return 0;
 	}
@@ -2753,7 +2764,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
 	target_c_local = (msr >> 16) & 0xFF;
 
 	if (debug)
-		fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
+		fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
 			cpu, msr, target_c_local);
 
 	if (!target_c_local)
@@ -2765,7 +2776,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
 
 guess:
 	tcc_activation_temp = TJMAX_DEFAULT;
-	fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
+	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
 		cpu, tcc_activation_temp);
 
 	return 0;
@@ -2776,7 +2787,7 @@ void decode_misc_enable_msr(void)
 	unsigned long long msr;
 
 	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
-		fprintf(stderr, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n",
+		fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n",
 			base_cpu, msr,
 			msr & (1 << 3) ? "TCC" : "",
 			msr & (1 << 16) ? "EIST" : "",
@@ -2798,7 +2809,7 @@ void decode_misc_pwr_mgmt_msr(void)
 		return;
 
 	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
-		fprintf(stderr, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n",
+		fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n",
 			base_cpu, msr,
 			msr & (1 << 0) ? "DIS" : "EN",
 			msr & (1 << 1) ? "EN" : "DIS");
@@ -2817,7 +2828,7 @@ void process_cpuid()
 		genuine_intel = 1;
 
 	if (debug)
-		fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
+		fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
 			(char *)&ebx, (char *)&edx, (char *)&ecx);
 
 	__get_cpuid(1, &fms, &ebx, &ecx, &edx);
@@ -2828,9 +2839,9 @@ void process_cpuid()
 		model += ((fms >> 16) & 0xf) << 4;
 
 	if (debug) {
-		fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
+		fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
 			max_level, family, model, stepping, family, model, stepping);
-		fprintf(stderr, "CPUID(1): %s %s %s %s %s %s %s %s\n",
+		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s\n",
 			ecx & (1 << 0) ? "SSE3" : "-",
 			ecx & (1 << 3) ? "MONITOR" : "-",
 			ecx & (1 << 7) ? "EIST" : "-",
@@ -2879,7 +2890,7 @@ void process_cpuid()
 	has_epb = ecx & (1 << 3);
 
 	if (debug)
-		fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, "
+		fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, "
 			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
 			has_aperf ? "" : "No-",
 			do_dts ? "" : "No-",
@@ -2907,7 +2918,7 @@ void process_cpuid()
 		if (ebx_tsc != 0) {
 
 			if (debug && (ebx != 0))
-				fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
+				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
 					eax_crystal, ebx_tsc, crystal_hz);
 
 			if (crystal_hz == 0)
@@ -2923,7 +2934,7 @@ void process_cpuid()
 			if (crystal_hz) {
 				tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
 				if (debug)
-					fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
+					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
 						tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
 			}
 		}
@@ -2938,7 +2949,7 @@ void process_cpuid()
 
 		__get_cpuid(0x16, &base_mhz, &max_mhz, &bus_mhz, &edx);
 		if (debug)
-			fprintf(stderr, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
+			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
 				base_mhz, max_mhz, bus_mhz);
 	}
 
@@ -2973,7 +2984,7 @@ void process_cpuid()
 
 void help()
 {
-	fprintf(stderr,
+	fprintf(outf,
 	"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
 	"\n"
 	"Turbostat forks the specified COMMAND and prints statistics\n"
@@ -2985,6 +2996,7 @@ void help()
 	"--help		print this help message\n"
 	"--counter msr	print 32-bit counter at address \"msr\"\n"
 	"--Counter msr	print 64-bit Counter at address \"msr\"\n"
+	"--out file	create or truncate \"file\" for all output\n"
 	"--msr msr	print 32-bit value at address \"msr\"\n"
 	"--MSR msr	print 64-bit Value at address \"msr\"\n"
 	"--version	print version information\n"
@@ -3029,7 +3041,7 @@ void topology_probe()
 		show_cpu = 1;
 
 	if (debug > 1)
-		fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
+		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
 
 	cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
 	if (cpus == NULL)
@@ -3064,7 +3076,7 @@ void topology_probe()
 
 		if (cpu_is_not_present(i)) {
 			if (debug > 1)
-				fprintf(stderr, "cpu%d NOT PRESENT\n", i);
+				fprintf(outf, "cpu%d NOT PRESENT\n", i);
 			continue;
 		}
 		cpus[i].core_id = get_core_id(i);
@@ -3079,26 +3091,26 @@ void topology_probe()
 		if (siblings > max_siblings)
 			max_siblings = siblings;
 		if (debug > 1)
-			fprintf(stderr, "cpu %d pkg %d core %d\n",
+			fprintf(outf, "cpu %d pkg %d core %d\n",
 				i, cpus[i].physical_package_id, cpus[i].core_id);
 	}
 	topo.num_cores_per_pkg = max_core_id + 1;
 	if (debug > 1)
-		fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n",
+		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
 			max_core_id, topo.num_cores_per_pkg);
 	if (debug && !summary_only && topo.num_cores_per_pkg > 1)
 		show_core = 1;
 
 	topo.num_packages = max_package_id + 1;
 	if (debug > 1)
-		fprintf(stderr, "max_package_id %d, sizing for %d packages\n",
+		fprintf(outf, "max_package_id %d, sizing for %d packages\n",
 			max_package_id, topo.num_packages);
 	if (debug && !summary_only && topo.num_packages > 1)
 		show_pkg = 1;
 
 	topo.num_threads_per_core = max_siblings;
 	if (debug > 1)
-		fprintf(stderr, "max_siblings %d\n", max_siblings);
+		fprintf(outf, "max_siblings %d\n", max_siblings);
 
 	free(cpus);
 }
@@ -3207,7 +3219,7 @@ void set_base_cpu(void)
 		err(-ENODEV, "No valid cpus found");
 
 	if (debug > 1)
-		fprintf(stderr, "base_cpu = %d\n", base_cpu);
+		fprintf(outf, "base_cpu = %d\n", base_cpu);
 }
 
 void turbostat_init()
@@ -3274,9 +3286,10 @@ int fork_it(char **argv)
 	for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
 	compute_average(EVEN_COUNTERS);
 	format_all_counters(EVEN_COUNTERS);
-	flush_stderr();
 
-	fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
+	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
+
+	flush_output_stderr();
 
 	return status;
 }
@@ -3293,13 +3306,13 @@ int get_and_dump_counters(void)
 	if (status)
 		return status;
 
-	flush_stdout();
+	flush_output_stdout();
 
 	return status;
 }
 
 void print_version() {
-	fprintf(stderr, "turbostat version 4.10 10 Dec, 2015"
+	fprintf(outf, "turbostat version 4.10 10 Dec, 2015"
 		" - Len Brown <lenb@kernel.org>\n");
 }
 
@@ -3317,6 +3330,7 @@ void cmdline(int argc, char **argv)
 		{"Joules",	no_argument,		0, 'J'},
 		{"MSR",		required_argument,	0, 'M'},
 		{"msr",		required_argument,	0, 'm'},
+		{"out",		required_argument,	0, 'o'},
 		{"Package",	no_argument,		0, 'p'},
 		{"processor",	no_argument,		0, 'p'},
 		{"Summary",	no_argument,		0, 'S'},
@@ -3327,7 +3341,7 @@ void cmdline(int argc, char **argv)
 
 	progname = argv[0];
 
-	while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:PpST:v",
+	while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v",
 				long_options, &option_index)) != -1) {
 		switch (opt) {
 		case 'C':
@@ -3351,7 +3365,7 @@ void cmdline(int argc, char **argv)
 				double interval = strtod(optarg, NULL);
 
 				if (interval < 0.001) {
-					fprintf(stderr, "interval %f seconds is too small\n",
+					fprintf(outf, "interval %f seconds is too small\n",
 						interval);
 					exit(2);
 				}
@@ -3369,6 +3383,9 @@ void cmdline(int argc, char **argv)
 		case 'm':
 			sscanf(optarg, "%x", &extra_msr_offset32);
 			break;
+		case 'o':
+			outf = fopen_or_die(optarg, "w");
+			break;
 		case 'P':
 			show_pkg_only++;
 			break;
@@ -3391,6 +3408,8 @@ void cmdline(int argc, char **argv)
 
 int main(int argc, char **argv)
 {
+	outf = stderr;
+
 	cmdline(argc, argv);
 
 	if (debug)
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 11/16] tools/power turbostat: fix compiler warnings
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
                     ` (8 preceding siblings ...)
  2016-02-27  9:01   ` [PATCH 10/16] tools/power turbostat: add --out option for saving output in a file Len Brown
@ 2016-02-27  9:01   ` Len Brown
  2016-02-27  9:01   ` [PATCH 12/16] tools/power turbostat: make fewer systems calls Len Brown
                     ` (4 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:01 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 5f9f41a..ad5bd56 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1987,7 +1987,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
 }
 
 static void
-dump_cstate_pstate_config_info(family, model)
+dump_cstate_pstate_config_info(int family, int model)
 {
 	if (!do_nhm_platform_info)
 		return;
@@ -2250,7 +2250,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
 
-double get_tdp(model)
+double get_tdp(int model)
 {
 	unsigned long long msr;
 
@@ -2364,7 +2364,7 @@ void rapl_probe(unsigned int family, unsigned int model)
 	return;
 }
 
-void perf_limit_reasons_probe(family, model)
+void perf_limit_reasons_probe(int family, int model)
 {
 	if (!genuine_intel)
 		return;
@@ -2974,7 +2974,7 @@ void process_cpuid()
 	perf_limit_reasons_probe(family, model);
 
 	if (debug)
-		dump_cstate_pstate_config_info();
+		dump_cstate_pstate_config_info(family, model);
 
 	if (has_skl_msrs(family, model))
 		calculate_tsc_tweak();
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 12/16] tools/power turbostat: make fewer systems calls
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
                     ` (9 preceding siblings ...)
  2016-02-27  9:01   ` [PATCH 11/16] tools/power turbostat: fix compiler warnings Len Brown
@ 2016-02-27  9:01   ` Len Brown
  2016-02-27  9:01   ` [PATCH 13/16] tools/power turbostat: show IRQs per CPU Len Brown
                     ` (3 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:01 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

skip the open(2)/close(2) on each msr read
by keeping the /dev/cpu/*/msr files open.

The remaining read(2) is generally far fewer cycles
than the removed open(2) system call.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 51 ++++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 10 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index ad5bd56..2e47c2b 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -45,6 +45,7 @@
 
 char *proc_stat = "/proc/stat";
 FILE *outf;
+int *fd_percpu;
 struct timespec interval_ts = {5, 0};
 unsigned int debug;
 unsigned int rapl_joules;
@@ -270,23 +271,34 @@ int cpu_migrate(int cpu)
 	else
 		return 0;
 }
-
-int get_msr(int cpu, off_t offset, unsigned long long *msr)
+int get_msr_fd(int cpu)
 {
-	ssize_t retval;
 	char pathname[32];
 	int fd;
 
+	fd = fd_percpu[cpu];
+
+	if (fd)
+		return fd;
+
 	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
 	fd = open(pathname, O_RDONLY);
 	if (fd < 0)
 		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
 
-	retval = pread(fd, msr, sizeof *msr, offset);
-	close(fd);
+	fd_percpu[cpu] = fd;
+
+	return fd;
+}
+
+int get_msr(int cpu, off_t offset, unsigned long long *msr)
+{
+	ssize_t retval;
+
+	retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
 
 	if (retval != sizeof *msr)
-		err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset);
+		err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset);
 
 	return 0;
 }
@@ -1453,19 +1465,30 @@ dump_config_tdp(void)
 		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
 	fprintf(outf, ")\n");
-	
+
 	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
 	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
 	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F);
 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
 	fprintf(outf, ")\n");
 }
+void free_fd_percpu(void)
+{
+	int i;
+
+	for (i = 0; i < topo.max_cpu_num; ++i) {
+		if (fd_percpu[i] != 0)
+			close(fd_percpu[i]);
+	}
+
+	free(fd_percpu);
+}
 
 void free_all_buffers(void)
 {
 	CPU_FREE(cpu_present_set);
 	cpu_present_set = NULL;
-	cpu_present_set = 0;
+	cpu_present_setsize = 0;
 
 	CPU_FREE(cpu_affinity_set);
 	cpu_affinity_set = NULL;
@@ -1490,6 +1513,8 @@ void free_all_buffers(void)
 	free(output_buffer);
 	output_buffer = NULL;
 	outp = NULL;
+
+	free_fd_percpu();
 }
 
 /*
@@ -2449,7 +2474,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 
 	return 0;
 }
-	
+
 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
 {
 	fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
@@ -3202,10 +3227,16 @@ void allocate_output_buffer()
 	if (outp == NULL)
 		err(-1, "calloc output buffer");
 }
-
+void allocate_fd_percpu(void)
+{
+	fd_percpu = calloc(topo.max_cpu_num, sizeof(int));
+	if (fd_percpu == NULL)
+		err(-1, "calloc fd_percpu");
+}
 void setup_all_buffers(void)
 {
 	topology_probe();
+	allocate_fd_percpu();
 	allocate_counters(&thread_even, &core_even, &package_even);
 	allocate_counters(&thread_odd, &core_odd, &package_odd);
 	allocate_output_buffer();
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 13/16] tools/power turbostat: show IRQs per CPU
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
                     ` (10 preceding siblings ...)
  2016-02-27  9:01   ` [PATCH 12/16] tools/power turbostat: make fewer systems calls Len Brown
@ 2016-02-27  9:01   ` Len Brown
  2016-02-27  9:01   ` [PATCH 14/16] tools/power turbostat: show GFXMHz Len Brown
                     ` (2 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:01 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

The new IRQ column shows how many interrupts have occured on each CPU
during the measurement inteval.  This information comes from
the difference between /proc/interrupts shapshots made before
and after the measurement interval.

The first row, the system summary, shows the sum of the IRQS
for all CPUs during that interval.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 126 ++++++++++++++++++++++++++++++++--
 1 file changed, 122 insertions(+), 4 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 2e47c2b..c679326 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -75,6 +75,7 @@ unsigned int extra_msr_offset64;
 unsigned int extra_delta_offset32;
 unsigned int extra_delta_offset64;
 unsigned int aperf_mperf_multiplier = 1;
+int do_irq = 1;
 int do_smi;
 double bclk;
 double base_hz;
@@ -154,6 +155,7 @@ struct thread_data {
 	unsigned long long extra_delta64;
 	unsigned long long extra_msr32;
 	unsigned long long extra_delta32;
+	unsigned int irq_count;
 	unsigned int smi_count;
 	unsigned int cpu_id;
 	unsigned int flags;
@@ -221,6 +223,9 @@ struct topo_params {
 
 struct timeval tv_even, tv_odd, tv_delta;
 
+int *irq_column_2_cpu;	/* /proc/interrupts column numbers */
+int *irqs_per_cpu;		/* indexed by cpu_num */
+
 void setup_all_buffers(void);
 
 int cpu_is_not_present(int cpu)
@@ -306,8 +311,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 /*
  * Example Format w/ field column widths:
  *
- *  Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     SMI   Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
- * 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
+ *  Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     IRQ   SMI   Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
+ * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
  */
 
 void print_header(void)
@@ -338,6 +343,8 @@ void print_header(void)
 	if (!debug)
 		goto done;
 
+	if (do_irq)
+		outp += sprintf(outp, "     IRQ");
 	if (do_smi)
 		outp += sprintf(outp, "     SMI");
 
@@ -429,6 +436,8 @@ int dump_counters(struct thread_data *t, struct core_data *c,
 			extra_msr_offset32, t->extra_msr32);
 		outp += sprintf(outp, "msr0x%x: %016llX\n",
 			extra_msr_offset64, t->extra_msr64);
+		if (do_irq)
+			outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
 		if (do_smi)
 			outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
 	}
@@ -562,6 +571,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
 	if (!debug)
 		goto done;
 
+	/* IRQ */
+	if (do_irq)
+		outp += sprintf(outp, "%8d", t->irq_count);
+
 	/* SMI */
 	if (do_smi)
 		outp += sprintf(outp, "%8d", t->smi_count);
@@ -827,6 +840,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
 	old->extra_msr32 = new->extra_msr32;
 	old->extra_msr64 = new->extra_msr64;
 
+	if (do_irq)
+		old->irq_count = new->irq_count - old->irq_count;
+
 	if (do_smi)
 		old->smi_count = new->smi_count - old->smi_count;
 }
@@ -856,10 +872,12 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 	t->mperf = 0;
 	t->c1 = 0;
 
-	t->smi_count = 0;
 	t->extra_delta32 = 0;
 	t->extra_delta64 = 0;
 
+	t->irq_count = 0;
+	t->smi_count = 0;
+
 	/* tells format_counters to dump all fields from this set */
 	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
 
@@ -903,6 +921,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 	average.threads.extra_delta32 += t->extra_delta32;
 	average.threads.extra_delta64 += t->extra_delta64;
 
+	average.threads.irq_count += t->irq_count;
+	average.threads.smi_count += t->smi_count;
+
 	/* sum per-core values only for 1st thread in core */
 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 		return 0;
@@ -1000,7 +1021,6 @@ static unsigned long long rdtsc(void)
 	return low | ((unsigned long long)high) << 32;
 }
 
-
 /*
  * get_counters(...)
  * migrate to cpu
@@ -1027,6 +1047,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		t->mperf = t->mperf * aperf_mperf_multiplier;
 	}
 
+	if (do_irq)
+		t->irq_count = irqs_per_cpu[cpu];
 	if (do_smi) {
 		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
 			return -5;
@@ -1515,6 +1537,9 @@ void free_all_buffers(void)
 	outp = NULL;
 
 	free_fd_percpu();
+
+	free(irq_column_2_cpu);
+	free(irqs_per_cpu);
 }
 
 /*
@@ -1737,6 +1762,83 @@ int mark_cpu_present(int cpu)
 	return 0;
 }
 
+/*
+ * snapshot_proc_interrupts()
+ *
+ * read and record summary of /proc/interrupts
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_proc_interrupts(void)
+{
+	static FILE *fp;
+	int column, retval;
+
+	if (fp == NULL)
+		fp = fopen_or_die("/proc/interrupts", "r");
+	else
+		rewind(fp);
+
+	/* read 1st line of /proc/interrupts to get cpu* name for each column */
+	for (column = 0; column < topo.num_cpus; ++column) {
+		int cpu_number;
+
+		retval = fscanf(fp, " CPU%d", &cpu_number);
+		if (retval != 1)
+			break;
+
+		if (cpu_number > topo.max_cpu_num) {
+			warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
+			return 1;
+		}
+
+		irq_column_2_cpu[column] = cpu_number;
+		irqs_per_cpu[cpu_number] = 0;
+	}
+
+	/* read /proc/interrupt count lines and sum up irqs per cpu */
+	while (1) {
+		int column;
+		char buf[64];
+
+		retval = fscanf(fp, " %s:", buf);	/* flush irq# "N:" */
+		if (retval != 1)
+			break;
+
+		/* read the count per cpu */
+		for (column = 0; column < topo.num_cpus; ++column) {
+
+			int cpu_number, irq_count;
+
+			retval = fscanf(fp, " %d", &irq_count);
+			if (retval != 1)
+				break;
+
+			cpu_number = irq_column_2_cpu[column];
+			irqs_per_cpu[cpu_number] += irq_count;
+
+		}
+
+		while (getc(fp) != '\n')
+			;	/* flush interrupt description */
+
+	}
+	return 0;
+}
+
+/*
+ * snapshot /proc and /sys files
+ *
+ * return 1 if configuration restart needed, else return 0
+ */
+int snapshot_proc_sysfs_files(void)
+{
+	if (snapshot_proc_interrupts())
+		return 1;
+
+	return 0;
+}
+
 void turbostat_loop()
 {
 	int retval;
@@ -1745,6 +1847,7 @@ void turbostat_loop()
 restart:
 	restarted++;
 
+	snapshot_proc_sysfs_files();
 	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
 	if (retval < -1) {
 		exit(retval);
@@ -1764,6 +1867,8 @@ restart:
 			goto restart;
 		}
 		nanosleep(&interval_ts, NULL);
+		if (snapshot_proc_sysfs_files())
+			goto restart;
 		retval = for_all_cpus(get_counters, ODD_COUNTERS);
 		if (retval < -1) {
 			exit(retval);
@@ -1778,6 +1883,8 @@ restart:
 		format_all_counters(EVEN_COUNTERS);
 		flush_output_stdout();
 		nanosleep(&interval_ts, NULL);
+		if (snapshot_proc_sysfs_files())
+			goto restart;
 		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
 		if (retval < -1) {
 			exit(retval);
@@ -3233,9 +3340,20 @@ void allocate_fd_percpu(void)
 	if (fd_percpu == NULL)
 		err(-1, "calloc fd_percpu");
 }
+void allocate_irq_buffers(void)
+{
+	irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
+	if (irq_column_2_cpu == NULL)
+		err(-1, "calloc %d", topo.num_cpus);
+
+	irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int));
+	if (irqs_per_cpu == NULL)
+		err(-1, "calloc %d", topo.max_cpu_num);
+}
 void setup_all_buffers(void)
 {
 	topology_probe();
+	allocate_irq_buffers();
 	allocate_fd_percpu();
 	allocate_counters(&thread_even, &core_even, &package_even);
 	allocate_counters(&thread_odd, &core_odd, &package_odd);
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 14/16] tools/power turbostat: show GFXMHz
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
                     ` (11 preceding siblings ...)
  2016-02-27  9:01   ` [PATCH 13/16] tools/power turbostat: show IRQs per CPU Len Brown
@ 2016-02-27  9:01   ` Len Brown
  2016-02-27  9:01   ` [PATCH 15/16] tools/power turbostat: show GFX%rc6 Len Brown
  2016-02-27  9:01   ` [PATCH 16/16] tools/power turbostat: detect and work around syscall jitter Len Brown
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:01 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

Under the column "GFXMHz", show a snapshot of this attribute:
/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz

This is an instantaneous snapshot of what sysfs presents
at the end of the measurement interval.  turbostat does
not average or otherwise perform any math on this value.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 50 ++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index c679326..d599f91 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -90,6 +90,8 @@ char *output_buffer, *outp;
 unsigned int do_rapl;
 unsigned int do_dts;
 unsigned int do_ptm;
+unsigned int do_gfx_mhz;
+unsigned int gfx_cur_mhz;
 unsigned int tcc_activation_temp;
 unsigned int tcc_activation_temp_override;
 double rapl_power_units, rapl_time_units;
@@ -183,6 +185,7 @@ struct pkg_data {
 	unsigned long long pkg_any_core_c0;
 	unsigned long long pkg_any_gfxe_c0;
 	unsigned long long pkg_both_core_gfxe_c0;
+	unsigned int gfx_mhz;
 	unsigned int package_id;
 	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
 	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
@@ -311,7 +314,7 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 /*
  * Example Format w/ field column widths:
  *
- *  Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     IRQ   SMI   Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
+ *  Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     IRQ   SMI   Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp  GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
  * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
  */
 
@@ -362,6 +365,9 @@ void print_header(void)
 	if (do_ptm)
 		outp += sprintf(outp, "  PkgTmp");
 
+	if (do_gfx_mhz)
+		outp += sprintf(outp, "  GFXMHz");
+
 	if (do_skl_residency) {
 		outp += sprintf(outp, " Totl%%C0");
 		outp += sprintf(outp, "  Any%%C0");
@@ -608,6 +614,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
 	if (do_ptm)
 		outp += sprintf(outp, "%8d", p->pkg_temp_c);
 
+	/* GFXMHz */
+	if (do_gfx_mhz)
+		outp += sprintf(outp, "%8d", p->gfx_mhz);
+
 	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
 	if (do_skl_residency) {
 		outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
@@ -746,6 +756,8 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
 	old->pc10 = new->pc10 - old->pc10;
 	old->pkg_temp_c = new->pkg_temp_c;
 
+	old->gfx_mhz = new->gfx_mhz;
+
 	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
 	DELTA_WRAP32(new->energy_cores, old->energy_cores);
 	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
@@ -909,6 +921,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 	p->rapl_pkg_perf_status = 0;
 	p->rapl_dram_perf_status = 0;
 	p->pkg_temp_c = 0;
+
+	p->gfx_mhz = 0;
 }
 int sum_counters(struct thread_data *t, struct core_data *c,
 	struct pkg_data *p)
@@ -961,6 +975,8 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 	average.packages.energy_cores += p->energy_cores;
 	average.packages.energy_gfx += p->energy_gfx;
 
+	average.packages.gfx_mhz = p->gfx_mhz;
+
 	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
 
 	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
@@ -1176,6 +1192,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 			return -17;
 		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
 	}
+	if (do_gfx_mhz)
+		p->gfx_mhz = gfx_cur_mhz;
+
 	return 0;
 }
 
@@ -1825,6 +1844,30 @@ int snapshot_proc_interrupts(void)
 	}
 	return 0;
 }
+/*
+ * snapshot_gfx_mhz()
+ *
+ * record snapshot of
+ * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_gfx_mhz(void)
+{
+	static FILE *fp;
+	int retval;
+
+	if (fp == NULL)
+		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
+	else
+		rewind(fp);
+
+	retval = fscanf(fp, "%d", &gfx_cur_mhz);
+	if (retval != 1)
+		err(1, "GFX MHz");
+
+	return 0;
+}
 
 /*
  * snapshot /proc and /sys files
@@ -1836,6 +1879,9 @@ int snapshot_proc_sysfs_files(void)
 	if (snapshot_proc_interrupts())
 		return 1;
 
+	if (do_gfx_mhz)
+		snapshot_gfx_mhz();
+
 	return 0;
 }
 
@@ -3111,6 +3157,8 @@ void process_cpuid()
 	if (has_skl_msrs(family, model))
 		calculate_tsc_tweak();
 
+	do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK);
+
 	return;
 }
 
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 15/16] tools/power turbostat: show GFX%rc6
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
                     ` (12 preceding siblings ...)
  2016-02-27  9:01   ` [PATCH 14/16] tools/power turbostat: show GFXMHz Len Brown
@ 2016-02-27  9:01   ` Len Brown
  2016-02-27  9:01   ` [PATCH 16/16] tools/power turbostat: detect and work around syscall jitter Len Brown
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:01 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

The column "GFX%c6" show the percentage of time the GPU
is in the "render C6" state, rc6.  Deep package C-states on several
systems depend on the GPU being in RC6.

This information comes from the counter
/sys/class/drm/card0/power/rc6_residency_ms,
as read before and after the measurement interval.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 45 +++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index d599f91..9896619 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -90,6 +90,8 @@ char *output_buffer, *outp;
 unsigned int do_rapl;
 unsigned int do_dts;
 unsigned int do_ptm;
+unsigned int do_gfx_rc6_ms;
+unsigned long long  gfx_cur_rc6_ms;
 unsigned int do_gfx_mhz;
 unsigned int gfx_cur_mhz;
 unsigned int tcc_activation_temp;
@@ -185,6 +187,7 @@ struct pkg_data {
 	unsigned long long pkg_any_core_c0;
 	unsigned long long pkg_any_gfxe_c0;
 	unsigned long long pkg_both_core_gfxe_c0;
+	unsigned long long gfx_rc6_ms;
 	unsigned int gfx_mhz;
 	unsigned int package_id;
 	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
@@ -365,6 +368,9 @@ void print_header(void)
 	if (do_ptm)
 		outp += sprintf(outp, "  PkgTmp");
 
+	if (do_gfx_rc6_ms)
+		outp += sprintf(outp, " GFX%%rc6");
+
 	if (do_gfx_mhz)
 		outp += sprintf(outp, "  GFXMHz");
 
@@ -614,6 +620,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
 	if (do_ptm)
 		outp += sprintf(outp, "%8d", p->pkg_temp_c);
 
+	/* GFXrc6 */
+	if (do_gfx_rc6_ms)
+		outp += sprintf(outp, "%8.2f", 100.0 * p->gfx_rc6_ms / 1000.0 / interval_float);
+
 	/* GFXMHz */
 	if (do_gfx_mhz)
 		outp += sprintf(outp, "%8d", p->gfx_mhz);
@@ -756,6 +766,7 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
 	old->pc10 = new->pc10 - old->pc10;
 	old->pkg_temp_c = new->pkg_temp_c;
 
+	old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
 	old->gfx_mhz = new->gfx_mhz;
 
 	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
@@ -922,6 +933,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 	p->rapl_dram_perf_status = 0;
 	p->pkg_temp_c = 0;
 
+	p->gfx_rc6_ms = 0;
 	p->gfx_mhz = 0;
 }
 int sum_counters(struct thread_data *t, struct core_data *c,
@@ -975,6 +987,7 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 	average.packages.energy_cores += p->energy_cores;
 	average.packages.energy_gfx += p->energy_gfx;
 
+	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
 	average.packages.gfx_mhz = p->gfx_mhz;
 
 	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
@@ -1192,6 +1205,10 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 			return -17;
 		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
 	}
+
+	if (do_gfx_rc6_ms)
+		p->gfx_rc6_ms = gfx_cur_rc6_ms;
+
 	if (do_gfx_mhz)
 		p->gfx_mhz = gfx_cur_mhz;
 
@@ -1845,6 +1862,29 @@ int snapshot_proc_interrupts(void)
 	return 0;
 }
 /*
+ * snapshot_gfx_rc6_ms()
+ *
+ * record snapshot of
+ * /sys/class/drm/card0/power/rc6_residency_ms
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_gfx_rc6_ms(void)
+{
+	FILE *fp;
+	int retval;
+
+	fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
+
+	retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
+	if (retval != 1)
+		err(1, "GFX rc6");
+
+	fclose(fp);
+
+	return 0;
+}
+/*
  * snapshot_gfx_mhz()
  *
  * record snapshot of
@@ -1879,6 +1919,9 @@ int snapshot_proc_sysfs_files(void)
 	if (snapshot_proc_interrupts())
 		return 1;
 
+	if (do_gfx_rc6_ms)
+		snapshot_gfx_rc6_ms();
+
 	if (do_gfx_mhz)
 		snapshot_gfx_mhz();
 
@@ -3157,6 +3200,8 @@ void process_cpuid()
 	if (has_skl_msrs(family, model))
 		calculate_tsc_tweak();
 
+	do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK);
+
 	do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK);
 
 	return;
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 16/16] tools/power turbostat: detect and work around syscall jitter
  2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
                     ` (13 preceding siblings ...)
  2016-02-27  9:01   ` [PATCH 15/16] tools/power turbostat: show GFX%rc6 Len Brown
@ 2016-02-27  9:01   ` Len Brown
  14 siblings, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-02-27  9:01 UTC (permalink / raw)
  To: linux-pm; +Cc: Len Brown

From: Len Brown <len.brown@intel.com>

The accuracy of Bzy_Mhz and Busy% depend on reading
the TSC, APERF, and MPERF close together in time.

When there is a very short measurement interval,
or a large system is profoundly idle, the changes
in APERF and MPERF may be very small.
They can be small enough that an expensive interrupt
between reading APERF and MPERF can cause the APERF/MPERF
ratio to become inaccurate, resulting in invalid
calculation and display of Bzy_MHz.

A dummy APERF read of APERF makes this problem
much more rare.  Apparently this 1st systemn call
after exiting a long stretch of idle is when we
typically see expensive timer interrupts that cause
large jitter.

For the cases that dummy APERF read fails to prevent,
we compare the latency of the APERF and MPERF reads.
If they differ by more than 2x, we re-issue them.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 51 ++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 9896619..2c43527 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1059,19 +1059,68 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
 	int cpu = t->cpu_id;
 	unsigned long long msr;
+	int aperf_mperf_retry_count = 0;
 
 	if (cpu_migrate(cpu)) {
 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
+retry:
 	t->tsc = rdtsc();	/* we are running on local CPU of interest */
 
 	if (has_aperf) {
+		unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+
+		/*
+		 * The TSC, APERF and MPERF must be read together for
+		 * APERF/MPERF and MPERF/TSC to give accurate results.
+		 *
+		 * Unfortunately, APERF and MPERF are read by
+		 * individual system call, so delays may occur
+		 * between them.  If the time to read them
+		 * varies by a large amount, we re-read them.
+		 */
+
+		/*
+		 * This initial dummy APERF read has been seen to
+		 * reduce jitter in the subsequent reads.
+		 */
+
+		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+			return -3;
+
+		t->tsc = rdtsc();	/* re-read close to APERF */
+
+		tsc_before = t->tsc;
+
 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
 			return -3;
+
+		tsc_between = rdtsc();
+
 		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
 			return -4;
+
+		tsc_after = rdtsc();
+
+		aperf_time = tsc_between - tsc_before;
+		mperf_time = tsc_after - tsc_between;
+
+		/*
+		 * If the system call latency to read APERF and MPERF
+		 * differ by more than 2x, then try again.
+		 */
+		if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
+			aperf_mperf_retry_count++;
+			if (aperf_mperf_retry_count < 5)
+				goto retry;
+			else
+				warnx("cpu%d jitter %lld %lld\n",
+					cpu, aperf_time, mperf_time);
+		}
+		aperf_mperf_retry_count = 0;
+
 		t->aperf = t->aperf * aperf_mperf_multiplier;
 		t->mperf = t->mperf * aperf_mperf_multiplier;
 	}
@@ -3554,7 +3603,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-	fprintf(outf, "turbostat version 4.10 10 Dec, 2015"
+	fprintf(outf, "turbostat version 4.11 27 Feb 2016"
 		" - Len Brown <lenb@kernel.org>\n");
 }
 
-- 
2.7.1.339.g0233b80


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* RE: [PATCH 06/16] tools/power turbostat: allow sub-sec intervals
  2016-02-27  9:01   ` [PATCH 06/16] tools/power turbostat: allow sub-sec intervals Len Brown
@ 2016-02-28  3:43     ` Doug Smythies
  2016-03-01  2:07       ` Brown, Len
  2016-03-13  7:57       ` Len Brown
  0 siblings, 2 replies; 20+ messages in thread
From: Doug Smythies @ 2016-02-28  3:43 UTC (permalink / raw)
  To: 'Len Brown'; +Cc: 'Len Brown', linux-pm

Hi Len,

On 2016.02.27 01:01 Len Brown wrote:

> From: Len Brown <len.brown@intel.com>
>
> turbostat -i interval_sec
>
> will sample and display statistics every interval_sec.
> interval_sec used to be a whole number of seconds,
> but now we accept a decimal, as small as 0.001 sec (1 ms).

Shouldn't you be cautious allowing this?
At such a high sampling rate, the user can significantly
effect the system being measured with turbostat's own overhead.

I tried down to 1 millisecond, and noted greatly
reduced C6 time (my processor only goes to C6), greatly increased
busy% for all CPU's, over 4 watts extra package power, ...

>
> Signed-off-by: Len Brown <len.brown@intel.com>
> ---
> tools/power/x86/turbostat/turbostat.8 |  2 +-
> tools/power/x86/turbostat/turbostat.c | 20 ++++++++++++++++----
> 2 files changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
> index 622db68..8a9727b 100644
> --- a/tools/power/x86/turbostat/turbostat.8
> +++ b/tools/power/x86/turbostat/turbostat.8
> @@ -34,7 +34,7 @@ name as necessary to disambiguate it from others is necessary.  Note that option
>  \fB--debug\fP displays additional system configuration information.  Invoking this parameter
>  more than once may also enable internal turbostat debug information.
>  .PP
> -\fB--interval seconds\fP overrides the default 5-second measurement interval.
> +\fB--interval decimal_seconds\fP overrides the default 5.0 second measurement interval.
>  .PP
>  \fB--help\fP displays usage for the most common parameters.
>  .PP

Don't you also need to update these lines?:

- .RB [ "\--interval seconds" ]
+ .RB [ "\--interval decimal_seconds" ]

- The 5-second interval can be changed with th "-i sec" option.
+ The 5-second interval can be changed with the "--interval decimal_seconds" option.

> diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
> index c600340..e411cc4 100644
> --- a/tools/power/x86/turbostat/turbostat.c
> +++ b/tools/power/x86/turbostat/turbostat.c
> @@ -38,12 +38,13 @@
>  #include <string.h>
>  #include <ctype.h>
>  #include <sched.h>
> +#include <time.h>
>  #include <cpuid.h>
>  #include <linux/capability.h>
>  #include <errno.h>
> 
>  char *proc_stat = "/proc/stat";
> -unsigned int interval_sec = 5;
> +struct timespec interval_ts = {5, 0};
>  unsigned int debug;
>  unsigned int rapl_joules;
>  unsigned int summary_only;
> @@ -1728,7 +1729,7 @@ restart:
>  			re_initialize();
>  			goto restart;
>  		}
> -		sleep(interval_sec);
> +		nanosleep(&interval_ts, NULL);
>  		retval = for_all_cpus(get_counters, ODD_COUNTERS);
>  		if (retval < -1) {
>  			exit(retval);
> @@ -1742,7 +1743,7 @@ restart:
>  		compute_average(EVEN_COUNTERS);
>  		format_all_counters(EVEN_COUNTERS);
>  		flush_stdout();
> -		sleep(interval_sec);
> +		nanosleep(&interval_ts, NULL);

In the limit of a very high sampling rate, doesn't the assumption
that the time spent in turbostat itself is negligible become
invalid?
And shouldn't the sleep time be compensated accordingly?

Perhaps something like (sorry for format, and not tested):

void __usleep(int us) {
   struct timespec time;

   time.tv_sec = us / 1000;
   time.tv_nsec = (us - time.tv_sec * 1000) * 1000;
   while(nanosleep(&time, &time) == -1 && errno == EINTR)
      continue;
}

unsigned long long stamp(void){
   struct timeval tv;

   gettimeofday(&tv, NULL);

   return (unsigned long long)tv.tv_sec * 1000000 + tv.tv_usec;
} /* endprocedure */

   now = stamp();
   if ((long long)(now - begin) < total ) {
      current_sleep = total - (now - begin);
      __usleep(current_sleep);
   } /* endif */
   begin += total;

Where total is the sample interval, in uSec, and begin was initialized
somewhere outside this loop.

Even if the time spent in turbostat is still almost negligible,
this will prevent sampling time error accumulation, by sleeping just
a little less every so often. However, there is also some extra
overhead, which is also undesirable.

>  		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
>  		if (retval < -1) {
>  			exit(retval);
> @@ -3347,7 +3348,18 @@ void cmdline(int argc, char **argv)
>  			help();
>  			exit(1);
>  		case 'i':
> -			interval_sec = atoi(optarg);
> +			{
> +				double interval = strtod(optarg, NULL);
> +
> +				if (interval < 0.001) {
> +					fprintf(stderr, "interval %f seconds is too small\n",
> +						interval);
> +					exit(2);
> +				}
> +
> +				interval_ts.tv_sec = interval;
> +				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
> +			}
>  			break;
>  		case 'J':
>  			rapl_joules++;
> -- 
> 2.7.1.339.g0233b80

... Doug



^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH 06/16] tools/power turbostat: allow sub-sec intervals
  2016-02-28  3:43     ` Doug Smythies
@ 2016-03-01  2:07       ` Brown, Len
  2016-03-13  7:57       ` Len Brown
  1 sibling, 0 replies; 20+ messages in thread
From: Brown, Len @ 2016-03-01  2:07 UTC (permalink / raw)
  To: Doug Smythies, 'Len Brown'; +Cc: linux-pm@vger.kernel.org

> > From: Len Brown <len.brown@intel.com>
> >
> > turbostat -i interval_sec
> >
> > will sample and display statistics every interval_sec.
> > interval_sec used to be a whole number of seconds,
> > but now we accept a decimal, as small as 0.001 sec (1 ms).
> 
> Shouldn't you be cautious allowing this?
> At such a high sampling rate, the user can significantly
> effect the system being measured with turbostat's own overhead.
>
> I tried down to 1 millisecond, and noted greatly
> reduced C6 time (my processor only goes to C6), greatly increased
> busy% for all CPU's, over 4 watts extra package power, ...

turbostat provides a 5.000 second default sampling interval.
If a user chooses to explicitly change that default,
I have to trust that they are doing it for a good reason.

I agree, measuring deep idle state residency would not be a good reason.

cheers,
-Len


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 06/16] tools/power turbostat: allow sub-sec intervals
  2016-02-28  3:43     ` Doug Smythies
  2016-03-01  2:07       ` Brown, Len
@ 2016-03-13  7:57       ` Len Brown
  1 sibling, 0 replies; 20+ messages in thread
From: Len Brown @ 2016-03-13  7:57 UTC (permalink / raw)
  To: Doug Smythies; +Cc: Len Brown, Linux PM list

Doug,

Thank you for closely reviewing these changes.

> Don't you also need to update these lines?:
>
> - .RB [ "\--interval seconds" ]
> + .RB [ "\--interval decimal_seconds" ]
>
> - The 5-second interval can be changed with th "-i sec" option.
> + The 5-second interval can be changed with the "--interval decimal_seconds" option.

You are right, this language became inconsistent.
Now that you've pointed it out, I think I like it the old way better
-- I've made them all
simply "seconds" (and the default is indicated as the decimal value 5.0)

>> +             nanosleep(&interval_ts, NULL);
>
> In the limit of a very high sampling rate, doesn't the assumption
> that the time spent in turbostat itself is negligible become
> invalid?
> And shouldn't the sleep time be compensated accordingly?

> Where total is the sample interval, in uSec, and begin was initialized
> somewhere outside this loop.
>
> Even if the time spent in turbostat is still almost negligible,
> this will prevent sampling time error accumulation, by sleeping just
> a little less every so often. However, there is also some extra
> overhead, which is also undesirable.

Turbostat isn't used to measure time, it just reports time.
So I think it is okay if the intervals drift.
There could be a lot of things that delay turbostat from running
exactly when its timeouts requested.

The important thing is that it accurately report for whatever
interval it does wake up on.

That said, you are right, for a very fast interval, the overhead
of turbostat itself becomes measurable.

The problem that I've observed is that on a very large
systems (eg. well over 100 processors), the timestamp taken
on the 1st processor is getting somewhat stale by the time
we get to the last processor.  For this reason it is on my list
to do per-cpu timetamps, which are taken when aperf/mperf
are taken.

When we get a vectored msr read inside the kernel, we can
shrink that overhead to mitigate its effect, however, I'll still
want to take a timestamp on each cpu.

thanks,
Len Brown, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2016-03-13  7:57 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-02-27  9:00 [PATCH 0/16] turbostat updates Len Brown
2016-02-27  9:00 ` [PATCH 01/16] tools/power turbostat: decode more CPUID fields Len Brown
2016-02-27  9:00   ` [PATCH 02/16] tools/power turbostat: CPUID(0x16) leaf shows base, max, and bus frequency Len Brown
2016-02-27  9:00   ` [PATCH 03/16] x86 msr-index: Simplify syntax for HWP fields Len Brown
2016-02-27  9:00   ` [PATCH 04/16] tools/power turbostat: decode HWP registers Len Brown
2016-02-27  9:00   ` [PATCH 05/16] tools/power turbostat: Decode MSR_MISC_PWR_MGMT Len Brown
2016-02-27  9:01   ` [PATCH 06/16] tools/power turbostat: allow sub-sec intervals Len Brown
2016-02-28  3:43     ` Doug Smythies
2016-03-01  2:07       ` Brown, Len
2016-03-13  7:57       ` Len Brown
2016-02-27  9:01   ` [PATCH 07/16] tools/power turbostat: Intel Xeon x200: fix erroneous bclk value Len Brown
2016-02-27  9:01   ` [PATCH 08/16] tools/power turbostat: Intel Xeon x200: fix turbo-ratio decoding Len Brown
2016-02-27  9:01   ` [PATCH 09/16] tools/power turbostat: re-name "%Busy" field to "Busy%" Len Brown
2016-02-27  9:01   ` [PATCH 10/16] tools/power turbostat: add --out option for saving output in a file Len Brown
2016-02-27  9:01   ` [PATCH 11/16] tools/power turbostat: fix compiler warnings Len Brown
2016-02-27  9:01   ` [PATCH 12/16] tools/power turbostat: make fewer systems calls Len Brown
2016-02-27  9:01   ` [PATCH 13/16] tools/power turbostat: show IRQs per CPU Len Brown
2016-02-27  9:01   ` [PATCH 14/16] tools/power turbostat: show GFXMHz Len Brown
2016-02-27  9:01   ` [PATCH 15/16] tools/power turbostat: show GFX%rc6 Len Brown
2016-02-27  9:01   ` [PATCH 16/16] tools/power turbostat: detect and work around syscall jitter Len Brown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).