* [PATCH 2/5] tools/power turbostat: enable turbostat to support Knights Landing (KNL)
2015-05-27 22:08 ` [PATCH 1/5] tools/power turbostat: correctly display more than 2 threads/core Len Brown
@ 2015-05-27 22:08 ` Len Brown
2015-05-27 22:08 ` [PATCH 3/5] tools/power turbostat: correctly decode of ENERGY_PERFORMANCE_BIAS Len Brown
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Len Brown @ 2015-05-27 22:08 UTC (permalink / raw)
To: linux-pm; +Cc: Dasaratharaman Chandramouli, Len Brown
From: Dasaratharaman Chandramouli <dasaratharaman.chandramouli@intel.com>
Changes mainly to account for minor differences in Knights Landing(KNL):
1. KNL supports C1 and C6 core states.
2. KNL supports PC2, PC3 and PC6 package states.
3. KNL has a different encoding of the TURBO_RATIO_LIMIT MSR
Signed-off-by: Dasaratharaman Chandramouli <dasaratharaman.chandramouli@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
arch/x86/include/uapi/asm/msr-index.h | 1 +
tools/power/x86/turbostat/turbostat.c | 105 ++++++++++++++++++++++++++++++++--
2 files changed, 102 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index c469490..3c6bb34 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -140,6 +140,7 @@
#define MSR_CORE_C3_RESIDENCY 0x000003fc
#define MSR_CORE_C6_RESIDENCY 0x000003fd
#define MSR_CORE_C7_RESIDENCY 0x000003fe
+#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
#define MSR_PKG_C2_RESIDENCY 0x0000060d
#define MSR_PKG_C8_RESIDENCY 0x00000630
#define MSR_PKG_C9_RESIDENCY 0x00000631
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index d85adba..256a5e1 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -52,6 +52,7 @@ unsigned int skip_c0;
unsigned int skip_c1;
unsigned int do_nhm_cstates;
unsigned int do_snb_cstates;
+unsigned int do_knl_cstates;
unsigned int do_pc2;
unsigned int do_pc3;
unsigned int do_pc6;
@@ -316,7 +317,7 @@ void print_header(void)
if (do_nhm_cstates)
outp += sprintf(outp, " CPU%%c1");
- if (do_nhm_cstates && !do_slm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
outp += sprintf(outp, " CPU%%c3");
if (do_nhm_cstates)
outp += sprintf(outp, " CPU%%c6");
@@ -546,7 +547,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
goto done;
- if (do_nhm_cstates && !do_slm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc);
if (do_nhm_cstates)
outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc);
@@ -1018,14 +1019,17 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
return 0;
- if (do_nhm_cstates && !do_slm_cstates) {
+ if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
}
- if (do_nhm_cstates) {
+ if (do_nhm_cstates && !do_knl_cstates) {
if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
return -7;
+ } else if (do_knl_cstates) {
+ if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
+ return -7;
}
if (do_snb_cstates)
@@ -1296,6 +1300,67 @@ dump_nhm_turbo_ratio_limits(void)
}
static void
+dump_knl_turbo_ratio_limits(void)
+{
+ int cores;
+ unsigned int ratio;
+ unsigned long long msr;
+ int delta_cores;
+ int delta_ratio;
+ int i;
+
+ get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
+
+ fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n",
+ msr);
+
+ /**
+ * Turbo encoding in KNL is as follows:
+ * [7:0] -- Base value of number of active cores of bucket 1.
+ * [15:8] -- Base value of freq ratio of bucket 1.
+ * [20:16] -- +ve delta of number of active cores of bucket 2.
+ * i.e. active cores of bucket 2 =
+ * active cores of bucket 1 + delta
+ * [23:21] -- Negative delta of freq ratio of bucket 2.
+ * i.e. freq ratio of bucket 2 =
+ * freq ratio of bucket 1 - delta
+ * [28:24]-- +ve delta of number of active cores of bucket 3.
+ * [31:29]-- -ve delta of freq ratio of bucket 3.
+ * [36:32]-- +ve delta of number of active cores of bucket 4.
+ * [39:37]-- -ve delta of freq ratio of bucket 4.
+ * [44:40]-- +ve delta of number of active cores of bucket 5.
+ * [47:45]-- -ve delta of freq ratio of bucket 5.
+ * [52:48]-- +ve delta of number of active cores of bucket 6.
+ * [55:53]-- -ve delta of freq ratio of bucket 6.
+ * [60:56]-- +ve delta of number of active cores of bucket 7.
+ * [63:61]-- -ve delta of freq ratio of bucket 7.
+ */
+ cores = msr & 0xFF;
+ ratio = (msr >> 8) && 0xFF;
+ if (ratio > 0)
+ fprintf(stderr,
+ "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, cores);
+
+ for (i = 16; i < 64; i = i + 8) {
+ delta_cores = (msr >> i) & 0x1F;
+ delta_ratio = (msr >> (i + 5)) && 0x7;
+ if (!delta_cores || !delta_ratio)
+ return;
+ cores = cores + delta_cores;
+ ratio = ratio - delta_ratio;
+
+ /** -ve ratios will make successive ratio calculations
+ * negative. Hence return instead of carrying on.
+ */
+ if (ratio > 0)
+ fprintf(stderr,
+ "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, cores);
+ }
+}
+
+static void
dump_nhm_cst_cfg(void)
{
unsigned long long msr;
@@ -1788,6 +1853,21 @@ int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
}
}
+int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case 0x57: /* Knights Landing */
+ return 1;
+ default:
+ return 0;
+ }
+}
static void
dump_cstate_pstate_config_info(family, model)
{
@@ -1805,6 +1885,9 @@ dump_cstate_pstate_config_info(family, model)
if (has_nhm_turbo_ratio_limit(family, model))
dump_nhm_turbo_ratio_limits();
+ if (has_knl_turbo_ratio_limit(family, model))
+ dump_knl_turbo_ratio_limits();
+
dump_nhm_cst_cfg();
}
@@ -1985,6 +2068,7 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units)
case 0x3F: /* HSX */
case 0x4F: /* BDX */
case 0x56: /* BDX-DE */
+ case 0x57: /* KNL */
return (rapl_dram_energy_units = 15.3 / 1000000);
default:
return (rapl_energy_units);
@@ -2026,6 +2110,7 @@ void rapl_probe(unsigned int family, unsigned int model)
case 0x3F: /* HSX */
case 0x4F: /* BDX */
case 0x56: /* BDX-DE */
+ case 0x57: /* KNL */
do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
break;
case 0x2D:
@@ -2366,6 +2451,17 @@ int is_slm(unsigned int family, unsigned int model)
return 0;
}
+int is_knl(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+ switch (model) {
+ case 0x57: /* KNL */
+ return 1;
+ }
+ return 0;
+}
+
#define SLM_BCLK_FREQS 5
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
@@ -2576,6 +2672,7 @@ void process_cpuid()
do_c8_c9_c10 = has_hsw_msrs(family, model);
do_skl_residency = has_skl_msrs(family, model);
do_slm_cstates = is_slm(family, model);
+ do_knl_cstates = is_knl(family, model);
bclk = discover_bclk(family, model);
rapl_probe(family, model);
--
2.4.1.314.g9532ead
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 4/5] tools/power turbostat: allow running without cpu0
2015-05-27 22:08 ` [PATCH 1/5] tools/power turbostat: correctly display more than 2 threads/core Len Brown
2015-05-27 22:08 ` [PATCH 2/5] tools/power turbostat: enable turbostat to support Knights Landing (KNL) Len Brown
2015-05-27 22:08 ` [PATCH 3/5] tools/power turbostat: correctly decode of ENERGY_PERFORMANCE_BIAS Len Brown
@ 2015-05-27 22:08 ` Len Brown
2015-05-27 22:08 ` [PATCH 5/5] tools/power turbostat: update version number to 4.7 Len Brown
3 siblings, 0 replies; 6+ messages in thread
From: Len Brown @ 2015-05-27 22:08 UTC (permalink / raw)
To: linux-pm; +Cc: Prarit Bhargava, Len Brown
From: Prarit Bhargava <prarit@redhat.com>
Linux-3.7 added CONFIG_BOOTPARAM_HOTPLUG_CPU0,
allowing systems to offline cpu0.
But when cpu0 is offline, turbostat will not run:
# turbostat ls
turbostat: no /dev/cpu/0/msr
This patch replaces the hard-coded use of cpu0 in turbostat
with the current cpu, allowing it to run without a cpu0.
Fewer cross-calls may also be needed due to use of current cpu,
though this hard-coding was used only for the --debug preamble.
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
tools/power/x86/turbostat/turbostat.c | 46 +++++++++++++++++++++++------------
1 file changed, 31 insertions(+), 15 deletions(-)
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index f92211e..68e77fd 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -92,6 +92,7 @@ unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
+int base_cpu;
#define RAPL_PKG (1 << 0)
/* 0x610 MSR_PKG_POWER_LIMIT */
@@ -1154,7 +1155,7 @@ dump_nhm_platform_info(void)
unsigned long long msr;
unsigned int ratio;
- get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
+ get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
@@ -1166,7 +1167,7 @@ dump_nhm_platform_info(void)
fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n",
ratio, bclk, ratio * bclk);
- get_msr(0, MSR_IA32_POWER_CTL, &msr);
+ get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
msr, msr & 0x2 ? "EN" : "DIS");
@@ -1179,7 +1180,7 @@ dump_hsw_turbo_ratio_limits(void)
unsigned long long msr;
unsigned int ratio;
- get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr);
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr);
@@ -1201,7 +1202,7 @@ dump_ivt_turbo_ratio_limits(void)
unsigned long long msr;
unsigned int ratio;
- get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr);
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr);
@@ -1253,7 +1254,7 @@ dump_nhm_turbo_ratio_limits(void)
unsigned long long msr;
unsigned int ratio;
- get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr);
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
@@ -1309,7 +1310,7 @@ dump_knl_turbo_ratio_limits(void)
int delta_ratio;
int i;
- get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
+ get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n",
msr);
@@ -1365,7 +1366,7 @@ dump_nhm_cst_cfg(void)
{
unsigned long long msr;
- get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+ get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
@@ -1694,8 +1695,10 @@ restart:
void check_dev_msr()
{
struct stat sb;
+ char pathname[32];
- if (stat("/dev/cpu/0/msr", &sb))
+ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+ if (stat(pathname, &sb))
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
}
@@ -1708,6 +1711,7 @@ void check_permissions()
cap_user_data_t cap_data = &cap_data_data;
extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
int do_exit = 0;
+ char pathname[32];
/* check for CAP_SYS_RAWIO */
cap_header->pid = getpid();
@@ -1722,7 +1726,8 @@ void check_permissions()
}
/* test file permissions */
- if (euidaccess("/dev/cpu/0/msr", R_OK)) {
+ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+ if (euidaccess(pathname, R_OK)) {
do_exit++;
warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
}
@@ -1804,7 +1809,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
default:
return 0;
}
- get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+ get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
@@ -2043,7 +2048,7 @@ double get_tdp(model)
unsigned long long msr;
if (do_rapl & RAPL_PKG_POWER_INFO)
- if (!get_msr(0, MSR_PKG_POWER_INFO, &msr))
+ if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
switch (model) {
@@ -2126,7 +2131,7 @@ void rapl_probe(unsigned int family, unsigned int model)
}
/* units on package 0, verify later other packages match */
- if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr))
+ if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
return;
rapl_power_units = 1.0 / (1 << (msr & 0xF));
@@ -2471,7 +2476,7 @@ double slm_bclk(void)
unsigned int i;
double freq;
- if (get_msr(0, MSR_FSB_FREQ, &msr))
+ if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
fprintf(stderr, "SLM BCLK: unknown\n");
i = msr & 0xf;
@@ -2539,7 +2544,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
if (!do_nhm_platform_info)
goto guess;
- if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr))
+ if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
goto guess;
target_c_local = (msr >> 16) & 0xFF;
@@ -2913,13 +2918,24 @@ void setup_all_buffers(void)
for_all_proc_cpus(initialize_counters);
}
+void set_base_cpu(void)
+{
+ base_cpu = sched_getcpu();
+ if (base_cpu < 0)
+ err(-ENODEV, "No valid cpus found");
+
+ if (debug > 1)
+ fprintf(stderr, "base_cpu = %d\n", base_cpu);
+}
+
void turbostat_init()
{
+ setup_all_buffers();
+ set_base_cpu();
check_dev_msr();
check_permissions();
process_cpuid();
- setup_all_buffers();
if (debug)
for_all_cpus(print_epb, ODD_COUNTERS);
--
2.4.1.314.g9532ead
^ permalink raw reply related [flat|nested] 6+ messages in thread