* [PATCH 1/2] x86/mwait_idle: stop using driver_data for static flags
2013-04-16 8:40 [PATCH 0/2] x86/mwait_idle: pull in changes from Linux 3.9 Jan Beulich
@ 2013-04-16 8:47 ` Jan Beulich
2013-04-16 8:48 ` [PATCH 2/2] x86/mwait_idle: support Haswell Jan Beulich
2013-04-18 8:46 ` [PATCH 0/2] x86/mwait_idle: pull in changes from Linux 3.9 Jan Beulich
2 siblings, 0 replies; 5+ messages in thread
From: Jan Beulich @ 2013-04-16 8:47 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 5172 bytes --]
The (Linux) commit 4202735e8ab6ecfb0381631a0d0b58fefe0bd4e2
(cpuidle: Split cpuidle_state structure and move per-cpu statistics fields)
observed that the MWAIT flags for Cn on every processor to date were the
same, and created get_driver_data() to supply them.
Unfortunately, that assumption is false, going forward.
So here we restore the MWAIT flags to the cpuidle_state table.
However, instead of restoring the old "driver_data" field,
we put the flags into the existing "flags" field,
where they probably should have lived all along.
This patch does not change any operation.
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -108,6 +108,16 @@ static const struct cpuidle_state {
#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000
/*
+ * MWAIT takes an 8-bit "hint" in EAX "suggesting"
+ * the C-state (top nibble) and sub-state (bottom nibble)
+ * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
+ *
+ * We store the hint at the top of our "flags" for each state.
+ */
+#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
+#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
+
+/*
* States are indexed by the cstate number,
* which is also the index into the MWAIT hint array.
* Thus C0 is a dummy.
@@ -116,18 +126,19 @@ static const struct cpuidle_state nehale
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
.name = "C1-NHM",
+ .flags = MWAIT2flg(0x00),
.exit_latency = 3,
.target_residency = 6,
},
{ /* MWAIT C2 */
.name = "C3-NHM",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 20,
.target_residency = 80,
},
{ /* MWAIT C3 */
.name = "C6-NHM",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 200,
.target_residency = 800,
}
@@ -137,24 +148,25 @@ static const struct cpuidle_state snb_cs
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
.name = "C1-SNB",
+ .flags = MWAIT2flg(0x00),
.exit_latency = 1,
.target_residency = 1,
},
{ /* MWAIT C2 */
.name = "C3-SNB",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 80,
.target_residency = 211,
},
{ /* MWAIT C3 */
.name = "C6-SNB",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 104,
.target_residency = 345,
},
{ /* MWAIT C4 */
.name = "C7-SNB",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 109,
.target_residency = 345,
}
@@ -164,24 +176,25 @@ static const struct cpuidle_state ivb_cs
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
.name = "C1-IVB",
+ .flags = MWAIT2flg(0x00),
.exit_latency = 1,
.target_residency = 1,
},
{ /* MWAIT C2 */
.name = "C3-IVB",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 59,
.target_residency = 156,
},
{ /* MWAIT C3 */
.name = "C6-IVB",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 80,
.target_residency = 300,
},
{ /* MWAIT C4 */
.name = "C7-IVB",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 87,
.target_residency = 300,
}
@@ -191,44 +204,32 @@ static const struct cpuidle_state atom_c
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
.name = "C1-ATM",
+ .flags = MWAIT2flg(0x00),
.exit_latency = 1,
.target_residency = 4,
},
{ /* MWAIT C2 */
.name = "C2-ATM",
+ .flags = MWAIT2flg(0x10),
.exit_latency = 20,
.target_residency = 80,
},
{ /* MWAIT C3 */ },
{ /* MWAIT C4 */
.name = "C4-ATM",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 100,
.target_residency = 400,
},
{ /* MWAIT C5 */ },
{ /* MWAIT C6 */
.name = "C6-ATM",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 140,
.target_residency = 560,
}
};
-static u32 get_driver_data(unsigned int cstate)
-{
- static const u32 driver_data[] = {
- [1] /* MWAIT C1 */ = 0x00,
- [2] /* MWAIT C2 */ = 0x10,
- [3] /* MWAIT C3 */ = 0x20,
- [4] /* MWAIT C4 */ = 0x30,
- [5] /* MWAIT C5 */ = 0x40,
- [6] /* MWAIT C6 */ = 0x52,
- };
-
- return driver_data[cstate < ARRAY_SIZE(driver_data) ? cstate : 0];
-}
-
static void mwait_idle(void)
{
unsigned int cpu = smp_processor_id();
@@ -477,7 +478,7 @@ static int mwait_idle_cpu_init(struct no
cx = dev->states + dev->count;
cx->type = cstate;
- cx->address = get_driver_data(cstate);
+ cx->address = flg2MWAIT(cpuidle_state_table[cstate].flags);
cx->entry_method = ACPI_CSTATE_EM_FFH;
cx->latency = cpuidle_state_table[cstate].exit_latency;
cx->target_residency =
[-- Attachment #2: x86-mwait-idle-flags.patch --]
[-- Type: text/plain, Size: 5227 bytes --]
x86/mwait_idle: stop using driver_data for static flags
The (Linux) commit 4202735e8ab6ecfb0381631a0d0b58fefe0bd4e2
(cpuidle: Split cpuidle_state structure and move per-cpu statistics fields)
observed that the MWAIT flags for Cn on every processor to date were the
same, and created get_driver_data() to supply them.
Unfortunately, that assumption is false, going forward.
So here we restore the MWAIT flags to the cpuidle_state table.
However, instead of restoring the old "driver_data" field,
we put the flags into the existing "flags" field,
where they probably should have lived all along.
This patch does not change any operation.
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -108,6 +108,16 @@ static const struct cpuidle_state {
#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000
/*
+ * MWAIT takes an 8-bit "hint" in EAX "suggesting"
+ * the C-state (top nibble) and sub-state (bottom nibble)
+ * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
+ *
+ * We store the hint at the top of our "flags" for each state.
+ */
+#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
+#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
+
+/*
* States are indexed by the cstate number,
* which is also the index into the MWAIT hint array.
* Thus C0 is a dummy.
@@ -116,18 +126,19 @@ static const struct cpuidle_state nehale
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
.name = "C1-NHM",
+ .flags = MWAIT2flg(0x00),
.exit_latency = 3,
.target_residency = 6,
},
{ /* MWAIT C2 */
.name = "C3-NHM",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 20,
.target_residency = 80,
},
{ /* MWAIT C3 */
.name = "C6-NHM",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 200,
.target_residency = 800,
}
@@ -137,24 +148,25 @@ static const struct cpuidle_state snb_cs
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
.name = "C1-SNB",
+ .flags = MWAIT2flg(0x00),
.exit_latency = 1,
.target_residency = 1,
},
{ /* MWAIT C2 */
.name = "C3-SNB",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 80,
.target_residency = 211,
},
{ /* MWAIT C3 */
.name = "C6-SNB",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 104,
.target_residency = 345,
},
{ /* MWAIT C4 */
.name = "C7-SNB",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 109,
.target_residency = 345,
}
@@ -164,24 +176,25 @@ static const struct cpuidle_state ivb_cs
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
.name = "C1-IVB",
+ .flags = MWAIT2flg(0x00),
.exit_latency = 1,
.target_residency = 1,
},
{ /* MWAIT C2 */
.name = "C3-IVB",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 59,
.target_residency = 156,
},
{ /* MWAIT C3 */
.name = "C6-IVB",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 80,
.target_residency = 300,
},
{ /* MWAIT C4 */
.name = "C7-IVB",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 87,
.target_residency = 300,
}
@@ -191,44 +204,32 @@ static const struct cpuidle_state atom_c
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
.name = "C1-ATM",
+ .flags = MWAIT2flg(0x00),
.exit_latency = 1,
.target_residency = 4,
},
{ /* MWAIT C2 */
.name = "C2-ATM",
+ .flags = MWAIT2flg(0x10),
.exit_latency = 20,
.target_residency = 80,
},
{ /* MWAIT C3 */ },
{ /* MWAIT C4 */
.name = "C4-ATM",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 100,
.target_residency = 400,
},
{ /* MWAIT C5 */ },
{ /* MWAIT C6 */
.name = "C6-ATM",
- .flags = CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 140,
.target_residency = 560,
}
};
-static u32 get_driver_data(unsigned int cstate)
-{
- static const u32 driver_data[] = {
- [1] /* MWAIT C1 */ = 0x00,
- [2] /* MWAIT C2 */ = 0x10,
- [3] /* MWAIT C3 */ = 0x20,
- [4] /* MWAIT C4 */ = 0x30,
- [5] /* MWAIT C5 */ = 0x40,
- [6] /* MWAIT C6 */ = 0x52,
- };
-
- return driver_data[cstate < ARRAY_SIZE(driver_data) ? cstate : 0];
-}
-
static void mwait_idle(void)
{
unsigned int cpu = smp_processor_id();
@@ -477,7 +478,7 @@ static int mwait_idle_cpu_init(struct no
cx = dev->states + dev->count;
cx->type = cstate;
- cx->address = get_driver_data(cstate);
+ cx->address = flg2MWAIT(cpuidle_state_table[cstate].flags);
cx->entry_method = ACPI_CSTATE_EM_FFH;
cx->latency = cpuidle_state_table[cstate].exit_latency;
cx->target_residency =
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 2/2] x86/mwait_idle: support Haswell
2013-04-16 8:40 [PATCH 0/2] x86/mwait_idle: pull in changes from Linux 3.9 Jan Beulich
2013-04-16 8:47 ` [PATCH 1/2] x86/mwait_idle: stop using driver_data for static flags Jan Beulich
@ 2013-04-16 8:48 ` Jan Beulich
2013-04-18 8:46 ` [PATCH 0/2] x86/mwait_idle: pull in changes from Linux 3.9 Jan Beulich
2 siblings, 0 replies; 5+ messages in thread
From: Jan Beulich @ 2013-04-16 8:48 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 1685 bytes --]
This patch enables intel_idle to run on the next-generation Intel(R)
Microarchitecture code named "Haswell".
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -200,6 +200,34 @@ static const struct cpuidle_state ivb_cs
}
};
+static const struct cpuidle_state hsw_cstates[MWAIT_MAX_NUM_CSTATES] = {
+ { /* MWAIT C0 */ },
+ { /* MWAIT C1 */
+ .name = "C1-HSW",
+ .flags = MWAIT2flg(0x00),
+ .exit_latency = 2,
+ .target_residency = 2,
+ },
+ { /* MWAIT C2 */
+ .name = "C3-HSW",
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 33,
+ .target_residency = 100,
+ },
+ { /* MWAIT C3 */
+ .name = "C6-HSW",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 133,
+ .target_residency = 400,
+ },
+ { /* MWAIT C4 */
+ .name = "C7s-HSW",
+ .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 166,
+ .target_residency = 500,
+ },
+};
+
static const struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = {
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
@@ -348,6 +376,10 @@ static const struct idle_cpu idle_cpu_iv
.state_table = ivb_cstates,
};
+static const struct idle_cpu idle_cpu_hsw = {
+ .state_table = hsw_cstates,
+};
+
#define ICPU(model, cpu) { 6, model, &idle_cpu_##cpu }
static struct intel_idle_id {
@@ -367,6 +399,10 @@ static struct intel_idle_id {
ICPU(0x2d, snb),
ICPU(0x3a, ivb),
ICPU(0x3e, ivb),
+ ICPU(0x3c, hsw),
+ ICPU(0x3f, hsw),
+ ICPU(0x45, hsw),
+ ICPU(0x46, hsw),
{}
};
[-- Attachment #2: x86-mwait-idle-Haswell.patch --]
[-- Type: text/plain, Size: 1714 bytes --]
x86/mwait_idle: support Haswell
This patch enables intel_idle to run on the next-generation Intel(R)
Microarchitecture code named "Haswell".
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -200,6 +200,34 @@ static const struct cpuidle_state ivb_cs
}
};
+static const struct cpuidle_state hsw_cstates[MWAIT_MAX_NUM_CSTATES] = {
+ { /* MWAIT C0 */ },
+ { /* MWAIT C1 */
+ .name = "C1-HSW",
+ .flags = MWAIT2flg(0x00),
+ .exit_latency = 2,
+ .target_residency = 2,
+ },
+ { /* MWAIT C2 */
+ .name = "C3-HSW",
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 33,
+ .target_residency = 100,
+ },
+ { /* MWAIT C3 */
+ .name = "C6-HSW",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 133,
+ .target_residency = 400,
+ },
+ { /* MWAIT C4 */
+ .name = "C7s-HSW",
+ .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 166,
+ .target_residency = 500,
+ },
+};
+
static const struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = {
{ /* MWAIT C0 */ },
{ /* MWAIT C1 */
@@ -348,6 +376,10 @@ static const struct idle_cpu idle_cpu_iv
.state_table = ivb_cstates,
};
+static const struct idle_cpu idle_cpu_hsw = {
+ .state_table = hsw_cstates,
+};
+
#define ICPU(model, cpu) { 6, model, &idle_cpu_##cpu }
static struct intel_idle_id {
@@ -367,6 +399,10 @@ static struct intel_idle_id {
ICPU(0x2d, snb),
ICPU(0x3a, ivb),
ICPU(0x3e, ivb),
+ ICPU(0x3c, hsw),
+ ICPU(0x3f, hsw),
+ ICPU(0x45, hsw),
+ ICPU(0x46, hsw),
{}
};
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 5+ messages in thread