* [PATCH v4 1/2] perf, x86: Implement event scheduler helper functions
2011-11-18 11:35 [PATCH v4 0/2] perf, x86: handle overlapping counters Robert Richter
@ 2011-11-18 11:35 ` Robert Richter
2011-12-06 9:44 ` [tip:perf/core] " tip-bot for Robert Richter
2011-11-18 11:35 ` [PATCH v4 2/2] perf, x86: Fix event scheduler for constraints with overlapping counters Robert Richter
` (2 subsequent siblings)
3 siblings, 1 reply; 9+ messages in thread
From: Robert Richter @ 2011-11-18 11:35 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Stephane Eranian, Ingo Molnar, Andi Kleen, LKML, Robert Richter
This patch introduces x86 perf scheduler code helper functions. We
need this to later add more complex functionality to support
overlapping counter constraints (next patch).
The algorithm is modified so that the range of weight values is now
generated from the constraints. There shouldn't be other functional
changes.
With the helper functions the scheduler is controlled. There are
functions to initialize, traverse the event list, find unused counters
etc. The scheduler keeps its own state.
V3:
* Added macro for_each_set_bit_cont().
* Changed functions interfaces of perf_sched_find_counter() and
perf_sched_next_event() to use bool as return value.
* Added some comments to make code better understandable.
V4:
* Fix broken event assignment if weight of the first event is not
wmin (perf_sched_init()).
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
arch/x86/kernel/cpu/perf_event.c | 185 +++++++++++++++++++++++++++-----------
include/linux/bitops.h | 10 ++-
2 files changed, 140 insertions(+), 55 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6408910..02b9020 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -488,18 +488,145 @@ static inline int is_x86_event(struct perf_event *event)
return event->pmu == &pmu;
}
+/*
+ * Event scheduler state:
+ *
+ * Assign events iterating over all events and counters, beginning
+ * with events with least weights first. Keep the current iterator
+ * state in struct sched_state.
+ */
+struct sched_state {
+ int weight;
+ int event; /* event index */
+ int counter; /* counter index */
+ int unassigned; /* number of events to be assigned left */
+ unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+struct perf_sched {
+ int max_weight;
+ int max_events;
+ struct event_constraint **constraints;
+ struct sched_state state;
+};
+
+/*
+ * Initialize interator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+ int num, int wmin, int wmax)
+{
+ int idx;
+
+ memset(sched, 0, sizeof(*sched));
+ sched->max_events = num;
+ sched->max_weight = wmax;
+ sched->constraints = c;
+
+ for (idx = 0; idx < num; idx++) {
+ if (c[idx]->weight == wmin)
+ break;
+ }
+
+ sched->state.event = idx; /* start with min weight */
+ sched->state.weight = wmin;
+ sched->state.unassigned = num;
+}
+
+/*
+ * Select a counter for the current event to schedule. Return true on
+ * success.
+ */
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+ struct event_constraint *c;
+ int idx;
+
+ if (!sched->state.unassigned)
+ return false;
+
+ if (sched->state.event >= sched->max_events)
+ return false;
+
+ c = sched->constraints[sched->state.event];
+
+ /* Grab the first unused counter starting with idx */
+ idx = sched->state.counter;
+ for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+ if (!__test_and_set_bit(idx, sched->state.used))
+ break;
+ }
+ sched->state.counter = idx;
+
+ if (idx >= X86_PMC_IDX_MAX)
+ return false;
+
+ return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+ struct event_constraint *c;
+
+ if (!sched->state.unassigned || !--sched->state.unassigned)
+ return false;
+
+ do {
+ /* next event */
+ sched->state.event++;
+ if (sched->state.event >= sched->max_events) {
+ /* next weight */
+ sched->state.event = 0;
+ sched->state.weight++;
+ if (sched->state.weight > sched->max_weight)
+ return false;
+ }
+ c = sched->constraints[sched->state.event];
+ } while (c->weight != sched->state.weight);
+
+ sched->state.counter = 0; /* start with first counter */
+
+ return true;
+}
+
+/*
+ * Assign a counter for each event.
+ */
+static int perf_assign_events(struct event_constraint **constraints, int n,
+ int wmin, int wmax, int *assign)
+{
+ struct perf_sched sched;
+
+ perf_sched_init(&sched, constraints, n, wmin, wmax);
+
+ do {
+ if (!perf_sched_find_counter(&sched))
+ break; /* failed */
+ if (assign)
+ assign[sched.state.event] = sched.state.counter;
+ } while (perf_sched_next_event(&sched));
+
+ return sched.state.unassigned;
+}
+
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- int i, j, w, wmax, num = 0;
+ int i, wmin, wmax, num = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
- for (i = 0; i < n; i++) {
+ for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
constraints[i] = c;
+ wmin = min(wmin, c->weight);
+ wmax = max(wmax, c->weight);
}
/*
@@ -525,60 +652,12 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (assign)
assign[i] = hwc->idx;
}
- if (i == n)
- goto done;
-
- /*
- * begin slow path
- */
- bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-
- /*
- * weight = number of possible counters
- *
- * 1 = most constrained, only works on one counter
- * wmax = least constrained, works on any counter
- *
- * assign events to counters starting with most
- * constrained events.
- */
- wmax = x86_pmu.num_counters;
+ /* slow path */
+ if (i != n)
+ num = perf_assign_events(constraints, n, wmin, wmax, assign);
/*
- * when fixed event counters are present,
- * wmax is incremented by 1 to account
- * for one more choice
- */
- if (x86_pmu.num_counters_fixed)
- wmax++;
-
- for (w = 1, num = n; num && w <= wmax; w++) {
- /* for each event */
- for (i = 0; num && i < n; i++) {
- c = constraints[i];
- hwc = &cpuc->event_list[i]->hw;
-
- if (c->weight != w)
- continue;
-
- for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
- if (!test_bit(j, used_mask))
- break;
- }
-
- if (j == X86_PMC_IDX_MAX)
- break;
-
- __set_bit(j, used_mask);
-
- if (assign)
- assign[i] = j;
- num--;
- }
- }
-done:
- /*
* scheduling failed or is just a simulation,
* free resources if necessary
*/
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a3ef66a..3c1063a 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -22,8 +22,14 @@ extern unsigned long __sw_hweight64(__u64 w);
#include <asm/bitops.h>
#define for_each_set_bit(bit, addr, size) \
- for ((bit) = find_first_bit((addr), (size)); \
- (bit) < (size); \
+ for ((bit) = find_first_bit((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_cont(bit, addr, size) \
+ for ((bit) = find_next_bit((addr), (size), (bit)); \
+ (bit) < (size); \
(bit) = find_next_bit((addr), (size), (bit) + 1))
static __inline__ int get_bitmask_order(unsigned int count)
--
1.7.7
^ permalink raw reply related [flat|nested] 9+ messages in thread* [tip:perf/core] perf, x86: Implement event scheduler helper functions
2011-11-18 11:35 ` [PATCH v4 1/2] perf, x86: Implement event scheduler helper functions Robert Richter
@ 2011-12-06 9:44 ` tip-bot for Robert Richter
0 siblings, 0 replies; 9+ messages in thread
From: tip-bot for Robert Richter @ 2011-12-06 9:44 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, eranian, hpa, mingo, robert.richter, a.p.zijlstra,
tglx, mingo
Commit-ID: 1e2ad28f80b4e155678259238f51edebc19e4014
Gitweb: http://git.kernel.org/tip/1e2ad28f80b4e155678259238f51edebc19e4014
Author: Robert Richter <robert.richter@amd.com>
AuthorDate: Fri, 18 Nov 2011 12:35:21 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Tue, 6 Dec 2011 08:33:54 +0100
perf, x86: Implement event scheduler helper functions
This patch introduces x86 perf scheduler code helper functions. We
need this to later add more complex functionality to support
overlapping counter constraints (next patch).
The algorithm is modified so that the range of weight values is now
generated from the constraints. There shouldn't be other functional
changes.
With the helper functions the scheduler is controlled. There are
functions to initialize, traverse the event list, find unused counters
etc. The scheduler keeps its own state.
V3:
* Added macro for_each_set_bit_cont().
* Changed functions interfaces of perf_sched_find_counter() and
perf_sched_next_event() to use bool as return value.
* Added some comments to make code better understandable.
V4:
* Fix broken event assignment if weight of the first event is not
wmin (perf_sched_init()).
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1321616122-1533-2-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
arch/x86/kernel/cpu/perf_event.c | 185 +++++++++++++++++++++++++++-----------
include/linux/bitops.h | 10 ++-
2 files changed, 140 insertions(+), 55 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bda212..5a469d3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,18 +484,145 @@ static inline int is_x86_event(struct perf_event *event)
return event->pmu == &pmu;
}
+/*
+ * Event scheduler state:
+ *
+ * Assign events iterating over all events and counters, beginning
+ * with events with least weights first. Keep the current iterator
+ * state in struct sched_state.
+ */
+struct sched_state {
+ int weight;
+ int event; /* event index */
+ int counter; /* counter index */
+ int unassigned; /* number of events to be assigned left */
+ unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+struct perf_sched {
+ int max_weight;
+ int max_events;
+ struct event_constraint **constraints;
+ struct sched_state state;
+};
+
+/*
+ * Initialize interator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+ int num, int wmin, int wmax)
+{
+ int idx;
+
+ memset(sched, 0, sizeof(*sched));
+ sched->max_events = num;
+ sched->max_weight = wmax;
+ sched->constraints = c;
+
+ for (idx = 0; idx < num; idx++) {
+ if (c[idx]->weight == wmin)
+ break;
+ }
+
+ sched->state.event = idx; /* start with min weight */
+ sched->state.weight = wmin;
+ sched->state.unassigned = num;
+}
+
+/*
+ * Select a counter for the current event to schedule. Return true on
+ * success.
+ */
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+ struct event_constraint *c;
+ int idx;
+
+ if (!sched->state.unassigned)
+ return false;
+
+ if (sched->state.event >= sched->max_events)
+ return false;
+
+ c = sched->constraints[sched->state.event];
+
+ /* Grab the first unused counter starting with idx */
+ idx = sched->state.counter;
+ for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+ if (!__test_and_set_bit(idx, sched->state.used))
+ break;
+ }
+ sched->state.counter = idx;
+
+ if (idx >= X86_PMC_IDX_MAX)
+ return false;
+
+ return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+ struct event_constraint *c;
+
+ if (!sched->state.unassigned || !--sched->state.unassigned)
+ return false;
+
+ do {
+ /* next event */
+ sched->state.event++;
+ if (sched->state.event >= sched->max_events) {
+ /* next weight */
+ sched->state.event = 0;
+ sched->state.weight++;
+ if (sched->state.weight > sched->max_weight)
+ return false;
+ }
+ c = sched->constraints[sched->state.event];
+ } while (c->weight != sched->state.weight);
+
+ sched->state.counter = 0; /* start with first counter */
+
+ return true;
+}
+
+/*
+ * Assign a counter for each event.
+ */
+static int perf_assign_events(struct event_constraint **constraints, int n,
+ int wmin, int wmax, int *assign)
+{
+ struct perf_sched sched;
+
+ perf_sched_init(&sched, constraints, n, wmin, wmax);
+
+ do {
+ if (!perf_sched_find_counter(&sched))
+ break; /* failed */
+ if (assign)
+ assign[sched.state.event] = sched.state.counter;
+ } while (perf_sched_next_event(&sched));
+
+ return sched.state.unassigned;
+}
+
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- int i, j, w, wmax, num = 0;
+ int i, wmin, wmax, num = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
- for (i = 0; i < n; i++) {
+ for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
constraints[i] = c;
+ wmin = min(wmin, c->weight);
+ wmax = max(wmax, c->weight);
}
/*
@@ -521,59 +648,11 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (assign)
assign[i] = hwc->idx;
}
- if (i == n)
- goto done;
- /*
- * begin slow path
- */
+ /* slow path */
+ if (i != n)
+ num = perf_assign_events(constraints, n, wmin, wmax, assign);
- bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-
- /*
- * weight = number of possible counters
- *
- * 1 = most constrained, only works on one counter
- * wmax = least constrained, works on any counter
- *
- * assign events to counters starting with most
- * constrained events.
- */
- wmax = x86_pmu.num_counters;
-
- /*
- * when fixed event counters are present,
- * wmax is incremented by 1 to account
- * for one more choice
- */
- if (x86_pmu.num_counters_fixed)
- wmax++;
-
- for (w = 1, num = n; num && w <= wmax; w++) {
- /* for each event */
- for (i = 0; num && i < n; i++) {
- c = constraints[i];
- hwc = &cpuc->event_list[i]->hw;
-
- if (c->weight != w)
- continue;
-
- for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
- if (!test_bit(j, used_mask))
- break;
- }
-
- if (j == X86_PMC_IDX_MAX)
- break;
-
- __set_bit(j, used_mask);
-
- if (assign)
- assign[i] = j;
- num--;
- }
- }
-done:
/*
* scheduling failed or is just a simulation,
* free resources if necessary
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a3ef66a..3c1063a 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -22,8 +22,14 @@ extern unsigned long __sw_hweight64(__u64 w);
#include <asm/bitops.h>
#define for_each_set_bit(bit, addr, size) \
- for ((bit) = find_first_bit((addr), (size)); \
- (bit) < (size); \
+ for ((bit) = find_first_bit((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_cont(bit, addr, size) \
+ for ((bit) = find_next_bit((addr), (size), (bit)); \
+ (bit) < (size); \
(bit) = find_next_bit((addr), (size), (bit) + 1))
static __inline__ int get_bitmask_order(unsigned int count)
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v4 2/2] perf, x86: Fix event scheduler for constraints with overlapping counters
2011-11-18 11:35 [PATCH v4 0/2] perf, x86: handle overlapping counters Robert Richter
2011-11-18 11:35 ` [PATCH v4 1/2] perf, x86: Implement event scheduler helper functions Robert Richter
@ 2011-11-18 11:35 ` Robert Richter
2011-12-06 9:45 ` [tip:perf/core] " tip-bot for Robert Richter
2011-11-18 15:10 ` [PATCH v4 0/2] perf, x86: handle " Peter Zijlstra
2011-11-22 15:26 ` Peter Zijlstra
3 siblings, 1 reply; 9+ messages in thread
From: Robert Richter @ 2011-11-18 11:35 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Stephane Eranian, Ingo Molnar, Andi Kleen, LKML, Robert Richter
The current x86 event scheduler fails to resolve scheduling problems
of certain combinations of events and constraints. This happens if the
counter mask of such an event is not a subset of any other counter
mask of a constraint with an equal or higher weight, e.g. constraints
of the AMD family 15h pmu:
counter mask weight
amd_f15_PMC30 0x09 2 <--- overlapping counters
amd_f15_PMC20 0x07 3
amd_f15_PMC53 0x38 3
The scheduler does not find then an existing solution. Here is an
example:
event code counter failure possible solution
0x02E PMC[3,0] 0 3
0x043 PMC[2:0] 1 0
0x045 PMC[2:0] 2 1
0x046 PMC[2:0] FAIL 2
The event scheduler may not select the correct counter in the first
cycle because it needs to know which subsequent events will be
scheduled. It may fail to schedule the events then.
To solve this, we now save the scheduler state of events with
overlapping counter counstraints. If we fail to schedule the events
we rollback to those states and try to use another free counter.
Constraints with overlapping counters are marked with a new introduced
overlap flag. We set the overlap flag for such constraints to give the
scheduler a hint which events to select for counter rescheduling. The
EVENT_CONSTRAINT_OVERLAP() macro can be used for this.
Care must be taken as the rescheduling algorithm is O(n!) which will
increase scheduling cycles for an over-commited system dramatically.
The number of such EVENT_CONSTRAINT_OVERLAP() macros and its counter
masks must be kept at a minimum. Thus, the current stack is limited to
2 states to limit the number of loops the algorithm takes in the worst
case.
On systems with no overlapping-counter constraints, this
implementation does not increase the loop count compared to the
previous algorithm.
V2:
* Renamed redo -> overlap.
* Reimplementation using perf scheduling helper functions.
V3:
* Added WARN_ON_ONCE() if out of save states.
* Changed function interface of perf_sched_restore_state() to use bool
as return value.
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
arch/x86/kernel/cpu/perf_event.c | 45 ++++++++++++++++++++++++++++++++-
arch/x86/kernel/cpu/perf_event.h | 30 +++++++++++++++++++++-
arch/x86/kernel/cpu/perf_event_amd.c | 2 +-
3 files changed, 72 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 02b9020..cc9aafc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -503,11 +503,16 @@ struct sched_state {
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define SCHED_STATES_MAX 2
+
struct perf_sched {
int max_weight;
int max_events;
struct event_constraint **constraints;
struct sched_state state;
+ int saved_states;
+ struct sched_state saved[SCHED_STATES_MAX];
};
/*
@@ -533,11 +538,34 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
sched->state.unassigned = num;
}
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+ if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+ return;
+
+ sched->saved[sched->saved_states] = sched->state;
+ sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+ if (!sched->saved_states)
+ return false;
+
+ sched->saved_states--;
+ sched->state = sched->saved[sched->saved_states];
+
+ /* continue with next counter: */
+ clear_bit(sched->state.counter++, sched->state.used);
+
+ return true;
+}
+
/*
* Select a counter for the current event to schedule. Return true on
* success.
*/
-static bool perf_sched_find_counter(struct perf_sched *sched)
+static bool __perf_sched_find_counter(struct perf_sched *sched)
{
struct event_constraint *c;
int idx;
@@ -561,6 +589,19 @@ static bool perf_sched_find_counter(struct perf_sched *sched)
if (idx >= X86_PMC_IDX_MAX)
return false;
+ if (c->overlap)
+ perf_sched_save_state(sched);
+
+ return true;
+}
+
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+ while (!__perf_sched_find_counter(sched)) {
+ if (!perf_sched_restore_state(sched))
+ return false;
+ }
+
return true;
}
@@ -1254,7 +1295,7 @@ static int __init init_hw_perf_events(void)
unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
- 0, x86_pmu.num_counters);
+ 0, x86_pmu.num_counters, 0);
if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d4..51a985c 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -45,6 +45,7 @@ struct event_constraint {
u64 code;
u64 cmask;
int weight;
+ int overlap;
};
struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
void *kfree_on_online;
};
-#define __EVENT_CONSTRAINT(c, n, m, w) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
{ .idxmsk64 = (n) }, \
.code = (c), \
.cmask = (m), \
.weight = (w), \
+ .overlap = (o), \
}
#define EVENT_CONSTRAINT(c, n, m) \
- __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+
+/*
+ * The overlap flag marks event constraints with overlapping counter
+ * masks. This is the case if the counter mask of such an event is not
+ * a subset of any other counter mask of a constraint with an equal or
+ * higher weight, e.g.:
+ *
+ * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ *
+ * The event scheduler may not select the correct counter in the first
+ * cycle because it needs to know which subsequent events will be
+ * scheduled. It may fail to schedule the events then. So we set the
+ * overlap flag for such constraints to give the scheduler a hint which
+ * events to select for counter rescheduling.
+ *
+ * Care must be taken as the rescheduling algorithm is O(n!) which
+ * will increase scheduling cycles for an over-commited system
+ * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
+ * and its counter masks must be kept at a minimum.
+ */
+#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
/*
* Constraint on the Event code.
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index aeefd45..0397b23 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
-static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
--
1.7.7
^ permalink raw reply related [flat|nested] 9+ messages in thread* [tip:perf/core] perf, x86: Fix event scheduler for constraints with overlapping counters
2011-11-18 11:35 ` [PATCH v4 2/2] perf, x86: Fix event scheduler for constraints with overlapping counters Robert Richter
@ 2011-12-06 9:45 ` tip-bot for Robert Richter
0 siblings, 0 replies; 9+ messages in thread
From: tip-bot for Robert Richter @ 2011-12-06 9:45 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, eranian, hpa, mingo, robert.richter, a.p.zijlstra,
tglx, mingo
Commit-ID: bc1738f6ee83015f090867813dcca4d690e7917c
Gitweb: http://git.kernel.org/tip/bc1738f6ee83015f090867813dcca4d690e7917c
Author: Robert Richter <robert.richter@amd.com>
AuthorDate: Fri, 18 Nov 2011 12:35:22 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Tue, 6 Dec 2011 08:33:56 +0100
perf, x86: Fix event scheduler for constraints with overlapping counters
The current x86 event scheduler fails to resolve scheduling problems
of certain combinations of events and constraints. This happens if the
counter mask of such an event is not a subset of any other counter
mask of a constraint with an equal or higher weight, e.g. constraints
of the AMD family 15h pmu:
counter mask weight
amd_f15_PMC30 0x09 2 <--- overlapping counters
amd_f15_PMC20 0x07 3
amd_f15_PMC53 0x38 3
The scheduler does not find then an existing solution. Here is an
example:
event code counter failure possible solution
0x02E PMC[3,0] 0 3
0x043 PMC[2:0] 1 0
0x045 PMC[2:0] 2 1
0x046 PMC[2:0] FAIL 2
The event scheduler may not select the correct counter in the first
cycle because it needs to know which subsequent events will be
scheduled. It may fail to schedule the events then.
To solve this, we now save the scheduler state of events with
overlapping counter counstraints. If we fail to schedule the events
we rollback to those states and try to use another free counter.
Constraints with overlapping counters are marked with a new introduced
overlap flag. We set the overlap flag for such constraints to give the
scheduler a hint which events to select for counter rescheduling. The
EVENT_CONSTRAINT_OVERLAP() macro can be used for this.
Care must be taken as the rescheduling algorithm is O(n!) which will
increase scheduling cycles for an over-commited system dramatically.
The number of such EVENT_CONSTRAINT_OVERLAP() macros and its counter
masks must be kept at a minimum. Thus, the current stack is limited to
2 states to limit the number of loops the algorithm takes in the worst
case.
On systems with no overlapping-counter constraints, this
implementation does not increase the loop count compared to the
previous algorithm.
V2:
* Renamed redo -> overlap.
* Reimplementation using perf scheduling helper functions.
V3:
* Added WARN_ON_ONCE() if out of save states.
* Changed function interface of perf_sched_restore_state() to use bool
as return value.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1321616122-1533-3-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
arch/x86/kernel/cpu/perf_event.c | 45 ++++++++++++++++++++++++++++++++-
arch/x86/kernel/cpu/perf_event.h | 30 +++++++++++++++++++++-
arch/x86/kernel/cpu/perf_event_amd.c | 2 +-
3 files changed, 72 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5a469d3..fa6fdec 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -499,11 +499,16 @@ struct sched_state {
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define SCHED_STATES_MAX 2
+
struct perf_sched {
int max_weight;
int max_events;
struct event_constraint **constraints;
struct sched_state state;
+ int saved_states;
+ struct sched_state saved[SCHED_STATES_MAX];
};
/*
@@ -529,11 +534,34 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
sched->state.unassigned = num;
}
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+ if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+ return;
+
+ sched->saved[sched->saved_states] = sched->state;
+ sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+ if (!sched->saved_states)
+ return false;
+
+ sched->saved_states--;
+ sched->state = sched->saved[sched->saved_states];
+
+ /* continue with next counter: */
+ clear_bit(sched->state.counter++, sched->state.used);
+
+ return true;
+}
+
/*
* Select a counter for the current event to schedule. Return true on
* success.
*/
-static bool perf_sched_find_counter(struct perf_sched *sched)
+static bool __perf_sched_find_counter(struct perf_sched *sched)
{
struct event_constraint *c;
int idx;
@@ -557,6 +585,19 @@ static bool perf_sched_find_counter(struct perf_sched *sched)
if (idx >= X86_PMC_IDX_MAX)
return false;
+ if (c->overlap)
+ perf_sched_save_state(sched);
+
+ return true;
+}
+
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+ while (!__perf_sched_find_counter(sched)) {
+ if (!perf_sched_restore_state(sched))
+ return false;
+ }
+
return true;
}
@@ -1250,7 +1291,7 @@ static int __init init_hw_perf_events(void)
unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
- 0, x86_pmu.num_counters);
+ 0, x86_pmu.num_counters, 0);
if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d4..51a985c 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -45,6 +45,7 @@ struct event_constraint {
u64 code;
u64 cmask;
int weight;
+ int overlap;
};
struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
void *kfree_on_online;
};
-#define __EVENT_CONSTRAINT(c, n, m, w) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
{ .idxmsk64 = (n) }, \
.code = (c), \
.cmask = (m), \
.weight = (w), \
+ .overlap = (o), \
}
#define EVENT_CONSTRAINT(c, n, m) \
- __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+
+/*
+ * The overlap flag marks event constraints with overlapping counter
+ * masks. This is the case if the counter mask of such an event is not
+ * a subset of any other counter mask of a constraint with an equal or
+ * higher weight, e.g.:
+ *
+ * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ *
+ * The event scheduler may not select the correct counter in the first
+ * cycle because it needs to know which subsequent events will be
+ * scheduled. It may fail to schedule the events then. So we set the
+ * overlap flag for such constraints to give the scheduler a hint which
+ * events to select for counter rescheduling.
+ *
+ * Care must be taken as the rescheduling algorithm is O(n!) which
+ * will increase scheduling cycles for an over-commited system
+ * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
+ * and its counter masks must be kept at a minimum.
+ */
+#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
/*
* Constraint on the Event code.
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index aeefd45..0397b23 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
-static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v4 0/2] perf, x86: handle overlapping counters
2011-11-18 11:35 [PATCH v4 0/2] perf, x86: handle overlapping counters Robert Richter
2011-11-18 11:35 ` [PATCH v4 1/2] perf, x86: Implement event scheduler helper functions Robert Richter
2011-11-18 11:35 ` [PATCH v4 2/2] perf, x86: Fix event scheduler for constraints with overlapping counters Robert Richter
@ 2011-11-18 15:10 ` Peter Zijlstra
2011-11-18 16:32 ` Robert Richter
2011-11-22 15:26 ` Peter Zijlstra
3 siblings, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2011-11-18 15:10 UTC (permalink / raw)
To: Robert Richter; +Cc: Stephane Eranian, Ingo Molnar, Andi Kleen, LKML
On Fri, 2011-11-18 at 12:35 +0100, Robert Richter wrote:
> I know there is an ongoing discussion about the best algorithm to be
> used for this problem. Posting this version with the latest updates
> anyway...
>
> This patch set implements support for overlapping counters (patch #2).
> For this the existing x86 event scheduler is reworked by introducing
> helper functions (patch #1).
>
> Version 4 of the patch set fixes a bug in the event assignment. There was a
> problem if the first event's weight (index 0) is not the minimum weight. In
> this case the event was assigned twice leaving another event uninitialized
> which caused a GP while accessing counter msrs. Fixes this by proper
> initialzing the first event in perf_sched_init().
OK, I've queued these and tested them using something like:
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
@@ -88,13 +88,19 @@ static struct extra_reg intel_nehalem_ex
static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
{
- FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+// FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+// FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
+
+ EVENT_CONSTRAINT_OVERLAP(0x003c, 0x1|0x2, INTEL_ARCH_EVENT_MASK),
+ EVENT_CONSTRAINT_OVERLAP(0x00c0, 0x2|0x4, INTEL_ARCH_EVENT_MASK),
+ EVENT_CONSTRAINT_OVERLAP(0x4f2e, 0x4|0x8, INTEL_ARCH_EVENT_MASK),
+ EVENT_CONSTRAINT_OVERLAP(0x412e, 0x8|0x1, INTEL_ARCH_EVENT_MASK),
+
EVENT_CONSTRAINT_END
};
To stress the thing, you can quite easily manage to hit that
WARN_ON_ONCE() in there, but I haven't seen things explode yet.
> Robert Richter (2):
> configs: updating misc-x86_64-erda to v3.0.8
> config: updating misc-x86_64-erda.config for ASUS M4A89GTD PRO
>
> configs/misc-x86_64-erda.config | 517 ++++++++++++++++++++++++++++-----------
> 1 files changed, 379 insertions(+), 138 deletions(-)
You sure about that? :-)
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH v4 0/2] perf, x86: handle overlapping counters
2011-11-18 15:10 ` [PATCH v4 0/2] perf, x86: handle " Peter Zijlstra
@ 2011-11-18 16:32 ` Robert Richter
0 siblings, 0 replies; 9+ messages in thread
From: Robert Richter @ 2011-11-18 16:32 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Stephane Eranian, Ingo Molnar, Andi Kleen, LKML
On 18.11.11 16:10:17, Peter Zijlstra wrote:
> OK, I've queued these and tested them using something like:
> + EVENT_CONSTRAINT_OVERLAP(0x003c, 0x1|0x2, INTEL_ARCH_EVENT_MASK),
> + EVENT_CONSTRAINT_OVERLAP(0x00c0, 0x2|0x4, INTEL_ARCH_EVENT_MASK),
> + EVENT_CONSTRAINT_OVERLAP(0x4f2e, 0x4|0x8, INTEL_ARCH_EVENT_MASK),
> + EVENT_CONSTRAINT_OVERLAP(0x412e, 0x8|0x1, INTEL_ARCH_EVENT_MASK),
> +
> EVENT_CONSTRAINT_END
> };
>
> To stress the thing, you can quite easily manage to hit that
> WARN_ON_ONCE() in there, but I haven't seen things explode yet.
Yes, the above would require a stack depth of 4 in the worst case
ending up in 2^4 max loops for the above. It would become more with
higher weights.
> > Robert Richter (2):
> > configs: updating misc-x86_64-erda to v3.0.8
> > config: updating misc-x86_64-erda.config for ASUS M4A89GTD PRO
> >
> > configs/misc-x86_64-erda.config | 517 ++++++++++++++++++++++++++++-----------
> > 1 files changed, 379 insertions(+), 138 deletions(-)
>
> You sure about that? :-)
Yeah, wrong terminal... Luckily the patches are correct. ;)
Thanks,
-Robert
--
Advanced Micro Devices, Inc.
Operating System Research Center
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v4 0/2] perf, x86: handle overlapping counters
2011-11-18 11:35 [PATCH v4 0/2] perf, x86: handle overlapping counters Robert Richter
` (2 preceding siblings ...)
2011-11-18 15:10 ` [PATCH v4 0/2] perf, x86: handle " Peter Zijlstra
@ 2011-11-22 15:26 ` Peter Zijlstra
2011-11-22 16:27 ` Robert Richter
3 siblings, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2011-11-22 15:26 UTC (permalink / raw)
To: Robert Richter; +Cc: Stephane Eranian, Ingo Molnar, Andi Kleen, LKML
I stuck the below patch on top, afaict there is no reason to store
sched->state.counter when we fail, since in that case we pop a state or
go bust entirely, right?
---
Subject: perf, x86: Prefer Fixed purpose counters when scheduling
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu Nov 10 15:15:42 CET 2011
This avoids a scheduling failure for cases like:
cycles, cycles, instructions, instructions (on Core2)
Which would end up being programmed like:
PMC0, PMC1, FP-instructions, fail
Because all events will have the same weight.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/x86/kernel/cpu/perf_event.c | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -574,16 +574,25 @@ static bool __perf_sched_find_counter(st
c = sched->constraints[sched->state.event];
+ /* Prefer fixed purpose counters */
+ if (x86_pmu.num_counters_fixed) {
+ idx = X86_PMC_IDX_FIXED;
+ for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+ if (!__test_and_set_bit(idx, sched->state.used))
+ goto done;
+ }
+ }
/* Grab the first unused counter starting with idx */
idx = sched->state.counter;
- for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+ for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
if (!__test_and_set_bit(idx, sched->state.used))
- break;
+ goto done;
}
- sched->state.counter = idx;
- if (idx >= X86_PMC_IDX_MAX)
- return false;
+ return false;
+
+done:
+ sched->state.counter = idx;
if (c->overlap)
perf_sched_save_state(sched);
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH v4 0/2] perf, x86: handle overlapping counters
2011-11-22 15:26 ` Peter Zijlstra
@ 2011-11-22 16:27 ` Robert Richter
0 siblings, 0 replies; 9+ messages in thread
From: Robert Richter @ 2011-11-22 16:27 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Stephane Eranian, Ingo Molnar, Andi Kleen, LKML
On 22.11.11 16:26:07, Peter Zijlstra wrote:
> @@ -574,16 +574,25 @@ static bool __perf_sched_find_counter(st
>
> c = sched->constraints[sched->state.event];
>
> + /* Prefer fixed purpose counters */
> + if (x86_pmu.num_counters_fixed) {
> + idx = X86_PMC_IDX_FIXED;
> + for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
> + if (!__test_and_set_bit(idx, sched->state.used))
> + goto done;
> + }
> + }
> /* Grab the first unused counter starting with idx */
> idx = sched->state.counter;
> - for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
> + for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
> if (!__test_and_set_bit(idx, sched->state.used))
> - break;
> + goto done;
> }
> - sched->state.counter = idx;
>
> - if (idx >= X86_PMC_IDX_MAX)
> - return false;
> + return false;
> +
> +done:
> + sched->state.counter = idx;
Yes, we don't need to save this on failure. Looks ok.
-Robert
>
> if (c->overlap)
> perf_sched_save_state(sched);
>
>
--
Advanced Micro Devices, Inc.
Operating System Research Center
^ permalink raw reply [flat|nested] 9+ messages in thread