* [PATCH 0/2] perf, x86: handle overlapping counters
@ 2011-09-10 14:49 Robert Richter
2011-09-10 14:49 ` [PATCH 1/2] perf, x86: Implement event scheduler helper functions Robert Richter
2011-09-10 14:49 ` [PATCH 2/2] perf, x86: Fix event scheduler for constraints with Robert Richter
0 siblings, 2 replies; 5+ messages in thread
From: Robert Richter @ 2011-09-10 14:49 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML
This patch set implements support for overlapping counters (patch #2).
For this the existing x86 event scheduler is reworked by introducing
helper functions (patch #1).
-Robert
The following changes since commit 51887c8230d57c4d9cc68e3784c52c8f0a708655:
Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core (2011-08-18 21:58:46 +0200)
Robert Richter (2):
perf, x86: Implement event scheduler helper functions
perf, x86: Fix event scheduler for constraints with overlapping counters
arch/x86/kernel/cpu/perf_event.c | 234 ++++++++++++++++++++++++++--------
arch/x86/kernel/cpu/perf_event_amd.c | 2 +-
2 files changed, 179 insertions(+), 57 deletions(-)
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/2] perf, x86: Implement event scheduler helper functions
2011-09-10 14:49 [PATCH 0/2] perf, x86: handle overlapping counters Robert Richter
@ 2011-09-10 14:49 ` Robert Richter
2011-09-14 14:43 ` Peter Zijlstra
2011-09-10 14:49 ` [PATCH 2/2] perf, x86: Fix event scheduler for constraints with Robert Richter
1 sibling, 1 reply; 5+ messages in thread
From: Robert Richter @ 2011-09-10 14:49 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML, Robert Richter
This patch introduces x86 perf scheduler code helper functions. We
need this to later add more complex functionality to support
overlapping counter constraints (next patch).
The algorithm is modified so that the range of weight values is now
generated from the constraints. There shouldn't be other functional
changes.
With the helper functions the scheduler is controlled. There are
functions to initialize, traverse the event list, find unused counters
etc. The scheduler keeps its own state.
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
arch/x86/kernel/cpu/perf_event.c | 158 +++++++++++++++++++++++++-------------
1 files changed, 105 insertions(+), 53 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 594d425..44ec767 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -790,18 +790,118 @@ static inline int is_x86_event(struct perf_event *event)
return event->pmu == &pmu;
}
+struct sched_state {
+ int weight;
+ int event;
+ int counter;
+ int unassigned;
+ unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+struct perf_sched {
+ int max_weight;
+ int max_events;
+ struct event_constraint **constraints;
+ struct sched_state state;
+};
+
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+ int num, int wmin, int wmax)
+{
+ memset(sched, 0, sizeof(*sched));
+ sched->max_events = num;
+ sched->max_weight = wmax;
+ sched->constraints = c;
+
+ sched->state.weight = wmin;
+ sched->state.unassigned = num;
+}
+
+static struct sched_state *perf_sched_find_counter(struct perf_sched *sched)
+{
+ struct event_constraint *c;
+ int idx;
+
+ if (!sched->state.unassigned)
+ return NULL;
+
+ c = sched->constraints[sched->state.event];
+
+ idx = sched->state.counter;
+ /* for each bit in idxmsk starting from idx */
+ while (idx < X86_PMC_IDX_MAX) {
+ idx = find_next_bit(c->idxmsk, X86_PMC_IDX_MAX, idx);
+ if (idx == X86_PMC_IDX_MAX)
+ break;
+ if (!__test_and_set_bit(idx, sched->state.used))
+ break;
+ idx++;
+ }
+ sched->state.counter = idx;
+
+ if (idx >= X86_PMC_IDX_MAX)
+ return NULL;
+
+ return &sched->state;
+}
+
+static int perf_sched_next_event(struct perf_sched *sched)
+{
+ struct event_constraint *c;
+
+ if (!sched->state.unassigned || !--sched->state.unassigned)
+ return 0;
+
+ do {
+ /* next event */
+ sched->state.event++;
+ if (sched->state.event >= sched->max_events) {
+ /* next weight */
+ sched->state.event = 0;
+ sched->state.weight++;
+ if (sched->state.weight > sched->max_weight)
+ return 0;
+ }
+ c = sched->constraints[sched->state.event];
+ } while (c->weight != sched->state.weight);
+
+ sched->state.counter = 0; /* start with first counter */
+
+ return 1;
+}
+
+static int perf_assign_events(struct event_constraint **constraints, int n,
+ int wmin, int wmax, int *assign)
+{
+ struct perf_sched sched;
+ struct sched_state *state;
+
+ perf_sched_init(&sched, constraints, n, wmin, wmax);
+ do {
+ state = perf_sched_find_counter(&sched);
+ if (!state)
+ break; /* failed */
+ if (assign)
+ assign[state->event] = state->counter;
+ } while (perf_sched_next_event(&sched));
+
+ return sched.state.unassigned;
+}
+
static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- int i, j, w, wmax, num = 0;
+ int i, wmin, wmax, num = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
- for (i = 0; i < n; i++) {
+ for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
constraints[i] = c;
+ wmin = min(wmin, c->weight);
+ wmax = max(wmax, c->weight);
}
/*
@@ -827,59 +927,11 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (assign)
assign[i] = hwc->idx;
}
- if (i == n)
- goto done;
- /*
- * begin slow path
- */
+ /* slow path */
+ if (i != n)
+ num = perf_assign_events(constraints, n, wmin, wmax, assign);
- bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-
- /*
- * weight = number of possible counters
- *
- * 1 = most constrained, only works on one counter
- * wmax = least constrained, works on any counter
- *
- * assign events to counters starting with most
- * constrained events.
- */
- wmax = x86_pmu.num_counters;
-
- /*
- * when fixed event counters are present,
- * wmax is incremented by 1 to account
- * for one more choice
- */
- if (x86_pmu.num_counters_fixed)
- wmax++;
-
- for (w = 1, num = n; num && w <= wmax; w++) {
- /* for each event */
- for (i = 0; num && i < n; i++) {
- c = constraints[i];
- hwc = &cpuc->event_list[i]->hw;
-
- if (c->weight != w)
- continue;
-
- for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
- if (!test_bit(j, used_mask))
- break;
- }
-
- if (j == X86_PMC_IDX_MAX)
- break;
-
- __set_bit(j, used_mask);
-
- if (assign)
- assign[i] = j;
- num--;
- }
- }
-done:
/*
* scheduling failed or is just a simulation,
* free resources if necessary
--
1.7.6.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] perf, x86: Fix event scheduler for constraints with
2011-09-10 14:49 [PATCH 0/2] perf, x86: handle overlapping counters Robert Richter
2011-09-10 14:49 ` [PATCH 1/2] perf, x86: Implement event scheduler helper functions Robert Richter
@ 2011-09-10 14:49 ` Robert Richter
2011-09-14 14:45 ` Peter Zijlstra
1 sibling, 1 reply; 5+ messages in thread
From: Robert Richter @ 2011-09-10 14:49 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML, Robert Richter
The current x86 event scheduler fails to resolve scheduling problems
of certain combinations of events and constraints. This happens if the
counter mask of such an event is not a subset of any other counter
mask of a constraint with an equal or higher weight, e.g. constraints
of the AMD family 15h pmu:
counter mask weight
amd_f15_PMC30 0x09 2 <--- overlapping counters
amd_f15_PMC20 0x07 3
amd_f15_PMC53 0x38 3
The scheduler then fails to find an existing solution. Here is an
example:
event code counter failure possible solution
0x02E PMC[3,0] 0 3
0x043 PMC[2:0] 1 0
0x045 PMC[2:0] 2 1
0x046 PMC[2:0] FAIL 2
The event scheduler may not select the correct counter in the first
cycle because it needs to know which subsequent events will be
scheduled. It may fail to schedule the events then.
To solve this, we now save the scheduler state of events with
overlapping counter constraints. If we fail to schedule the events
we rollback to those states and try to use another free counter.
Constraints with overlapping counters are marked with a new introduced
overlap flag. We set the overlap flag for such constraints to give the
scheduler a hint which events to select for counter rescheduling. The
EVENT_CONSTRAINT_OVERLAP() macro can be used for this.
Care must be taken as the rescheduling algorithm is O(n!) which will
increase scheduling cycles for an over-committed system dramatically.
The number of such EVENT_CONSTRAINT_OVERLAP() macros and its counter
masks must be kept at a minimum. Thus, the current stack is limited to
2 states to limit the number of loops the algorithm takes in the worst
case.
On systems with no overlapping-counter constraints, this
implementation does not increase the loop count compared to the
previous algorithm.
V2:
* Renamed redo -> overlap.
* Reimplementation using perf scheduling helper functions.
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
arch/x86/kernel/cpu/perf_event.c | 78 ++++++++++++++++++++++++++++++++--
arch/x86/kernel/cpu/perf_event_amd.c | 2 +-
2 files changed, 75 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 44ec767..096744f 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -74,6 +74,7 @@ struct event_constraint {
u64 code;
u64 cmask;
int weight;
+ int overlap;
};
struct amd_nb {
@@ -133,15 +134,40 @@ struct cpu_hw_events {
void *kfree_on_online;
};
-#define __EVENT_CONSTRAINT(c, n, m, w) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
{ .idxmsk64 = (n) }, \
.code = (c), \
.cmask = (m), \
.weight = (w), \
+ .overlap = (o), \
}
#define EVENT_CONSTRAINT(c, n, m) \
- __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+
+/*
+ * The overlap flag marks event constraints with overlapping counter
+ * masks. This is the case if the counter mask of such an event is not
+ * a subset of any other counter mask of a constraint with an equal or
+ * higher weight, e.g.:
+ *
+ * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ *
+ * The event scheduler may not select the correct counter in the first
+ * cycle because it needs to know which subsequent events will be
+ * scheduled. It may fail to schedule the events then. So we set the
+ * overlap flag for such constraints to give the scheduler a hint which
+ * events to select for counter rescheduling.
+ *
+ * Care must be taken as the rescheduling algorithm is O(n!) which
+ * will increase scheduling cycles for an over-commited system
+ * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
+ * and its counter masks must be kept at a minimum.
+ */
+#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
/*
* Constraint on the Event code.
@@ -798,11 +824,17 @@ struct sched_state {
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define SCHED_STATES_MAX 2
+
struct perf_sched {
int max_weight;
int max_events;
struct event_constraint **constraints;
struct sched_state state;
+
+ int saved_states;
+ struct sched_state saved[SCHED_STATES_MAX];
};
static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
@@ -817,7 +849,30 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
sched->state.unassigned = num;
}
-static struct sched_state *perf_sched_find_counter(struct perf_sched *sched)
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+ if (sched->saved_states >= SCHED_STATES_MAX)
+ return;
+
+ sched->saved[sched->saved_states] = sched->state;
+ sched->saved_states++;
+}
+
+static int perf_sched_restore_state(struct perf_sched *sched)
+{
+ if (!sched->saved_states)
+ return 0;
+
+ sched->saved_states--;
+ sched->state = sched->saved[sched->saved_states];
+
+ /* continue with next counter: */
+ clear_bit(sched->state.counter++, sched->state.used);
+
+ return 1;
+}
+
+static struct sched_state *__perf_sched_find_counter(struct perf_sched *sched)
{
struct event_constraint *c;
int idx;
@@ -842,9 +897,24 @@ static struct sched_state *perf_sched_find_counter(struct perf_sched *sched)
if (idx >= X86_PMC_IDX_MAX)
return NULL;
+ if (c->overlap)
+ perf_sched_save_state(sched);
+
return &sched->state;
}
+static struct sched_state *perf_sched_find_counter(struct perf_sched *sched)
+{
+ struct sched_state *state;
+
+ while (!(state = __perf_sched_find_counter(sched))) {
+ if (!perf_sched_restore_state(sched))
+ break;
+ }
+
+ return state;
+}
+
static int perf_sched_next_event(struct perf_sched *sched)
{
struct event_constraint *c;
@@ -1621,7 +1691,7 @@ static int __init init_hw_perf_events(void)
unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
- 0, x86_pmu.num_counters);
+ 0, x86_pmu.num_counters, 0);
if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index ee9436c..7c259e0 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -473,7 +473,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
-static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
--
1.7.6.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] perf, x86: Implement event scheduler helper functions
2011-09-10 14:49 ` [PATCH 1/2] perf, x86: Implement event scheduler helper functions Robert Richter
@ 2011-09-14 14:43 ` Peter Zijlstra
0 siblings, 0 replies; 5+ messages in thread
From: Peter Zijlstra @ 2011-09-14 14:43 UTC (permalink / raw)
To: Robert Richter; +Cc: Ingo Molnar, Stephane Eranian, LKML
On Sat, 2011-09-10 at 16:49 +0200, Robert Richter wrote:
> This patch introduces x86 perf scheduler code helper functions. We
> need this to later add more complex functionality to support
> overlapping counter constraints (next patch).
>
> The algorithm is modified so that the range of weight values is now
> generated from the constraints. There shouldn't be other functional
> changes.
>
> With the helper functions the scheduler is controlled. There are
> functions to initialize, traverse the event list, find unused counters
> etc. The scheduler keeps its own state.
>
> Cc: Stephane Eranian <eranian@google.com>
> Signed-off-by: Robert Richter <robert.richter@amd.com>
> ---
> arch/x86/kernel/cpu/perf_event.c | 158 +++++++++++++++++++++++++-------------
> 1 files changed, 105 insertions(+), 53 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 594d425..44ec767 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -790,18 +790,118 @@ static inline int is_x86_event(struct perf_event *event)
> return event->pmu == &pmu;
> }
>
> +struct sched_state {
> + int weight;
> + int event;
> + int counter;
> + int unassigned;
> + unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
> +};
Maybe add a few comments here? Took me a while to figure out unassigned
is the number of unassigned events.
> +static struct sched_state *perf_sched_find_counter(struct perf_sched *sched)
> +{
> + struct event_constraint *c;
> + int idx;
> +
> + if (!sched->state.unassigned)
> + return NULL;
So bail when we're done and there's nothing left to assign.
> + c = sched->constraints[sched->state.event];
> +
> + idx = sched->state.counter;
Which is typically 0, but this could be a restart, at which point we
continue looking where we left off.
> + /* for each bit in idxmsk starting from idx */
> + while (idx < X86_PMC_IDX_MAX) {
> + idx = find_next_bit(c->idxmsk, X86_PMC_IDX_MAX, idx);
> + if (idx == X86_PMC_IDX_MAX)
> + break;
> + if (!__test_and_set_bit(idx, sched->state.used))
> + break;
> + idx++;
> + }
#define for_each_set_bit_continue(bit, addr, size) \
for( ; (bit) < (size); \
(bit) = find_next_bit((addr), (size), (bit) + 1))
for_each_set_bit_continue(idx, c->idxmask, X86_PMC_IDX_MAX) {
if (!__test_and_set_bit(idx, sched->state.used))
break;
}
> + sched->state.counter = idx;
> +
> + if (idx >= X86_PMC_IDX_MAX)
> + return NULL;
OK, so its important to assign idx to counter even if we're too big,
because of the restart, right? That wants a comment.
> +
> + return &sched->state;
> +}
> +
> +static int perf_sched_next_event(struct perf_sched *sched)
> +{
> + struct event_constraint *c;
> +
> + if (!sched->state.unassigned || !--sched->state.unassigned)
> + return 0;
Shouldn't we avoid getting here if there's nothing to do? I get
the !--unassigned case, but am a bit puzzled by the !unassigned case.
> + do {
> + /* next event */
> + sched->state.event++;
> + if (sched->state.event >= sched->max_events) {
> + /* next weight */
> + sched->state.event = 0;
> + sched->state.weight++;
> + if (sched->state.weight > sched->max_weight)
> + return 0;
> + }
> + c = sched->constraints[sched->state.event];
> + } while (c->weight != sched->state.weight);
> +
> + sched->state.counter = 0; /* start with first counter */
> +
> + return 1;
> +}
fair enough..
Looks ok otherwise, just a tad hard to grok in one go.. a few comments
could go a long way. I'm sure I'll have forgotten how it works in a few
weeks.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2/2] perf, x86: Fix event scheduler for constraints with
2011-09-10 14:49 ` [PATCH 2/2] perf, x86: Fix event scheduler for constraints with Robert Richter
@ 2011-09-14 14:45 ` Peter Zijlstra
0 siblings, 0 replies; 5+ messages in thread
From: Peter Zijlstra @ 2011-09-14 14:45 UTC (permalink / raw)
To: Robert Richter; +Cc: Ingo Molnar, Stephane Eranian, LKML
On Sat, 2011-09-10 at 16:49 +0200, Robert Richter wrote:
> struct perf_sched {
> int max_weight;
> int max_events;
> struct event_constraint **constraints;
> struct sched_state state;
> +
> + int saved_states;
This creates an ugly hole in the structure
> + struct sched_state saved[SCHED_STATES_MAX];
> };
>
> static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
> @@ -817,7 +849,30 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
> sched->state.unassigned = num;
> }
>
> -static struct sched_state *perf_sched_find_counter(struct perf_sched *sched)
> +static void perf_sched_save_state(struct perf_sched *sched)
> +{
> + if (sched->saved_states >= SCHED_STATES_MAX)
> + return;
> +
> + sched->saved[sched->saved_states] = sched->state;
> + sched->saved_states++;
> +}
Shouldn't we fail the save_state when we're out of states? the restore
code doesn't check to see if --saved_states is within range and simply
derefs the array.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2011-09-14 14:45 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-09-10 14:49 [PATCH 0/2] perf, x86: handle overlapping counters Robert Richter
2011-09-10 14:49 ` [PATCH 1/2] perf, x86: Implement event scheduler helper functions Robert Richter
2011-09-14 14:43 ` Peter Zijlstra
2011-09-10 14:49 ` [PATCH 2/2] perf, x86: Fix event scheduler for constraints with Robert Richter
2011-09-14 14:45 ` Peter Zijlstra
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox