All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Stephane Eranian <eranian@google.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>,
	LKML <linux-kernel@vger.kernel.org>,
	mingo@elte.hu, Paul Mackerras <paulus@samba.org>,
	"David S. Miller" <davem@davemloft.net>
Subject: Re: [RFC] perf_events: ctx_flexible_sched_in() not maximizing PMU  utilization
Date: Fri, 14 May 2010 16:55:08 +0200	[thread overview]
Message-ID: <1273848908.1626.315.camel@laptop> (raw)
In-Reply-To: <AANLkTimu41pPmimFK10oQl0_Qh07vc3pJbxwNp-jDtTM@mail.gmail.com>

On Mon, 2010-05-10 at 11:41 +0200, Stephane Eranian wrote:
> Looks like a good solution, at least better than what is there right now.

Something like the below I guess, still need to make it an actual patch
though ;-)

The interesting bit is the schedule() function which tries to schedule
two queues fairly (cpu context and task context), and selects between
the two based on lag -- I'm not quite sure that that works out as
expected though, still have to do the actual math.

Also, I guess we should come up with a saner solution for the
per-task-per-cpu events (the unservicable stuff)

---


struct flexible_queue {
	struct rb_root waiting;
	struct rb_node *rb_leftmost;

	u64 min_service;
	u64 rel_avg;

	u64 clock;
	u64 running_stamp;

	struct list_head running;
	struct list_head unservicable;
	int unservicable_cpu;
	int nr_waiting;
};

enum flexible_type {
	flexible_waiting,
	flexible_running,
	flexible_unservicable,
};

struct flexible_event {
	union {
		struct rb_node rb_entry;
		struct list_head list_entry;
	};
	enum flexible_type type;

	u64 service;
};

static inline
s64 flexible_event_rel(struct flexible_queue *fq, struct flexible_event *fe)
{
	return fe->sevice - fq->min_service;
}

static void __enqueue_event(struct flexible_queue *fq, struct flexible_event *fe)
{
	struct rb_node **link = &fq->waiting.rb_node;
	struct rb_node *parent = NULL;
	struct flexible_event *entry;
	s64 rel = flexible_event_rel(fq, fe);
	int leftmost = 1;

	if (rel > 0) {
		fq->min_service += rel;
		fq->rel_avg -= fq->nr_waiting * rel;
		rel = 0;
	}

	fq->nr_waiting++;
	fq->rel_avg += rel;

	fe->type = flexible_waiting;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct flexible_event, rb_entry);
		/*
		 * We dont care about collisions. Nodes with
		 * the same rel stay together.
		 */
		if (rel < flexible_event_rel(fq, fe)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	if (leftmost)
		cfs_rq->rb_leftmost = &se->rb_entry;

	rb_link_node(&fe->rb_entry, parent, link);
	rb_insert_color(&fe->rb_entry, &fq->waiting);
}

static void __dequeue_event(struct flexible_queue *fq, struct flexible_event *fe)
{
	if (fq->rb_leftmost == &fe->rb_entry) {
		struct rb_node *next_node;

		next_node = rb_next(&fe->rb_entry);
		fq->rb_leftmost = next_node;
	}

	rb_erase(&fe->rb_entry, &fq->waiting);

	fq->nr_waiting--;
	fq->rel_avg -= flexible_event_rel(fq, fe);
}

static void
flexible_event_add(struct flexible_queue *fq, struct flexible_event *fe)
{
	fe->service = fq->min_service + fq->rel_avg;
	__enqueue_event(fq, fe);
}

static void
flexible_event_del(struct flexible_queue *fq, struct flexible_event *fe)
{
	switch (fe->type) {
	case flexible_waiting:
		__dequeue_event(fq, fe);
		break;

	case flexible_running:
	case flexible_unservicable:
		list_del(&fe->list_entry);
		break;
	}
}

static void flexible_unschedule(struct flexible_queue *fq)
{
	struct flexible_event *fe, *tmp;
	s64 delta = (s64)(fq->clock - fq->running_stamp);

	list_for_each_entry_safe(fe, tmp, &fq->running, list_entry) {
		list_del(&fe->list_entry);
		fe->service += delta;
		__enqueue_event(fq, fe);
	}
}

static
s64 flexible_event_lag(struct flexible_queue *fq, struct flexible_event *fe)
{
	return fq->min_service + (fq->rel_avg / fq->nr_waiting) - fe->service;
}

static struct flexible_event *__pick_event(struct flexible_queue *fq)
{
	struct flexible_event *fe;

	if (!fq->rb_leftmost)
		return NULL;

	fe = rb_entry(fq->rb_leftmost, struct flexible_event, rb_entry);

	return fe;
}

static
int flexible_schedule(struct flexible_queue *fq1, struct flexible_queue *fq2)
{
	struct flexible_event *tmp, *fe;
	struct flexible_queue *fq;
	s64 tmp_lag, max_lag;

	if (fq->unservicable_cpu != smp_processor_id()) {
		list_for_each_entry_save(fe, tmp, &fq->unservicable, list_entry) {
			list_del(&fe->list_entry);
			flexible_event_add(fq, fe);
		}

		fq->unservicable_cpu = smp_processor_id();
	}

again:
	max_lag = 0;
	fe = NULL;

	tmp =__pick_event(fq1);
	if (tmp) {
		tmp_lag = flexible_event_lag(fq1, fe1);
		if (tmp_lag > max_lag) {
			fq = fq1;
			fe = fe1;
			max_lag = tmp_lag;
		}
	}

	tmp =__pick_event(fq2);
	if (tmp) {
		tmp_lag = flexible_event_lag(fq2, fe2);
		if (tmp_lag > max_lag) {
			fq = fq2;
			fe = fe2;
			max_lag = tmp_lag;
		}
	}

	if (!fe)
		return 1; /* no more events to schedule */

	fq->running_stamp = fq->clock;

	if (event_of(fe)->cpu != -1 && event_of(fe)->cpu != smp_processor_id()) {
		__dequeue_event(fq, fe);
		fe->type = flexible_unservicable;
		list_add(&fe->list_entry, &fq->unservicable);
		goto again; /* can't run this event, try another */
	}

	if (group_sched_in(event_of(fe), ...) == 0) {
		__dequeue_event(fq, fe);
		fe->type = flexible_running;
		list_add(&fe->list_entry, &fq->running);
		return 0; /* success */
	}

	return -EBUSY; /* doesn't fit */
}



  reply	other threads:[~2010-05-14 14:55 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-05-06 14:03 [RFC] perf_events: ctx_flexible_sched_in() not maximizing PMU utilization Stephane Eranian
2010-05-06 14:20 ` Peter Zijlstra
2010-05-06 14:41   ` Stephane Eranian
2010-05-06 15:08     ` Peter Zijlstra
2010-05-06 16:26       ` Stephane Eranian
2010-05-06 17:11   ` Frederic Weisbecker
2010-05-06 17:30     ` Peter Zijlstra
2010-05-07  8:25       ` Peter Zijlstra
2010-05-07  8:44         ` Peter Zijlstra
2010-05-07  9:37         ` Stephane Eranian
2010-05-07 10:06           ` Peter Zijlstra
2010-05-07 10:49             ` Stephane Eranian
2010-05-07 11:15               ` Peter Zijlstra
2010-05-10  9:41                 ` Stephane Eranian
2010-05-14 14:55                   ` Peter Zijlstra [this message]
2010-05-14 15:07                     ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1273848908.1626.315.camel@laptop \
    --to=peterz@infradead.org \
    --cc=davem@davemloft.net \
    --cc=eranian@google.com \
    --cc=fweisbec@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.