Subject: Re: [RFC] perf_events: ctx_flexible_sched_in() not maximizing PMU utilization
From: Peter Zijlstra
To: Stephane Eranian
Cc: Frederic Weisbecker, LKML, mingo@elte.hu, Paul Mackerras, "David S. Miller"
Date: Fri, 14 May 2010 16:55:08 +0200
Message-ID: <1273848908.1626.315.camel@laptop>
References: <1273155640.5605.300.camel@twins>
	 <20100506171141.GA5562@nowhere>
	 <1273167024.1642.256.camel@laptop>
	 <1273220736.1642.318.camel@laptop>
	 <1273226796.1642.333.camel@laptop>
	 <1273230948.1642.351.camel@laptop>

On Mon, 2010-05-10 at 11:41 +0200, Stephane Eranian wrote:
> Looks like a good solution, at least better than what is there right now.

Something like the below, I guess; still need to make it an actual patch
though ;-)

The interesting bit is the schedule() function, which tries to schedule
two queues fairly (cpu context and task context) and selects between the
two based on lag -- I'm not quite sure that works out as expected though,
still have to do the actual math.
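To spell out the lag criterion, roughly as the sketch below computes it:
each event carries a virtual service time, the queue tracks min_service
plus the waiting events' offsets against it summed up in rel_avg, and

	lag(e) = min_service + rel_avg / nr_waiting - e->service

is how far an event has fallen behind its queue's average. schedule()
looks at the leftmost (least serviced) event of each queue and runs the
one with the larger lag first.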
Also, I guess we should come up with a saner solution for the
per-task-per-cpu events (the unservicable stuff).

---

struct flexible_queue {
	struct rb_root waiting;
	struct rb_node *rb_leftmost;

	u64 min_service;
	u64 rel_avg;
	u64 clock;
	u64 running_stamp;

	struct list_head running;
	struct list_head unservicable;
	int unservicable_cpu;
	int nr_waiting;
};

enum flexible_type {
	flexible_waiting,
	flexible_running,
	flexible_unservicable,
};

struct flexible_event {
	union {
		struct rb_node rb_entry;
		struct list_head list_entry;
	};
	enum flexible_type type;

	u64 service;
};

static inline s64 flexible_event_rel(struct flexible_queue *fq, struct flexible_event *fe)
{
	return fe->service - fq->min_service;
}

static void __enqueue_event(struct flexible_queue *fq, struct flexible_event *fe)
{
	struct rb_node **link = &fq->waiting.rb_node;
	struct rb_node *parent = NULL;
	struct flexible_event *entry;
	s64 rel = flexible_event_rel(fq, fe);
	int leftmost = 1;

	if (rel > 0) {
		fq->min_service += rel;
		fq->rel_avg -= fq->nr_waiting * rel;
		rel = 0;
	}

	fq->nr_waiting++;
	fq->rel_avg += rel;

	fe->type = flexible_waiting;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct flexible_event, rb_entry);

		/*
		 * We don't care about collisions. Nodes with
		 * the same rel stay together.
		 */
		if (rel < flexible_event_rel(fq, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	if (leftmost)
		fq->rb_leftmost = &fe->rb_entry;

	rb_link_node(&fe->rb_entry, parent, link);
	rb_insert_color(&fe->rb_entry, &fq->waiting);
}

static void __dequeue_event(struct flexible_queue *fq, struct flexible_event *fe)
{
	if (fq->rb_leftmost == &fe->rb_entry) {
		struct rb_node *next_node;

		next_node = rb_next(&fe->rb_entry);
		fq->rb_leftmost = next_node;
	}

	rb_erase(&fe->rb_entry, &fq->waiting);

	fq->nr_waiting--;
	fq->rel_avg -= flexible_event_rel(fq, fe);
}

static void flexible_event_add(struct flexible_queue *fq, struct flexible_event *fe)
{
	fe->service = fq->min_service + fq->rel_avg;
	__enqueue_event(fq, fe);
}

static void flexible_event_del(struct flexible_queue *fq, struct flexible_event *fe)
{
	switch (fe->type) {
	case flexible_waiting:
		__dequeue_event(fq, fe);
		break;

	case flexible_running:
	case flexible_unservicable:
		list_del(&fe->list_entry);
		break;
	}
}

static void flexible_unschedule(struct flexible_queue *fq)
{
	struct flexible_event *fe, *tmp;
	s64 delta = (s64)(fq->clock - fq->running_stamp);

	/*
	 * Put the events that were running back on the service tree,
	 * charging them for the time they just ran.
	 */
	list_for_each_entry_safe(fe, tmp, &fq->running, list_entry) {
		list_del(&fe->list_entry);
		fe->service += delta;
		__enqueue_event(fq, fe);
	}
}

static s64 flexible_event_lag(struct flexible_queue *fq, struct flexible_event *fe)
{
	return fq->min_service + (fq->rel_avg / fq->nr_waiting) - fe->service;
}

static struct flexible_event *__pick_event(struct flexible_queue *fq)
{
	struct flexible_event *fe;

	if (!fq->rb_leftmost)
		return NULL;

	fe = rb_entry(fq->rb_leftmost, struct flexible_event, rb_entry);

	return fe;
}

static int flexible_schedule(struct flexible_queue *fq1, struct flexible_queue *fq2)
{
	struct flexible_event *tmp, *fe;
	struct flexible_queue *fqs[] = { fq1, fq2 };
	struct flexible_queue *fq;
	s64 tmp_lag, max_lag;
	int i;

	/*
	 * Events that were unservicable on the cpu we last ran on may be
	 * servicable here; put them back on the waiting tree.
	 */
	for (i = 0; i < 2; i++) {
		fq = fqs[i];
		if (fq->unservicable_cpu == smp_processor_id())
			continue;

		list_for_each_entry_safe(fe, tmp, &fq->unservicable, list_entry) {
			list_del(&fe->list_entry);
			flexible_event_add(fq, fe);
		}

		fq->unservicable_cpu = smp_processor_id();
	}

again:
	max_lag = 0;
	fe = NULL;

	tmp = __pick_event(fq1);
	if (tmp) {
		tmp_lag = flexible_event_lag(fq1, tmp);
		if (tmp_lag > max_lag) {
			fq = fq1;
			fe = tmp;
			max_lag = tmp_lag;
		}
	}

	tmp = __pick_event(fq2);
	if (tmp) {
		tmp_lag = flexible_event_lag(fq2, tmp);
		if (tmp_lag > max_lag) {
			fq = fq2;
			fe = tmp;
			max_lag = tmp_lag;
		}
	}

	if (!fe)
		return 1; /* no more events to schedule */

	fq->running_stamp = fq->clock;

	if (event_of(fe)->cpu != -1 && event_of(fe)->cpu != smp_processor_id()) {
		__dequeue_event(fq, fe);
		fe->type = flexible_unservicable;
		list_add(&fe->list_entry, &fq->unservicable);
		goto again; /* can't run this event here, try another */
	}

	if (group_sched_in(event_of(fe), ...) == 0) {
		__dequeue_event(fq, fe);
		fe->type = flexible_running;
		list_add(&fe->list_entry, &fq->running);
		return 0; /* success */
	}

	return -EBUSY; /* doesn't fit */
}
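
Completely untested, but the caller I have in mind looks something like
the below -- the function name, the 'now' argument and the clock handling
are placeholders, and what to do on -EBUSY (give up vs. trying a smaller
group) still needs thought:

static void flexible_sched_in(struct flexible_queue *cpu_fq,
			      struct flexible_queue *task_fq, u64 now)
{
	int ret;

	cpu_fq->clock = now;
	task_fq->clock = now;

	/* requeue whatever ran last period, charging it for the time it ran */
	flexible_unschedule(cpu_fq);
	flexible_unschedule(task_fq);

	/*
	 * flexible_schedule() returns 0 when it got a group onto the PMU,
	 * 1 when there is nothing left to schedule and -EBUSY when the
	 * next group doesn't fit; keep going while groups go on.
	 */
	do {
		ret = flexible_schedule(cpu_fq, task_fq);
	} while (ret == 0);
}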