From: Tianyang Chen <tiche@seas.upenn.edu>
To: xen-devel@lists.xenproject.org
Cc: dario.faggioli@citrix.com, george.dunlap@citrix.com,
Dagaen Golomb <dgolomb@seas.upenn.edu>,
Meng Xu <mengxu@cis.upenn.edu>
Subject: Re: [PATCH v5][RFC]xen: sched: convert RTDS from time to event driven model
Date: Wed, 24 Feb 2016 10:23:27 -0500 [thread overview]
Message-ID: <56CDCAEF.8000900@seas.upenn.edu> (raw)
In-Reply-To: <1454992407-5436-1-git-send-email-tiche@seas.upenn.edu>
Hey Dario: We are aiming for the next release and would appreciate it if
you can leave some comments on this version. Thanks.
Tianyang
On 2/8/2016 11:33 PM, Tianyang Chen wrote:
> Changes since v4:
> removed unnecessary replenishment queue checks in vcpu_wake()
> extended replq_remove() to all cases in vcpu_sleep()
> used _deadline_queue_insert() helper function for both queues
> _replq_insert() and _replq_remove() program timer internally
>
> Changes since v3:
> removed running queue.
> added repl queue to keep track of repl events.
> timer is now per scheduler.
> timer is init on a valid cpu in a cpupool.
>
> Signed-off-by: Tianyang Chen <tiche@seas.upenn.edu>
> Signed-off-by: Meng Xu <mengxu@cis.upenn.edu>
> Signed-off-by: Dagaen Golomb <dgolomb@seas.upenn.edu>
> ---
> xen/common/sched_rt.c | 337 ++++++++++++++++++++++++++++++++++++-------------
> 1 file changed, 251 insertions(+), 86 deletions(-)
>
> diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
> index 2e5430f..1f0bb7b 100644
> --- a/xen/common/sched_rt.c
> +++ b/xen/common/sched_rt.c
> @@ -16,6 +16,7 @@
> #include <xen/delay.h>
> #include <xen/event.h>
> #include <xen/time.h>
> +#include <xen/timer.h>
> #include <xen/perfc.h>
> #include <xen/sched-if.h>
> #include <xen/softirq.h>
> @@ -87,7 +88,7 @@
> #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
>
> #define UPDATE_LIMIT_SHIFT 10
> -#define MAX_SCHEDULE (MILLISECS(1))
> +
> /*
> * Flags
> */
> @@ -142,6 +143,12 @@ static cpumask_var_t *_cpumask_scratch;
> */
> static unsigned int nr_rt_ops;
>
> +/* handler for the replenishment timer */
> +static void repl_handler(void *data);
> +
> +/* checks if a timer is active or not */
> +bool_t active_timer(struct timer* t);
> +
> /*
> * Systme-wide private data, include global RunQueue/DepletedQ
> * Global lock is referenced by schedule_data.schedule_lock from all
> @@ -152,7 +159,9 @@ struct rt_private {
> struct list_head sdom; /* list of availalbe domains, used for dump */
> struct list_head runq; /* ordered list of runnable vcpus */
> struct list_head depletedq; /* unordered list of depleted vcpus */
> + struct list_head replq; /* ordered list of vcpus that need replenishment */
> cpumask_t tickled; /* cpus been tickled */
> + struct timer *repl_timer; /* replenishment timer */
> };
>
> /*
> @@ -160,6 +169,7 @@ struct rt_private {
> */
> struct rt_vcpu {
> struct list_head q_elem; /* on the runq/depletedq list */
> + struct list_head replq_elem;/* on the repl event list */
>
> /* Up-pointers */
> struct rt_dom *sdom;
> @@ -213,8 +223,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops)
> return &rt_priv(ops)->depletedq;
> }
>
> +static inline struct list_head *rt_replq(const struct scheduler *ops)
> +{
> + return &rt_priv(ops)->replq;
> +}
> +
> /*
> - * Queue helper functions for runq and depletedq
> + * Queue helper functions for runq, depletedq
> + * and replenishment event queue
> */
> static int
> __vcpu_on_q(const struct rt_vcpu *svc)
> @@ -228,6 +244,18 @@ __q_elem(struct list_head *elem)
> return list_entry(elem, struct rt_vcpu, q_elem);
> }
>
> +static struct rt_vcpu *
> +__replq_elem(struct list_head *elem)
> +{
> + return list_entry(elem, struct rt_vcpu, replq_elem);
> +}
> +
> +static int
> +__vcpu_on_replq(const struct rt_vcpu *svc)
> +{
> + return !list_empty(&svc->replq_elem);
> +}
> +
> /*
> * Debug related code, dump vcpu/cpu information
> */
> @@ -288,7 +316,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu)
> static void
> rt_dump(const struct scheduler *ops)
> {
> - struct list_head *runq, *depletedq, *iter;
> + struct list_head *runq, *depletedq, *replq, *iter;
> struct rt_private *prv = rt_priv(ops);
> struct rt_vcpu *svc;
> struct rt_dom *sdom;
> @@ -301,6 +329,7 @@ rt_dump(const struct scheduler *ops)
>
> runq = rt_runq(ops);
> depletedq = rt_depletedq(ops);
> + replq = rt_replq(ops);
>
> printk("Global RunQueue info:\n");
> list_for_each( iter, runq )
> @@ -316,6 +345,13 @@ rt_dump(const struct scheduler *ops)
> rt_dump_vcpu(ops, svc);
> }
>
> + printk("Global Replenishment Event info:\n");
> + list_for_each( iter, replq )
> + {
> + svc = __replq_elem(iter);
> + rt_dump_vcpu(ops, svc);
> + }
> +
> printk("Domain info:\n");
> list_for_each( iter, &prv->sdom )
> {
> @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc)
> }
>
> /*
> + * Removing a vcpu from the replenishment queue could
> + * re-program the timer for the next replenishment event
> + * if the timer is currently active
> + */
> +static inline void
> +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> + struct rt_private *prv = rt_priv(ops);
> + struct list_head *replq = rt_replq(ops);
> + struct timer* repl_timer = prv->repl_timer;
> +
> + if ( __vcpu_on_replq(svc) )
> + {
> + /*
> + * disarm the timer if removing the first replenishment event
> + * which is going to happen next
> + */
> + if( active_timer(repl_timer) )
> + {
> + struct rt_vcpu *next_repl = __replq_elem(replq->next);
> +
> + if( next_repl->cur_deadline == svc->cur_deadline )
> + repl_timer->expires = 0;
> +
> + list_del_init(&svc->replq_elem);
> +
> + /* re-arm the timer for the next replenishment event */
> + if( !list_empty(replq) )
> + {
> + struct rt_vcpu *svc_next = __replq_elem(replq->next);
> + set_timer(repl_timer, svc_next->cur_deadline);
> + }
> + }
> +
> + else
> + list_del_init(&svc->replq_elem);
> + }
> +}
> +
> +/*
> + * A utility function that inserts a vcpu into a
> + * queue based on certain order (EDF)
> + */
> +static void
> +_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem),
> + struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue)
> +{
> + struct list_head *iter;
> +
> + list_for_each(iter, queue)
> + {
> + struct rt_vcpu * iter_svc = (*_get_q_elem)(iter);
> + if ( svc->cur_deadline <= iter_svc->cur_deadline )
> + break;
> + }
> +
> + list_add_tail(elem, iter);
> +}
> +
> +/*
> * Insert svc with budget in RunQ according to EDF:
> * vcpus with smaller deadlines go first.
> * Insert svc without budget in DepletedQ unsorted;
> @@ -397,7 +493,6 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
> {
> struct rt_private *prv = rt_priv(ops);
> struct list_head *runq = rt_runq(ops);
> - struct list_head *iter;
>
> ASSERT( spin_is_locked(&prv->lock) );
>
> @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
>
> /* add svc to runq if svc still has budget */
> if ( svc->cur_budget > 0 )
> - {
> - list_for_each(iter, runq)
> - {
> - struct rt_vcpu * iter_svc = __q_elem(iter);
> - if ( svc->cur_deadline <= iter_svc->cur_deadline )
> - break;
> - }
> - list_add_tail(&svc->q_elem, iter);
> - }
> + _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq);
> else
> - {
> list_add(&svc->q_elem, &prv->depletedq);
> - }
> }
>
> /*
> + * Insert svc into the repl event list:
> + * vcpus that need to be replenished earlier go first.
> + * The scheduler private lock serializes this operation.
> + * It could re-program the timer if it fires later than
> + * this vcpu's cur_deadline. Also, this is used to program
> + * the timer for the first time.
> + */
> +static void
> +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> + struct list_head *replq = rt_replq(ops);
> + struct rt_private *prv = rt_priv(ops);
> + struct timer *repl_timer = prv->repl_timer;
> +
> + ASSERT( !__vcpu_on_replq(svc) );
> +
> + _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, replq);
> +
> + if( repl_timer->expires == 0 ||
> + ( active_timer(repl_timer) && repl_timer->expires > svc->cur_deadline ) )
> + set_timer(repl_timer,svc->cur_deadline);
> +}
> +
> +
> +/*
> * Init/Free related code
> */
> static int
> @@ -449,11 +559,18 @@ rt_init(struct scheduler *ops)
> INIT_LIST_HEAD(&prv->sdom);
> INIT_LIST_HEAD(&prv->runq);
> INIT_LIST_HEAD(&prv->depletedq);
> + INIT_LIST_HEAD(&prv->replq);
>
> cpumask_clear(&prv->tickled);
>
> ops->sched_data = prv;
>
> + /*
> + * The timer initialization will happen later when
> + * the first pcpu is added to this pool in alloc_pdata
> + */
> + prv->repl_timer = NULL;
> +
> return 0;
>
> no_mem:
> @@ -473,6 +590,10 @@ rt_deinit(const struct scheduler *ops)
> xfree(_cpumask_scratch);
> _cpumask_scratch = NULL;
> }
> +
> + kill_timer(prv->repl_timer);
> + xfree(prv->repl_timer);
> +
> xfree(prv);
> }
>
> @@ -493,6 +614,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu)
> if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) )
> return NULL;
>
> + if( prv->repl_timer == NULL )
> + {
> + /* allocate the timer on the first cpu of this pool */
> + prv->repl_timer = xzalloc(struct timer);
> +
> + if(prv->repl_timer == NULL )
> + return NULL;
> +
> + init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu);
> + }
> +
> /* 1 indicates alloc. succeed in schedule.c */
> return (void *)1;
> }
> @@ -586,6 +718,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
> return NULL;
>
> INIT_LIST_HEAD(&svc->q_elem);
> + INIT_LIST_HEAD(&svc->replq_elem);
> svc->flags = 0U;
> svc->sdom = dd;
> svc->vcpu = vc;
> @@ -609,7 +742,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv)
> }
>
> /*
> - * This function is called in sched_move_domain() in schedule.c
> + * It is called in sched_move_domain() and sched_init_vcpu()
> + * in schedule.c
> * When move a domain to a new cpupool.
> * It inserts vcpus of moving domain to the scheduler's RunQ in
> * dest. cpupool.
> @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
> lock = vcpu_schedule_lock_irq(vc);
> if ( __vcpu_on_q(svc) )
> __q_remove(svc);
> +
> + if( __vcpu_on_replq(svc) )
> + __replq_remove(ops,svc);
> +
> vcpu_schedule_unlock_irq(lock, vc);
> }
>
> @@ -785,44 +923,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask)
> }
>
> /*
> - * Update vcpu's budget and
> - * sort runq by insert the modifed vcpu back to runq
> - * lock is grabbed before calling this function
> - */
> -static void
> -__repl_update(const struct scheduler *ops, s_time_t now)
> -{
> - struct list_head *runq = rt_runq(ops);
> - struct list_head *depletedq = rt_depletedq(ops);
> - struct list_head *iter;
> - struct list_head *tmp;
> - struct rt_vcpu *svc = NULL;
> -
> - list_for_each_safe(iter, tmp, runq)
> - {
> - svc = __q_elem(iter);
> - if ( now < svc->cur_deadline )
> - break;
> -
> - rt_update_deadline(now, svc);
> - /* reinsert the vcpu if its deadline is updated */
> - __q_remove(svc);
> - __runq_insert(ops, svc);
> - }
> -
> - list_for_each_safe(iter, tmp, depletedq)
> - {
> - svc = __q_elem(iter);
> - if ( now >= svc->cur_deadline )
> - {
> - rt_update_deadline(now, svc);
> - __q_remove(svc); /* remove from depleted queue */
> - __runq_insert(ops, svc); /* add to runq */
> - }
> - }
> -}
> -
> -/*
> * schedule function for rt scheduler.
> * The lock is already grabbed in schedule.c, no need to lock here
> */
> @@ -841,7 +941,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
> /* burn_budget would return for IDLE VCPU */
> burn_budget(ops, scurr, now);
>
> - __repl_update(ops, now);
>
> if ( tasklet_work_scheduled )
> {
> @@ -868,6 +967,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
> set_bit(__RTDS_delayed_runq_add, &scurr->flags);
>
> snext->last_start = now;
> +
> + ret.time = -1; /* if an idle vcpu is picked */
> if ( !is_idle_vcpu(snext->vcpu) )
> {
> if ( snext != scurr )
> @@ -880,9 +981,11 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
> snext->vcpu->processor = cpu;
> ret.migrated = 1;
> }
> +
> + ret.time = snext->budget; /* invoke the scheduler next time */
> +
> }
>
> - ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
> ret.task = snext->vcpu;
>
> /* TRACE */
> @@ -914,7 +1017,7 @@ static void
> rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
> {
> struct rt_vcpu * const svc = rt_vcpu(vc);
> -
> +
> BUG_ON( is_idle_vcpu(vc) );
> SCHED_STAT_CRANK(vcpu_sleep);
>
> @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
> __q_remove(svc);
> else if ( svc->flags & RTDS_delayed_runq_add )
> clear_bit(__RTDS_delayed_runq_add, &svc->flags);
> +
> + if( __vcpu_on_replq(svc) )
> + __replq_remove(ops, svc);
> }
>
> /*
> @@ -1026,10 +1132,6 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
> {
> struct rt_vcpu * const svc = rt_vcpu(vc);
> s_time_t now = NOW();
> - struct rt_private *prv = rt_priv(ops);
> - struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
> - struct rt_dom *sdom = NULL;
> - cpumask_t *online;
>
> BUG_ON( is_idle_vcpu(vc) );
>
> @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
> else
> SCHED_STAT_CRANK(vcpu_wake_not_runnable);
>
> + /* A budget replenishment may be needed here before inserting back to
> + * the runq. If so, the vcpu is re-inserted into the replenishment queue.
> + */
> + if ( now >= svc->cur_deadline)
> + {
> + rt_update_deadline(now, svc);
> + __replq_remove(ops, svc);
> + }
> +
> + if( !__vcpu_on_replq(svc) )
> + __replq_insert(ops, svc);
> +
> /* If context hasn't been saved for this vcpu yet, we can't put it on
> * the Runqueue/DepletedQ. Instead, we set a flag so that it will be
> * put on the Runqueue/DepletedQ after the context has been saved.
> @@ -1061,22 +1175,10 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
> return;
> }
>
> - if ( now >= svc->cur_deadline)
> - rt_update_deadline(now, svc);
> -
> /* insert svc to runq/depletedq because svc is not in queue now */
> __runq_insert(ops, svc);
>
> - __repl_update(ops, now);
> -
> - ASSERT(!list_empty(&prv->sdom));
> - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
> - online = cpupool_domain_cpumask(sdom->dom);
> - snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */
> -
> - runq_tickle(ops, snext);
> -
> - return;
> + runq_tickle(ops, svc);
> }
>
> /*
> @@ -1087,10 +1189,6 @@ static void
> rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
> {
> struct rt_vcpu *svc = rt_vcpu(vc);
> - struct rt_vcpu *snext = NULL;
> - struct rt_dom *sdom = NULL;
> - struct rt_private *prv = rt_priv(ops);
> - cpumask_t *online;
> spinlock_t *lock = vcpu_schedule_lock_irq(vc);
>
> clear_bit(__RTDS_scheduled, &svc->flags);
> @@ -1102,14 +1200,7 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
> likely(vcpu_runnable(vc)) )
> {
> __runq_insert(ops, svc);
> - __repl_update(ops, NOW());
> -
> - ASSERT(!list_empty(&prv->sdom));
> - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
> - online = cpupool_domain_cpumask(sdom->dom);
> - snext = __runq_pick(ops, online); /* pick snext from ALL cpus */
> -
> - runq_tickle(ops, snext);
> + runq_tickle(ops, svc);
> }
> out:
> vcpu_schedule_unlock_irq(lock, vc);
> @@ -1168,6 +1259,80 @@ rt_dom_cntl(
> return rc;
> }
>
> +/*
> + * The replenishment timer handler picks vcpus
> + * from the replq and does the actual replenishment
> + */
> +static void repl_handler(void *data){
> + unsigned long flags;
> + s_time_t now = NOW();
> + struct scheduler *ops = data;
> + struct rt_private *prv = rt_priv(ops);
> + struct list_head *replq = rt_replq(ops);
> + struct timer *repl_timer = prv->repl_timer;
> + struct list_head *iter, *tmp;
> + struct rt_vcpu *svc = NULL;
> +
> + spin_lock_irqsave(&prv->lock, flags);
> +
> + stop_timer(repl_timer);
> +
> + list_for_each_safe(iter, tmp, replq)
> + {
> + svc = __replq_elem(iter);
> +
> + if ( now >= svc->cur_deadline )
> + {
> + rt_update_deadline(now, svc);
> +
> + /*
> + * when the replenishment happens
> + * svc is either on a pcpu or on
> + * runq/depletedq
> + */
> + if( __vcpu_on_q(svc) )
> + {
> + /* put back to runq */
> + __q_remove(svc);
> + __runq_insert(ops, svc);
> + }
> +
> + /*
> + * tickle regardless where it's at
> + * because a running vcpu could have
> + * a later deadline than others after
> + * replenishment
> + */
> + runq_tickle(ops, svc);
> +
> + /* update replenishment event queue */
> + __replq_remove(ops, svc);
> + __replq_insert(ops, svc);
> + }
> +
> + else
> + break;
> + }
> +
> + /*
> + * use the vcpu that's on the top
> + * or else don't program the timer
> + */
> + if( !list_empty(replq) )
> + set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline);
> +
> + spin_unlock_irqrestore(&prv->lock, flags);
> +
> +}
> +
> +/* checks if a timer is active or not */
> +bool_t active_timer(struct timer *timer)
> +{
> + ASSERT(timer->status >= TIMER_STATUS_inactive);
> + ASSERT(timer->status <= TIMER_STATUS_in_list);
> + return (timer->status >= TIMER_STATUS_in_heap);
> +}
> +
> static struct rt_private _rt_priv;
>
> static const struct scheduler sched_rtds_def = {
>
next prev parent reply other threads:[~2016-02-24 15:25 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-02-09 4:33 [PATCH v5][RFC]xen: sched: convert RTDS from time to event driven model Tianyang Chen
2016-02-16 3:55 ` Meng Xu
2016-02-18 1:55 ` Tianyang Chen
2016-02-24 15:23 ` Tianyang Chen [this message]
2016-02-25 2:02 ` Dario Faggioli
2016-02-25 6:15 ` Tianyang Chen
2016-02-25 10:34 ` Dario Faggioli
2016-02-25 17:29 ` Tianyang Chen
2016-02-25 17:51 ` Dario Faggioli
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=56CDCAEF.8000900@seas.upenn.edu \
--to=tiche@seas.upenn.edu \
--cc=dario.faggioli@citrix.com \
--cc=dgolomb@seas.upenn.edu \
--cc=george.dunlap@citrix.com \
--cc=mengxu@cis.upenn.edu \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.