From: Tianyang Chen <tiche@seas.upenn.edu>
To: xen-devel@lists.xenproject.org
Cc: dario.faggioli@citrix.com, george.dunlap@citrix.com,
Dagaen Golomb <dgolomb@seas.upenn.edu>,
Meng Xu <mengxu@cis.upenn.edu>
Subject: Re: [PATCH v5][RFC]xen: sched: convert RTDS from time to event driven model
Date: Wed, 24 Feb 2016 10:23:27 -0500 [thread overview]
Message-ID: <56CDCAEF.8000900@seas.upenn.edu> (raw)
In-Reply-To: <1454992407-5436-1-git-send-email-tiche@seas.upenn.edu>
Hey Dario,

We are aiming for the next release and would appreciate it if you could
leave some comments on this version. I have also added a few inline notes
below on the trickier parts. Thanks.

Tianyang
On 2/8/2016 11:33 PM, Tianyang Chen wrote:
> Changes since v4:
> removed unnecessary replenishment queue checks in vcpu_wake()
> extended replq_remove() to all cases in vcpu_sleep()
> used _deadline_queue_insert() helper function for both queues
> _replq_insert() and _replq_remove() program timer internally
>
> Changes since v3:
> removed running queue
> added repl queue to keep track of repl events
> timer is now per scheduler
> timer is initialized on a valid cpu in a cpupool
>
> Signed-off-by: Tianyang Chen <tiche@seas.upenn.edu>
> Signed-off-by: Meng Xu <mengxu@cis.upenn.edu>
> Signed-off-by: Dagaen Golomb <dgolomb@seas.upenn.edu>
> ---
> xen/common/sched_rt.c | 337 ++++++++++++++++++++++++++++++++++++-------------
> 1 file changed, 251 insertions(+), 86 deletions(-)
>
> diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
> index 2e5430f..1f0bb7b 100644
> --- a/xen/common/sched_rt.c
> +++ b/xen/common/sched_rt.c
> @@ -16,6 +16,7 @@
> #include <xen/delay.h>
> #include <xen/event.h>
> #include <xen/time.h>
> +#include <xen/timer.h>
> #include <xen/perfc.h>
> #include <xen/sched-if.h>
> #include <xen/softirq.h>
> @@ -87,7 +88,7 @@
> #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
>
> #define UPDATE_LIMIT_SHIFT 10
> -#define MAX_SCHEDULE (MILLISECS(1))
> +
> /*
> * Flags
> */
> @@ -142,6 +143,12 @@ static cpumask_var_t *_cpumask_scratch;
> */
> static unsigned int nr_rt_ops;
>
> +/* handler for the replenishment timer */
> +static void repl_handler(void *data);
> +
> +/* checks if a timer is active, i.e. armed to fire */
> +static bool_t active_timer(struct timer *t);
> +
> /*
> * Systme-wide private data, include global RunQueue/DepletedQ
> * Global lock is referenced by schedule_data.schedule_lock from all
> @@ -152,7 +159,9 @@ struct rt_private {
> struct list_head sdom; /* list of availalbe domains, used for dump */
> struct list_head runq; /* ordered list of runnable vcpus */
> struct list_head depletedq; /* unordered list of depleted vcpus */
> + struct list_head replq; /* ordered list of vcpus that need replenishment */
> cpumask_t tickled; /* cpus been tickled */
> + struct timer *repl_timer; /* replenishment timer */
> };
>
> /*
> @@ -160,6 +169,7 @@ struct rt_private {
> */
> struct rt_vcpu {
> struct list_head q_elem; /* on the runq/depletedq list */
> +    struct list_head replq_elem; /* on the replenishment events list */
>
> /* Up-pointers */
> struct rt_dom *sdom;
> @@ -213,8 +223,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops)
> return &rt_priv(ops)->depletedq;
> }
>
> +static inline struct list_head *rt_replq(const struct scheduler *ops)
> +{
> + return &rt_priv(ops)->replq;
> +}
> +
> /*
> - * Queue helper functions for runq and depletedq
> + * Queue helper functions for runq, depletedq,
> + * and the replenishment event queue
> */
> static int
> __vcpu_on_q(const struct rt_vcpu *svc)
> @@ -228,6 +244,18 @@ __q_elem(struct list_head *elem)
> return list_entry(elem, struct rt_vcpu, q_elem);
> }
>
> +static struct rt_vcpu *
> +__replq_elem(struct list_head *elem)
> +{
> + return list_entry(elem, struct rt_vcpu, replq_elem);
> +}
> +
> +static int
> +__vcpu_on_replq(const struct rt_vcpu *svc)
> +{
> + return !list_empty(&svc->replq_elem);
> +}
> +
> /*
> * Debug related code, dump vcpu/cpu information
> */
> @@ -288,7 +316,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu)
> static void
> rt_dump(const struct scheduler *ops)
> {
> - struct list_head *runq, *depletedq, *iter;
> + struct list_head *runq, *depletedq, *replq, *iter;
> struct rt_private *prv = rt_priv(ops);
> struct rt_vcpu *svc;
> struct rt_dom *sdom;
> @@ -301,6 +329,7 @@ rt_dump(const struct scheduler *ops)
>
> runq = rt_runq(ops);
> depletedq = rt_depletedq(ops);
> + replq = rt_replq(ops);
>
> printk("Global RunQueue info:\n");
> list_for_each( iter, runq )
> @@ -316,6 +345,13 @@ rt_dump(const struct scheduler *ops)
> rt_dump_vcpu(ops, svc);
> }
>
> + printk("Global Replenishment Event info:\n");
> + list_for_each( iter, replq )
> + {
> + svc = __replq_elem(iter);
> + rt_dump_vcpu(ops, svc);
> + }
> +
> printk("Domain info:\n");
> list_for_each( iter, &prv->sdom )
> {
> @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc)
> }
>
> /*
> + * Removing a vcpu from the replenishment queue may re-program
> + * the timer for the next replenishment event, if the vcpu being
> + * removed is the earliest one and the timer is currently active
> + */
> +static inline void
> +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> +    struct rt_private *prv = rt_priv(ops);
> +    struct list_head *replq = rt_replq(ops);
> +    struct timer *repl_timer = prv->repl_timer;
> +
> +    if ( __vcpu_on_replq(svc) )
> +    {
> +        /*
> +         * Disarm the timer if we are removing the replenishment
> +         * event that was scheduled to happen next.
> +         */
> +        if ( active_timer(repl_timer) )
> +        {
> +            struct rt_vcpu *next_repl = __replq_elem(replq->next);
> +
> +            if ( next_repl->cur_deadline == svc->cur_deadline )
> +                repl_timer->expires = 0;
> +
> +            list_del_init(&svc->replq_elem);
> +
> +            /* re-arm the timer for the next replenishment event */
> +            if ( !list_empty(replq) )
> +            {
> +                struct rt_vcpu *svc_next = __replq_elem(replq->next);
> +
> +                set_timer(repl_timer, svc_next->cur_deadline);
> +            }
> +        }
> +        else
> +            list_del_init(&svc->replq_elem);
> +    }
> +}
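The intent above is easier to see as straight-line logic. Below is a minimal
sketch of the same behaviour, written against the list/timer primitives used
in this file. It is illustrative only: it identifies the head of the queue by
pointer identity instead of by deadline value (the comparison above assumes
no two queued vcpus ever share a deadline), and it uses stop_timer() instead
of zeroing repl_timer->expires by hand.

static inline void
replq_remove_sketch(const struct scheduler *ops, struct rt_vcpu *svc)
{
    struct list_head *replq = rt_replq(ops);
    struct timer *repl_timer = rt_priv(ops)->repl_timer;
    bool_t was_head = (replq->next == &svc->replq_elem);

    list_del_init(&svc->replq_elem);

    if ( was_head )
    {
        if ( !list_empty(replq) )
            /* the new head is now the next replenishment event */
            set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline);
        else
            stop_timer(repl_timer);
    }
}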
> +
> +/*
> + * A utility function that inserts a vcpu into a
> + * queue in increasing deadline order (EDF)
> + */
> +static void
> +_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem),
> +                       struct rt_vcpu *svc, struct list_head *elem,
> +                       struct list_head *queue)
> +{
> +    struct list_head *iter;
> +
> +    list_for_each( iter, queue )
> +    {
> +        struct rt_vcpu * iter_svc = (*_get_q_elem)(iter);
> +
> +        if ( svc->cur_deadline <= iter_svc->cur_deadline )
> +            break;
> +    }
> +
> +    list_add_tail(elem, iter);
> +}
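For reference, what this helper maintains is plain EDF ordering: insert
before the first element with a later (or equal) deadline. A self-contained
user-space sketch of the same idea, using a hypothetical singly-linked node
type rather than the Xen list API:

#include <stddef.h>
#include <stdint.h>

typedef int64_t s_time_t;

struct node {
    s_time_t deadline;
    struct node *next;
};

/*
 * Walk past every node with a strictly earlier deadline, then link the
 * new node in; like _deadline_queue_insert(), a new node is placed in
 * front of nodes that share its deadline.
 */
static void edf_insert(struct node **head, struct node *n)
{
    struct node **pp = head;

    while ( *pp != NULL && (*pp)->deadline < n->deadline )
        pp = &(*pp)->next;

    n->next = *pp;
    *pp = n;
}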
> +
> +/*
> * Insert svc with budget in RunQ according to EDF:
> * vcpus with smaller deadlines go first.
> * Insert svc without budget in DepletedQ unsorted;
> @@ -397,7 +493,6 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
> {
> struct rt_private *prv = rt_priv(ops);
> struct list_head *runq = rt_runq(ops);
> - struct list_head *iter;
>
> ASSERT( spin_is_locked(&prv->lock) );
>
> @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
>
> /* add svc to runq if svc still has budget */
> if ( svc->cur_budget > 0 )
> - {
> - list_for_each(iter, runq)
> - {
> - struct rt_vcpu * iter_svc = __q_elem(iter);
> - if ( svc->cur_deadline <= iter_svc->cur_deadline )
> - break;
> - }
> - list_add_tail(&svc->q_elem, iter);
> - }
> + _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq);
> else
> - {
> list_add(&svc->q_elem, &prv->depletedq);
> - }
> }
>
> /*
> + * Insert svc into the replenishment event list:
> + * vcpus that need to be replenished earlier go first.
> + * The scheduler private lock serializes this operation.
> + * It may re-program the timer if the timer currently fires
> + * later than this vcpu's cur_deadline; it is also how the
> + * timer gets programmed for the first time.
> + */
> +static void
> +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> +    struct list_head *replq = rt_replq(ops);
> +    struct rt_private *prv = rt_priv(ops);
> +    struct timer *repl_timer = prv->repl_timer;
> +
> +    ASSERT( !__vcpu_on_replq(svc) );
> +
> +    _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, replq);
> +
> +    if ( repl_timer->expires == 0 ||
> +         ( active_timer(repl_timer) && repl_timer->expires > svc->cur_deadline ) )
> +        set_timer(repl_timer, svc->cur_deadline);
> +}
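The condition guarding set_timer() is the subtle part: this patch uses
expires == 0 as its own "disarmed" marker (written in __replq_remove()).
Extracted as a standalone predicate, the intended decision looks like the
following; the names here are illustrative, not Xen API:

#include <stdbool.h>
#include <stdint.h>

typedef int64_t s_time_t;

struct timer_state {
    s_time_t expires;  /* 0 means "disarmed" in this patch's convention */
    bool active;       /* what active_timer() would report */
};

/*
 * (Re-)program the one-shot timer iff it is disarmed, or armed but due
 * to fire later than the newly inserted replenishment event.
 */
static bool must_reprogram(const struct timer_state *t, s_time_t deadline)
{
    return t->expires == 0 || (t->active && t->expires > deadline);
}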
> +
> +/*
> * Init/Free related code
> */
> static int
> @@ -449,11 +559,18 @@ rt_init(struct scheduler *ops)
> INIT_LIST_HEAD(&prv->sdom);
> INIT_LIST_HEAD(&prv->runq);
> INIT_LIST_HEAD(&prv->depletedq);
> + INIT_LIST_HEAD(&prv->replq);
>
> cpumask_clear(&prv->tickled);
>
> ops->sched_data = prv;
>
> + /*
> + * The timer initialization will happen later when
> + * the first pcpu is added to this pool in alloc_pdata
> + */
> + prv->repl_timer = NULL;
> +
> return 0;
>
> no_mem:
> @@ -473,6 +590,10 @@ rt_deinit(const struct scheduler *ops)
> xfree(_cpumask_scratch);
> _cpumask_scratch = NULL;
> }
> +
> +    if ( prv->repl_timer != NULL )
> +    {
> +        kill_timer(prv->repl_timer);
> +        xfree(prv->repl_timer);
> +    }
> +
> xfree(prv);
> }
>
> @@ -493,6 +614,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu)
> if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) )
> return NULL;
>
> +    if ( prv->repl_timer == NULL )
> +    {
> +        /* allocate the timer on the first cpu of this pool */
> +        prv->repl_timer = xzalloc(struct timer);
> +
> +        if ( prv->repl_timer == NULL )
> +            return NULL;
> +
> +        init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu);
> +    }
> +
> /* 1 indicates alloc. succeed in schedule.c */
> return (void *)1;
> }
> @@ -586,6 +718,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
> return NULL;
>
> INIT_LIST_HEAD(&svc->q_elem);
> + INIT_LIST_HEAD(&svc->replq_elem);
> svc->flags = 0U;
> svc->sdom = dd;
> svc->vcpu = vc;
> @@ -609,7 +742,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv)
> }
>
> /*
> - * This function is called in sched_move_domain() in schedule.c
> + * This function is called in sched_move_domain() and
> + * sched_init_vcpu() in schedule.c
> * When move a domain to a new cpupool.
> * It inserts vcpus of moving domain to the scheduler's RunQ in
> * dest. cpupool.
> @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
> lock = vcpu_schedule_lock_irq(vc);
> if ( __vcpu_on_q(svc) )
> __q_remove(svc);
> +
> +    if ( __vcpu_on_replq(svc) )
> +        __replq_remove(ops, svc);
> +
> vcpu_schedule_unlock_irq(lock, vc);
> }
>
> @@ -785,44 +923,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask)
> }
>
> /*
> - * Update vcpu's budget and
> - * sort runq by insert the modifed vcpu back to runq
> - * lock is grabbed before calling this function
> - */
> -static void
> -__repl_update(const struct scheduler *ops, s_time_t now)
> -{
> - struct list_head *runq = rt_runq(ops);
> - struct list_head *depletedq = rt_depletedq(ops);
> - struct list_head *iter;
> - struct list_head *tmp;
> - struct rt_vcpu *svc = NULL;
> -
> - list_for_each_safe(iter, tmp, runq)
> - {
> - svc = __q_elem(iter);
> - if ( now < svc->cur_deadline )
> - break;
> -
> - rt_update_deadline(now, svc);
> - /* reinsert the vcpu if its deadline is updated */
> - __q_remove(svc);
> - __runq_insert(ops, svc);
> - }
> -
> - list_for_each_safe(iter, tmp, depletedq)
> - {
> - svc = __q_elem(iter);
> - if ( now >= svc->cur_deadline )
> - {
> - rt_update_deadline(now, svc);
> - __q_remove(svc); /* remove from depleted queue */
> - __runq_insert(ops, svc); /* add to runq */
> - }
> - }
> -}
> -
> -/*
> * schedule function for rt scheduler.
> * The lock is already grabbed in schedule.c, no need to lock here
> */
> @@ -841,7 +941,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
> /* burn_budget would return for IDLE VCPU */
> burn_budget(ops, scurr, now);
>
> - __repl_update(ops, now);
>
> if ( tasklet_work_scheduled )
> {
> @@ -868,6 +967,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
> set_bit(__RTDS_delayed_runq_add, &scurr->flags);
>
> snext->last_start = now;
> +
> +    ret.time = -1; /* default: no timer needed, in case an idle vcpu is picked */
> if ( !is_idle_vcpu(snext->vcpu) )
> {
> if ( snext != scurr )
> @@ -880,9 +981,11 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
> snext->vcpu->processor = cpu;
> ret.migrated = 1;
> }
> +
> +        ret.time = snext->budget; /* re-invoke the scheduler when snext's budget expires */
> }
>
> - ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
> ret.task = snext->vcpu;
>
> /* TRACE */
> @@ -914,7 +1017,7 @@ static void
> rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
> {
> struct rt_vcpu * const svc = rt_vcpu(vc);
> -
> +
> BUG_ON( is_idle_vcpu(vc) );
> SCHED_STAT_CRANK(vcpu_sleep);
>
> @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
> __q_remove(svc);
> else if ( svc->flags & RTDS_delayed_runq_add )
> clear_bit(__RTDS_delayed_runq_add, &svc->flags);
> +
> +    if ( __vcpu_on_replq(svc) )
> +        __replq_remove(ops, svc);
> }
>
> /*
> @@ -1026,10 +1132,6 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
> {
> struct rt_vcpu * const svc = rt_vcpu(vc);
> s_time_t now = NOW();
> - struct rt_private *prv = rt_priv(ops);
> - struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
> - struct rt_dom *sdom = NULL;
> - cpumask_t *online;
>
> BUG_ON( is_idle_vcpu(vc) );
>
> @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
> else
> SCHED_STAT_CRANK(vcpu_wake_not_runnable);
>
> +    /*
> +     * A budget replenishment may be needed here, before the vcpu is
> +     * inserted back into the runq. If the deadline is refreshed, the
> +     * vcpu must also be re-inserted into the replenishment queue.
> +     */
> +    if ( now >= svc->cur_deadline )
> +    {
> +        rt_update_deadline(now, svc);
> +        __replq_remove(ops, svc);
> +    }
> +
> +    if ( !__vcpu_on_replq(svc) )
> +        __replq_insert(ops, svc);
> +
> /* If context hasn't been saved for this vcpu yet, we can't put it on
> * the Runqueue/DepletedQ. Instead, we set a flag so that it will be
> * put on the Runqueue/DepletedQ after the context has been saved.
> @@ -1061,22 +1175,10 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
> return;
> }
>
> - if ( now >= svc->cur_deadline)
> - rt_update_deadline(now, svc);
> -
> /* insert svc to runq/depletedq because svc is not in queue now */
> __runq_insert(ops, svc);
>
> - __repl_update(ops, now);
> -
> - ASSERT(!list_empty(&prv->sdom));
> - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
> - online = cpupool_domain_cpumask(sdom->dom);
> - snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */
> -
> - runq_tickle(ops, snext);
> -
> - return;
> + runq_tickle(ops, svc);
> }
>
> /*
> @@ -1087,10 +1189,6 @@ static void
> rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
> {
> struct rt_vcpu *svc = rt_vcpu(vc);
> - struct rt_vcpu *snext = NULL;
> - struct rt_dom *sdom = NULL;
> - struct rt_private *prv = rt_priv(ops);
> - cpumask_t *online;
> spinlock_t *lock = vcpu_schedule_lock_irq(vc);
>
> clear_bit(__RTDS_scheduled, &svc->flags);
> @@ -1102,14 +1200,7 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
> likely(vcpu_runnable(vc)) )
> {
> __runq_insert(ops, svc);
> - __repl_update(ops, NOW());
> -
> - ASSERT(!list_empty(&prv->sdom));
> - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
> - online = cpupool_domain_cpumask(sdom->dom);
> - snext = __runq_pick(ops, online); /* pick snext from ALL cpus */
> -
> - runq_tickle(ops, snext);
> + runq_tickle(ops, svc);
> }
> out:
> vcpu_schedule_unlock_irq(lock, vc);
> @@ -1168,6 +1259,80 @@ rt_dom_cntl(
> return rc;
> }
>
> +/*
> + * The replenishment timer handler picks vcpus
> + * from the replq and does the actual replenishment
> + */
> +static void repl_handler(void *data)
> +{
> +    unsigned long flags;
> +    s_time_t now = NOW();
> +    struct scheduler *ops = data;
> +    struct rt_private *prv = rt_priv(ops);
> +    struct list_head *replq = rt_replq(ops);
> +    struct timer *repl_timer = prv->repl_timer;
> +    struct list_head *iter, *tmp;
> +    struct rt_vcpu *svc = NULL;
> +
> +    spin_lock_irqsave(&prv->lock, flags);
> +
> +    stop_timer(repl_timer);
> +
> +    list_for_each_safe(iter, tmp, replq)
> +    {
> +        svc = __replq_elem(iter);
> +
> +        /* the queue is sorted: stop at the first event in the future */
> +        if ( now < svc->cur_deadline )
> +            break;
> +
> +        rt_update_deadline(now, svc);
> +
> +        /*
> +         * When the replenishment happens, svc is either running
> +         * on a pcpu, or on the runq/depletedq.
> +         */
> +        if ( __vcpu_on_q(svc) )
> +        {
> +            /* re-insert so the runq stays sorted by deadline */
> +            __q_remove(svc);
> +            __runq_insert(ops, svc);
> +        }
> +
> +        /*
> +         * Tickle regardless of where svc is: after the
> +         * replenishment, a running vcpu may have a later
> +         * deadline than some of the queued ones.
> +         */
> +        runq_tickle(ops, svc);
> +
> +        /* update the replenishment event queue */
> +        __replq_remove(ops, svc);
> +        __replq_insert(ops, svc);
> +    }
> +
> +    /*
> +     * Re-arm the timer for the event at the front of the queue,
> +     * or leave the timer stopped if the queue is empty.
> +     */
> +    if ( !list_empty(replq) )
> +        set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline);
> +
> +    spin_unlock_irqrestore(&prv->lock, flags);
> +}
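As a sanity check of the timer math, here is a tiny self-contained user-space
model of the loop above, with two made-up vcpus. In this patch cur_deadline
doubles as the next replenishment time, since rt_update_deadline() pushes
both forward by one period:

#include <stdio.h>
#include <stdint.h>

typedef int64_t s_time_t;

struct vcpu_model { s_time_t cur_deadline, period; };

int main(void)
{
    struct vcpu_model v[] = { { 4000, 4000 }, { 10000, 10000 } };
    s_time_t now = 4000, next = 0;
    unsigned int i;

    for ( i = 0; i < 2; i++ )
    {
        if ( now >= v[i].cur_deadline )          /* due: replenish */
            v[i].cur_deadline += v[i].period;    /* rt_update_deadline() */

        if ( next == 0 || v[i].cur_deadline < next )
            next = v[i].cur_deadline;            /* earliest event wins */
    }

    /* the real handler also re-queues and tickles each replenished vcpu */
    printf("program the timer for t=%lld\n", (long long)next);
    return 0;
}

With now = 4000 the first vcpu is replenished to 8000, the second is left
alone, and the timer gets programmed for t = 8000.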
> +
> +/* returns true if the timer is active, i.e. armed to fire */
> +static bool_t active_timer(struct timer *timer)
> +{
> +    ASSERT(timer->status >= TIMER_STATUS_inactive);
> +    ASSERT(timer->status <= TIMER_STATUS_in_list);
> +    return (timer->status >= TIMER_STATUS_in_heap);
> +}
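active_timer() depends on the numeric ordering of the TIMER_STATUS_*
constants. A sketch of the layout assumed here, from my reading of
xen/include/xen/timer.h (worth double-checking; note that
TIMER_STATUS_killed also falls inside the ASSERTed range):

/* assumed values, mirroring TIMER_STATUS_* in xen/include/xen/timer.h */
enum timer_status_model {
    STATUS_invalid  = 0, /* should never be seen             */
    STATUS_inactive = 1, /* not in use; can be activated     */
    STATUS_killed   = 2, /* not in use; cannot be activated  */
    STATUS_in_heap  = 3, /* active: on a cpu's timer heap    */
    STATUS_in_list  = 4, /* active: on the overflow list     */
};

Under this assumed layout, "status >= in_heap" means the timer is armed,
and the two ASSERTs only rule out the 'invalid' state.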
> +
> static struct rt_private _rt_priv;
>
> static const struct scheduler sched_rtds_def = {
>
Thread overview: 9+ messages
2016-02-09 4:33 [PATCH v5][RFC]xen: sched: convert RTDS from time to event driven model Tianyang Chen
2016-02-16 3:55 ` Meng Xu
2016-02-18 1:55 ` Tianyang Chen
2016-02-24 15:23 ` Tianyang Chen [this message]
2016-02-25 2:02 ` Dario Faggioli
2016-02-25 6:15 ` Tianyang Chen
2016-02-25 10:34 ` Dario Faggioli
2016-02-25 17:29 ` Tianyang Chen
2016-02-25 17:51 ` Dario Faggioli