* [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf
2014-06-13 19:58 [RFC PATCH 0/4] Repurpose SEDF Scheduler for Real-time use Josh Whitehead
@ 2014-06-13 19:58 ` Josh Whitehead
2014-06-17 15:43 ` Dario Faggioli
2014-06-17 16:06 ` Dario Faggioli
2014-06-13 19:58 ` [RFC PATCH 2/4] Add cbs parameter support to xl tool stack, remove defunct sedf parameters Josh Whitehead
` (4 subsequent siblings)
5 siblings, 2 replies; 27+ messages in thread
From: Josh Whitehead @ 2014-06-13 19:58 UTC (permalink / raw)
To: Xen-devel
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Dario Faggioli,
Ian Jackson, Robert VanVossen, Nate Studer, Josh Whitehead
---
xen/common/sched_sedf.c | 947 +++++++++--------------------------------------
1 file changed, 173 insertions(+), 774 deletions(-)
diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c
index 0c9011a..2ee4538 100644
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -25,24 +25,16 @@
#define CHECK(_p) ((void)0)
#endif
-#define EXTRA_NONE (0)
-#define EXTRA_AWARE (1)
-#define EXTRA_RUN_PEN (2)
-#define EXTRA_RUN_UTIL (4)
-#define EXTRA_WANT_PEN_Q (8)
-#define EXTRA_PEN_Q (0)
-#define EXTRA_UTIL_Q (1)
+#define SEDF_SOFT_TASK (1)
#define SEDF_ASLEEP (16)
-#define EXTRA_QUANTUM (MICROSECS(500))
-#define WEIGHT_PERIOD (MILLISECS(100))
-#define WEIGHT_SAFETY (MILLISECS(5))
+#define DEFAULT_PERIOD (MILLISECS(20))
+#define DEFAULT_SLICE (MILLISECS(10))
#define PERIOD_MAX MILLISECS(10000) /* 10s */
#define PERIOD_MIN (MICROSECS(10)) /* 10us */
#define SLICE_MIN (MICROSECS(5)) /* 5us */
-#define IMPLY(a, b) (!(a) || (b))
#define EQ(a, b) ((!!(a)) == (!!(b)))
@@ -58,24 +50,14 @@ struct sedf_priv_info {
struct sedf_vcpu_info {
struct vcpu *vcpu;
struct list_head list;
- struct list_head extralist[2];
/* Parameters for EDF */
s_time_t period; /* = relative deadline */
s_time_t slice; /* = worst case execution time */
-
- /* Advaced Parameters */
+ /* Note: Server bandwidth = (slice / period) */
- /* Latency Scaling */
- s_time_t period_orig;
- s_time_t slice_orig;
- s_time_t latency;
-
/* Status of domain */
int status;
- /* Weights for "Scheduling for beginners/ lazy/ etc." ;) */
- short weight;
- short extraweight;
/* Bookkeeping */
s_time_t deadl_abs;
s_time_t sched_start_abs;
@@ -84,28 +66,21 @@ struct sedf_vcpu_info {
s_time_t block_abs;
s_time_t unblock_abs;
- /* Scores for {util, block penalty}-weighted extratime distribution */
- int score[2];
- s_time_t short_block_lost_tot;
-
- /* Statistics */
- s_time_t extra_time_tot;
-
#ifdef SEDF_STATS
s_time_t block_time_tot;
- s_time_t penalty_time_tot;
int block_tot;
int short_block_tot;
int long_block_tot;
- int pen_extra_blocks;
- int pen_extra_slices;
+ s_time_t miss_time;
+ s_time_t over_time;
+ int miss_tot;
+ int over_tot;
#endif
};
struct sedf_cpu_info {
struct list_head runnableq;
struct list_head waitq;
- struct list_head extraq[2];
s_time_t current_slice_expires;
};
@@ -115,102 +90,20 @@ struct sedf_cpu_info {
#define CPU_INFO(cpu) \
((struct sedf_cpu_info *)per_cpu(schedule_data, cpu).sched_priv)
#define LIST(d) (&EDOM_INFO(d)->list)
-#define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i]))
#define RUNQ(cpu) (&CPU_INFO(cpu)->runnableq)
#define WAITQ(cpu) (&CPU_INFO(cpu)->waitq)
-#define EXTRAQ(cpu,i) (&(CPU_INFO(cpu)->extraq[i]))
#define IDLETASK(cpu) (idle_vcpu[cpu])
#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
#define DIV_UP(x,y) (((x) + (y) - 1) / y)
-#define extra_runs(inf) ((inf->status) & 6)
-#define extra_get_cur_q(inf) (((inf->status & 6) >> 1)-1)
#define sedf_runnable(edom) (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
+#define sedf_soft(edom) (EDOM_INFO(edom)->status & SEDF_SOFT_TASK)
-static void sedf_dump_cpu_state(const struct scheduler *ops, int i);
-
-static inline int extraq_on(struct vcpu *d, int i)
-{
- return ((EXTRALIST(d,i)->next != NULL) &&
- (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
-}
-
-static inline void extraq_del(struct vcpu *d, int i)
-{
- struct list_head *list = EXTRALIST(d,i);
- ASSERT(extraq_on(d,i));
- list_del(list);
- list->next = NULL;
- ASSERT(!extraq_on(d, i));
-}
-
-/*
- * Adds a domain to the queue of processes which are aware of extra time. List
- * is sorted by score, where a lower score means higher priority for an extra
- * slice. It also updates the score, by simply subtracting a fixed value from
- * each entry, in order to avoid overflow. The algorithm works by simply
- * charging each domain that recieved extratime with an inverse of its weight.
- */
-static inline void extraq_add_sort_update(struct vcpu *d, int i, int sub)
-{
- struct list_head *cur;
- struct sedf_vcpu_info *curinf;
-
- ASSERT(!extraq_on(d,i));
-
- /*
- * Iterate through all elements to find our "hole" and on our way
- * update all the other scores.
- */
- list_for_each ( cur, EXTRAQ(d->processor, i) )
- {
- curinf = list_entry(cur,struct sedf_vcpu_info,extralist[i]);
- curinf->score[i] -= sub;
- if ( EDOM_INFO(d)->score[i] < curinf->score[i] )
- break;
- }
-
- /* cur now contains the element, before which we'll enqueue */
- list_add(EXTRALIST(d,i),cur->prev);
-
- /* Continue updating the extraq */
- if ( (cur != EXTRAQ(d->processor,i)) && sub )
- {
- for ( cur = cur->next; cur != EXTRAQ(d->processor,i); cur = cur->next )
- {
- curinf = list_entry(cur,struct sedf_vcpu_info, extralist[i]);
- curinf->score[i] -= sub;
- }
- }
-
- ASSERT(extraq_on(d,i));
-}
-static inline void extraq_check(struct vcpu *d)
-{
- if ( extraq_on(d, EXTRA_UTIL_Q) )
- {
- if ( !(EDOM_INFO(d)->status & EXTRA_AWARE) &&
- !extra_runs(EDOM_INFO(d)) )
- extraq_del(d, EXTRA_UTIL_Q);
- }
- else
- {
- if ( (EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d) )
- extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
- }
-}
-
-static inline void extraq_check_add_unblocked(struct vcpu *d, int priority)
-{
- struct sedf_vcpu_info *inf = EDOM_INFO(d);
- if ( inf->status & EXTRA_AWARE )
- /* Put on the weighted extraq without updating any scores */
- extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
-}
+static void sedf_dump_cpu_state(const struct scheduler *ops, int i);
static inline int __task_on_queue(struct vcpu *d)
{
@@ -284,11 +177,7 @@ static inline void __add_to_runqueue_sort(struct vcpu *v)
static void sedf_insert_vcpu(const struct scheduler *ops, struct vcpu *v)
{
- if ( !is_idle_vcpu(v) )
- {
- extraq_check(v);
- }
- else
+ if ( is_idle_vcpu(v) )
{
EDOM_INFO(v)->deadl_abs = 0;
EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
@@ -305,19 +194,23 @@ static void *sedf_alloc_vdata(const struct scheduler *ops, struct vcpu *v, void
inf->vcpu = v;
- /* Every VCPU gets an equal share of extratime by default */
- inf->deadl_abs = 0;
- inf->latency = 0;
- inf->status = EXTRA_AWARE | SEDF_ASLEEP;
- inf->extraweight = 1;
- /* Upon creation all domain are best-effort */
- inf->period = WEIGHT_PERIOD;
- inf->slice = 0;
+ inf->deadl_abs = 0;
+ inf->cputime = 0;
+ inf->status = SEDF_ASLEEP;
+
+ if (v->domain->domain_id == 0)
+ {
+ /* Domain 0 needs a nonzero slice to boot the machine */
+ inf->period = DEFAULT_PERIOD;
+ inf->slice = DEFAULT_SLICE;
+ }
+ else
+ {
+ inf->period = DEFAULT_PERIOD;
+ inf->slice = 0;
+ }
- inf->period_orig = inf->period; inf->slice_orig = inf->slice;
INIT_LIST_HEAD(&(inf->list));
- INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
- INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
SCHED_STAT_CRANK(vcpu_init);
@@ -333,8 +226,6 @@ sedf_alloc_pdata(const struct scheduler *ops, int cpu)
BUG_ON(spc == NULL);
INIT_LIST_HEAD(&spc->waitq);
INIT_LIST_HEAD(&spc->runnableq);
- INIT_LIST_HEAD(&spc->extraq[EXTRA_PEN_Q]);
- INIT_LIST_HEAD(&spc->extraq[EXTRA_UTIL_Q]);
return (void *)spc;
}
@@ -410,49 +301,59 @@ static void desched_edf_dom(s_time_t now, struct vcpu* d)
__del_from_queue(d);
- /*
- * Manage bookkeeping (i.e. calculate next deadline, memorise
- * overrun-time of slice) of finished domains.
- */
+#ifdef SEDF_STATS
+ /* Manage deadline misses */
+ if ( unlikely(inf->deadl_abs < now) )
+ {
+ inf->miss_tot++;
+ inf->miss_time += inf->cputime;
+ }
+#endif
+
+ /* Manage overruns */
if ( inf->cputime >= inf->slice )
{
inf->cputime -= inf->slice;
-
- if ( inf->period < inf->period_orig )
- {
- /* This domain runs in latency scaling or burst mode */
- inf->period *= 2;
- inf->slice *= 2;
- if ( (inf->period > inf->period_orig) ||
- (inf->slice > inf->slice_orig) )
- {
- /* Reset slice and period */
- inf->period = inf->period_orig;
- inf->slice = inf->slice_orig;
- }
- }
/* Set next deadline */
inf->deadl_abs += inf->period;
+
+ /* Ensure that the cputime is always less than slice */
+ if ( unlikely(inf->cputime > inf->slice) )
+ {
+#ifdef SEDF_STATS
+ inf->over_tot++;
+ inf->over_time += inf->cputime;
+#endif
+
+ /* Make up for the overage by pushing the deadline
+ into the future */
+ inf->deadl_abs += ((inf->cputime / inf->slice)
+ * inf->period) * 2;
+ inf->cputime -= (inf->cputime / inf->slice) * inf->slice;
+ }
+
+ /* Ensure that the start of the next period is in the future */
+ if ( unlikely(PERIOD_BEGIN(inf) < now) )
+ inf->deadl_abs +=
+ (DIV_UP(now - PERIOD_BEGIN(inf),
+ inf->period)) * inf->period;
}
/* Add a runnable domain to the waitqueue */
if ( sedf_runnable(d) )
{
- __add_to_waitqueue_sort(d);
- }
- else
- {
- /* We have a blocked realtime task -> remove it from exqs too */
- if ( extraq_on(d, EXTRA_PEN_Q) )
- extraq_del(d, EXTRA_PEN_Q);
- if ( extraq_on(d, EXTRA_UTIL_Q) )
- extraq_del(d, EXTRA_UTIL_Q);
+ if( sedf_soft(d) )
+ {
+ __add_to_runqueue_sort(d);
+ }
+ else
+ {
+ __add_to_waitqueue_sort(d);
+ }
}
-
+
ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
- ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
- sedf_runnable(d)));
}
@@ -498,217 +399,12 @@ static void update_queues(
/* Put them back into the queue */
__add_to_waitqueue_sort(curinf->vcpu);
}
- else if ( unlikely((curinf->deadl_abs < now) ||
- (curinf->cputime > curinf->slice)) )
- {
- /*
- * We missed the deadline or the slice was already finished.
- * Might hapen because of dom_adj.
- */
- printk("\tDomain %i.%i exceeded it's deadline/"
- "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
- " cputime: %"PRIu64"\n",
- curinf->vcpu->domain->domain_id,
- curinf->vcpu->vcpu_id,
- curinf->deadl_abs, curinf->slice, now,
- curinf->cputime);
- __del_from_queue(curinf->vcpu);
-
- /* Common case: we miss one period */
- curinf->deadl_abs += curinf->period;
-
- /*
- * If we are still behind: modulo arithmetic, force deadline
- * to be in future and aligned to period borders.
- */
- if ( unlikely(curinf->deadl_abs < now) )
- curinf->deadl_abs +=
- DIV_UP(now - curinf->deadl_abs,
- curinf->period) * curinf->period;
- ASSERT(curinf->deadl_abs >= now);
-
- /* Give a fresh slice */
- curinf->cputime = 0;
- if ( PERIOD_BEGIN(curinf) > now )
- __add_to_waitqueue_sort(curinf->vcpu);
- else
- __add_to_runqueue_sort(curinf->vcpu);
- }
else
break;
}
}
-/*
- * removes a domain from the head of the according extraQ and
- * requeues it at a specified position:
- * round-robin extratime: end of extraQ
- * weighted ext.: insert in sorted list by score
- * if the domain is blocked / has regained its short-block-loss
- * time it is not put on any queue.
- */
-static void desched_extra_dom(s_time_t now, struct vcpu *d)
-{
- struct sedf_vcpu_info *inf = EDOM_INFO(d);
- int i = extra_get_cur_q(inf);
- unsigned long oldscore;
-
- ASSERT(extraq_on(d, i));
-
- /* Unset all running flags */
- inf->status &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
- /* Fresh slice for the next run */
- inf->cputime = 0;
- /* Accumulate total extratime */
- inf->extra_time_tot += now - inf->sched_start_abs;
- /* Remove extradomain from head of the queue. */
- extraq_del(d, i);
-
- /* Update the score */
- oldscore = inf->score[i];
- if ( i == EXTRA_PEN_Q )
- {
- /* Domain was running in L0 extraq */
- /* reduce block lost, probably more sophistication here!*/
- /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
- inf->short_block_lost_tot -= now - inf->sched_start_abs;
-#if 0
- /* KAF: If we don't exit short-blocking state at this point
- * domain0 can steal all CPU for up to 10 seconds before
- * scheduling settles down (when competing against another
- * CPU-bound domain). Doing this seems to make things behave
- * nicely. Noone gets starved by default.
- */
- if ( inf->short_block_lost_tot <= 0 )
-#endif
- {
- /* We have (over-)compensated our block penalty */
- inf->short_block_lost_tot = 0;
- /* We don't want a place on the penalty queue anymore! */
- inf->status &= ~EXTRA_WANT_PEN_Q;
- goto check_extra_queues;
- }
-
- /*
- * We have to go again for another try in the block-extraq,
- * the score is not used incremantally here, as this is
- * already done by recalculating the block_lost
- */
- inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
- inf->short_block_lost_tot;
- oldscore = 0;
- }
- else
- {
- /*
- * Domain was running in L1 extraq => score is inverse of
- * utilization and is used somewhat incremental!
- */
- if ( !inf->extraweight )
- {
- /* NB: use fixed point arithmetic with 10 bits */
- inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
- inf->slice;
- }
- else
- {
- /*
- * Conversion between realtime utilisation and extrawieght:
- * full (ie 100%) utilization is equivalent to 128 extraweight
- */
- inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
- }
- }
-
- check_extra_queues:
- /* Adding a runnable domain to the right queue and removing blocked ones */
- if ( sedf_runnable(d) )
- {
- /* Add according to score: weighted round robin */
- if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
- ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
- extraq_add_sort_update(d, i, oldscore);
- }
- else
- {
- /* Remove this blocked domain from the waitq! */
- __del_from_queue(d);
- /* Make sure that we remove a blocked domain from the other
- * extraq too. */
- if ( i == EXTRA_PEN_Q )
- {
- if ( extraq_on(d, EXTRA_UTIL_Q) )
- extraq_del(d, EXTRA_UTIL_Q);
- }
- else
- {
- if ( extraq_on(d, EXTRA_PEN_Q) )
- extraq_del(d, EXTRA_PEN_Q);
- }
- }
-
- ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
- ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
- sedf_runnable(d)));
-}
-
-
-static struct task_slice sedf_do_extra_schedule(
- s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
-{
- struct task_slice ret = { 0 };
- struct sedf_vcpu_info *runinf;
- ASSERT(end_xt > now);
-
- /* Enough time left to use for extratime? */
- if ( end_xt - now < EXTRA_QUANTUM )
- goto return_idle;
-
- if ( !list_empty(extraq[EXTRA_PEN_Q]) )
- {
- /*
- * We still have elements on the level 0 extraq
- * => let those run first!
- */
- runinf = list_entry(extraq[EXTRA_PEN_Q]->next,
- struct sedf_vcpu_info, extralist[EXTRA_PEN_Q]);
- runinf->status |= EXTRA_RUN_PEN;
- ret.task = runinf->vcpu;
- ret.time = EXTRA_QUANTUM;
-#ifdef SEDF_STATS
- runinf->pen_extra_slices++;
-#endif
- }
- else
- {
- if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
- {
- /* Use elements from the normal extraqueue */
- runinf = list_entry(extraq[EXTRA_UTIL_Q]->next,
- struct sedf_vcpu_info,
- extralist[EXTRA_UTIL_Q]);
- runinf->status |= EXTRA_RUN_UTIL;
- ret.task = runinf->vcpu;
- ret.time = EXTRA_QUANTUM;
- }
- else
- goto return_idle;
- }
-
- ASSERT(ret.time > 0);
- ASSERT(sedf_runnable(ret.task));
- return ret;
-
- return_idle:
- ret.task = IDLETASK(cpu);
- ret.time = end_xt - now;
- ASSERT(ret.time > 0);
- ASSERT(sedf_runnable(ret.task));
- return ret;
-}
-
-
static int sedf_init(struct scheduler *ops)
{
struct sedf_priv_info *prv;
@@ -748,8 +444,6 @@ static struct task_slice sedf_do_schedule(
struct list_head *runq = RUNQ(cpu);
struct list_head *waitq = WAITQ(cpu);
struct sedf_vcpu_info *inf = EDOM_INFO(current);
- struct list_head *extraq[] = {
- EXTRAQ(cpu, EXTRA_PEN_Q), EXTRAQ(cpu, EXTRA_UTIL_Q)};
struct sedf_vcpu_info *runinf, *waitinf;
struct task_slice ret;
@@ -770,15 +464,7 @@ static struct task_slice sedf_do_schedule(
if ( inf->status & SEDF_ASLEEP )
inf->block_abs = now;
- if ( unlikely(extra_runs(inf)) )
- {
- /* Special treatment of domains running in extra time */
- desched_extra_dom(now, current);
- }
- else
- {
- desched_edf_dom(now, current);
- }
+ desched_edf_dom(now, current);
check_waitq:
update_queues(now, runq, waitq);
@@ -820,12 +506,9 @@ static struct task_slice sedf_do_schedule(
else
{
waitinf = list_entry(waitq->next,struct sedf_vcpu_info, list);
- /*
- * We could not find any suitable domain
- * => look for domains that are aware of extratime
- */
- ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
- extraq, cpu);
+
+ ret.task = IDLETASK(cpu);
+ ret.time = PERIOD_BEGIN(waitinf) - now;
}
/*
@@ -833,11 +516,8 @@ static struct task_slice sedf_do_schedule(
* still can happen!!!
*/
if ( ret.time < 0)
- {
printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
ret.time);
- ret.time = EXTRA_QUANTUM;
- }
ret.migrated = 0;
@@ -848,7 +528,6 @@ static struct task_slice sedf_do_schedule(
return ret;
}
-
static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
{
if ( is_idle_vcpu(d) )
@@ -864,13 +543,35 @@ static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
{
if ( __task_on_queue(d) )
__del_from_queue(d);
- if ( extraq_on(d, EXTRA_UTIL_Q) )
- extraq_del(d, EXTRA_UTIL_Q);
- if ( extraq_on(d, EXTRA_PEN_Q) )
- extraq_del(d, EXTRA_PEN_Q);
}
}
+/*
+ * Compares two domains in the relation of whether the one is allowed to
+ * interrupt the other's execution.
+ * It returns true (!=0) if a switch to the other domain is good, i.e. if:
+ * - the current domain is the idle domain, or
+ * - the other's period begins before the current slice on its CPU expires.
+ */
+static inline int should_switch(struct vcpu *cur,
+ struct vcpu *other,
+ s_time_t now)
+{
+ struct sedf_vcpu_info *cur_inf, *other_inf;
+ cur_inf = EDOM_INFO(cur);
+ other_inf = EDOM_INFO(other);
+
+ /* Always interrupt idle domain. */
+ if ( is_idle_vcpu(cur) )
+ return 1;
+
+ /* Check whether we need to make an earlier scheduling decision */
+ if ( PERIOD_BEGIN(other_inf) <
+ CPU_INFO(other->processor)->current_slice_expires )
+ return 1;
+
+ return 0;
+}
/*
* This function wakes up a domain, i.e. moves them into the waitqueue
@@ -904,8 +605,6 @@ static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
*
* -this also doesn't disturb scheduling, but might lead to the fact, that
* the domain can't finish it's workload in the period
- * -in addition to that the domain can be treated prioritised when
- * extratime is available
* -addition: experiments have shown that this may have a HUGE impact on
* performance of other domains, becaus it can lead to excessive context
* switches
@@ -931,10 +630,6 @@ static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
* DRB______D___URRRR___D...<prev [Thread] next>
* (D) <- old deadline was here
* -problem: deadlines don't occur isochronous anymore
- * Part 2c (Improved Atropos design)
- * -when a domain unblocks it is given a very short period (=latency hint)
- * and slice length scaled accordingly
- * -both rise again to the original value (e.g. get doubled every period)
*
* 3. Unconservative (i.e. incorrect)
* -to boost the performance of I/O dependent domains it would be possible
@@ -944,136 +639,6 @@ static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
* -either behaviour can lead to missed deadlines in other domains as
* opposed to approaches 1,2a,2b
*/
-static void unblock_short_extra_support(
- struct sedf_vcpu_info* inf, s_time_t now)
-{
- /*
- * This unblocking scheme tries to support the domain, by assigning it
- * a priority in extratime distribution according to the loss of time
- * in this slice due to blocking
- */
- s_time_t pen;
-
- /* No more realtime execution in this period! */
- inf->deadl_abs += inf->period;
- if ( likely(inf->block_abs) )
- {
- /* Treat blocked time as consumed by the domain */
- /*inf->cputime += now - inf->block_abs;*/
- /*
- * Penalty is time the domain would have
- * had if it continued to run.
- */
- pen = (inf->slice - inf->cputime);
- if ( pen < 0 )
- pen = 0;
- /* Accumulate all penalties over the periods */
- /*inf->short_block_lost_tot += pen;*/
- /* Set penalty to the current value */
- inf->short_block_lost_tot = pen;
- /* Not sure which one is better.. but seems to work well... */
-
- if ( inf->short_block_lost_tot )
- {
- inf->score[0] = (inf->period << 10) /
- inf->short_block_lost_tot;
-#ifdef SEDF_STATS
- inf->pen_extra_blocks++;
-#endif
- if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
- /* Remove domain for possible resorting! */
- extraq_del(inf->vcpu, EXTRA_PEN_Q);
- else
- /*
- * Remember that we want to be on the penalty q
- * so that we can continue when we (un-)block
- * in penalty-extratime
- */
- inf->status |= EXTRA_WANT_PEN_Q;
-
- /* (re-)add domain to the penalty extraq */
- extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
- }
- }
-
- /* Give it a fresh slice in the next period! */
- inf->cputime = 0;
-}
-
-
-static void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now)
-{
- /* Conservative 2b */
-
- /* Treat the unblocking time as a start of a new period */
- inf->deadl_abs = now + inf->period;
- inf->cputime = 0;
-}
-
-
-#define DOMAIN_EDF 1
-#define DOMAIN_EXTRA_PEN 2
-#define DOMAIN_EXTRA_UTIL 3
-#define DOMAIN_IDLE 4
-static inline int get_run_type(struct vcpu* d)
-{
- struct sedf_vcpu_info* inf = EDOM_INFO(d);
- if (is_idle_vcpu(d))
- return DOMAIN_IDLE;
- if (inf->status & EXTRA_RUN_PEN)
- return DOMAIN_EXTRA_PEN;
- if (inf->status & EXTRA_RUN_UTIL)
- return DOMAIN_EXTRA_UTIL;
- return DOMAIN_EDF;
-}
-
-
-/*
- * Compares two domains in the relation of whether the one is allowed to
- * interrupt the others execution.
- * It returns true (!=0) if a switch to the other domain is good.
- * Current Priority scheme is as follows:
- * EDF > L0 (penalty based) extra-time >
- * L1 (utilization) extra-time > idle-domain
- * In the same class priorities are assigned as following:
- * EDF: early deadline > late deadline
- * L0 extra-time: lower score > higher score
- */
-static inline int should_switch(struct vcpu *cur,
- struct vcpu *other,
- s_time_t now)
-{
- struct sedf_vcpu_info *cur_inf, *other_inf;
- cur_inf = EDOM_INFO(cur);
- other_inf = EDOM_INFO(other);
-
- /* Check whether we need to make an earlier scheduling decision */
- if ( PERIOD_BEGIN(other_inf) <
- CPU_INFO(other->processor)->current_slice_expires )
- return 1;
-
- /* No timing-based switches need to be taken into account here */
- switch ( get_run_type(cur) )
- {
- case DOMAIN_EDF:
- /* Do not interrupt a running EDF domain */
- return 0;
- case DOMAIN_EXTRA_PEN:
- /* Check whether we also want the L0 ex-q with lower score */
- return ((other_inf->status & EXTRA_WANT_PEN_Q) &&
- (other_inf->score[EXTRA_PEN_Q] <
- cur_inf->score[EXTRA_PEN_Q]));
- case DOMAIN_EXTRA_UTIL:
- /* Check whether we want the L0 extraq. Don't
- * switch if both domains want L1 extraq. */
- return !!(other_inf->status & EXTRA_WANT_PEN_Q);
- case DOMAIN_IDLE:
- return 1;
- }
-
- return 1;
-}
-
static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
{
s_time_t now = NOW();
@@ -1087,8 +652,6 @@ static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
ASSERT(!sedf_runnable(d));
inf->status &= ~SEDF_ASLEEP;
- ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
- ASSERT(!extraq_on(d, EXTRA_PEN_Q));
if ( unlikely(inf->deadl_abs == 0) )
{
@@ -1100,62 +663,65 @@ static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
inf->block_tot++;
#endif
- if ( unlikely(now < PERIOD_BEGIN(inf)) )
- {
- /* Unblocking in extra-time! */
- if ( inf->status & EXTRA_WANT_PEN_Q )
+ if ( sedf_soft(d) )
+ {
+ /* Apply CBS rule
+ * Where:
+ * c == Remaining server slice == (inf->slice - inf->cputime)
+ * d == Server (vcpu) deadline == inf->deadl_abs
+ * r == Wake-up time of vcpu == now
+ * U == Server (vcpu) bandwidth == (inf->slice / inf->period)
+ *
+ * if c >= (d - r) * U --->
+ * (slice - cputime) >= (deadl_abs - now) * (slice / period)
+ *
+ * If true, push deadline back by one period and refresh slice, else
+ * use current slice and deadline.
+ */
+ if((inf->slice - inf->cputime) >=
+ ((inf->deadl_abs - now) * (inf->slice / inf->period)))
{
- /*
- * We have a domain that wants compensation
- * for block penalty and did just block in
- * its compensation time. Give it another
- * chance!
- */
- extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
+ /* Push back deadline by one period */
+ inf->deadl_abs += inf->period;
+ inf->cputime = 0;
}
- extraq_check_add_unblocked(d, 0);
- }
- else
- {
+
+ /* In CBS we don't care if the period has begun,
+ * the task doesn't have to wait for its period
+ * because it'll never request more than its slice
+ * for any given period.
+ */
+ __add_to_runqueue_sort(d);
+ }
+ else {
+ /* Task is a hard task, treat accordingly */
+#ifdef SEDF_STATS
if ( now < inf->deadl_abs )
{
/* Short blocking */
-#ifdef SEDF_STATS
inf->short_block_tot++;
-#endif
- unblock_short_extra_support(inf, now);
-
- extraq_check_add_unblocked(d, 1);
}
else
{
- /* Long unblocking */
-#ifdef SEDF_STATS
+ /* Long unblocking, someone is going to miss their deadline. */
inf->long_block_tot++;
+ }
#endif
- unblock_long_cons_b(inf, now);
- extraq_check_add_unblocked(d, 1);
- }
+ if ( PERIOD_BEGIN(inf) > now )
+ __add_to_waitqueue_sort(d);
+ else
+ __add_to_runqueue_sort(d);
}
-
- if ( PERIOD_BEGIN(inf) > now )
- __add_to_waitqueue_sort(d);
- else
- __add_to_runqueue_sort(d);
#ifdef SEDF_STATS
/* Do some statistics here... */
if ( inf->block_abs != 0 )
{
inf->block_time_tot += now - inf->block_abs;
- inf->penalty_time_tot +=
- PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
}
#endif
- /* Sanity check: make sure each extra-aware domain IS on the util-q! */
- ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
ASSERT(__task_on_queue(d));
/*
* Check whether the awakened task needs to invoke the do_schedule
@@ -1170,35 +736,27 @@ static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
}
-
/* Print a lot of useful information about a domains in the system */
static void sedf_dump_domain(struct vcpu *d)
{
printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
d->is_running ? 'T':'F');
- printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu"
- " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
- EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
- EDOM_INFO(d)->weight,
- EDOM_INFO(d)->score[EXTRA_UTIL_Q],
- (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
- EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
+ printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64,
+ EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs);
#ifdef SEDF_STATS
- if ( EDOM_INFO(d)->block_time_tot != 0 )
- printk(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
- EDOM_INFO(d)->block_time_tot);
+ printk(" m=%u mt=%"PRIu64"o=%u ot=%"PRIu64,
+ EDOM_INFO(d)->miss_tot, EDOM_INFO(d)->miss_time,
+ EDOM_INFO(d)->over_tot, EDOM_INFO(d)->over_time);
+
if ( EDOM_INFO(d)->block_tot != 0 )
- printk("\n blks=%u sh=%u (%u%%) (shex=%i "\
- "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
+ printk("\n blks=%u sh=%u (%u%%) "\
+ "l=%u (%u%%) avg: b=%"PRIu64,
EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
(EDOM_INFO(d)->short_block_tot * 100) / EDOM_INFO(d)->block_tot,
- EDOM_INFO(d)->pen_extra_blocks,
- EDOM_INFO(d)->pen_extra_slices,
EDOM_INFO(d)->long_block_tot,
(EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
- (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
- (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
+ (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot);
#endif
printk("\n");
}
@@ -1234,30 +792,6 @@ static void sedf_dump_cpu_state(const struct scheduler *ops, int i)
sedf_dump_domain(d_inf->vcpu);
}
- queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
- printk("\nEXTRAQ (penalty) rq %lx n: %lx, p: %lx\n",
- (unsigned long)queue, (unsigned long) queue->next,
- (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue )
- {
- d_inf = list_entry(list, struct sedf_vcpu_info,
- extralist[EXTRA_PEN_Q]);
- printk("%3d: ",loop++);
- sedf_dump_domain(d_inf->vcpu);
- }
-
- queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
- printk("\nEXTRAQ (utilization) rq %lx n: %lx, p: %lx\n",
- (unsigned long)queue, (unsigned long) queue->next,
- (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue )
- {
- d_inf = list_entry(list, struct sedf_vcpu_info,
- extralist[EXTRA_UTIL_Q]);
- printk("%3d: ",loop++);
- sedf_dump_domain(d_inf->vcpu);
- }
-
loop = 0;
printk("\nnot on Q\n");
@@ -1279,199 +813,69 @@ static void sedf_dump_cpu_state(const struct scheduler *ops, int i)
}
-/* Adjusts periods and slices of the domains accordingly to their weights */
-static int sedf_adjust_weights(struct cpupool *c, int nr_cpus, int *sumw, s_time_t *sumt)
-{
- struct vcpu *p;
- struct domain *d;
- unsigned int cpu;
-
- /*
- * Sum across all weights. Notice that no runq locking is needed
- * here: the caller holds sedf_priv_info.lock and we're not changing
- * anything that is accessed during scheduling.
- */
- rcu_read_lock(&domlist_read_lock);
- for_each_domain_in_cpupool( d, c )
- {
- for_each_vcpu( d, p )
- {
- if ( (cpu = p->processor) >= nr_cpus )
- continue;
-
- if ( EDOM_INFO(p)->weight )
- {
- sumw[cpu] += EDOM_INFO(p)->weight;
- }
- else
- {
- /*
- * Don't modify domains who don't have a weight, but sum
- * up the time they need, projected to a WEIGHT_PERIOD,
- * so that this time is not given to the weight-driven
- * domains
- */
-
- /* Check for overflows */
- ASSERT((WEIGHT_PERIOD < ULONG_MAX)
- && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
- sumt[cpu] +=
- (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
- EDOM_INFO(p)->period_orig;
- }
- }
- }
- rcu_read_unlock(&domlist_read_lock);
-
- /*
- * Adjust all slices (and periods) to the new weight. Unlike above, we
- * need to take thr runq lock for the various VCPUs: we're modyfing
- * slice and period which are referenced during scheduling.
- */
- rcu_read_lock(&domlist_read_lock);
- for_each_domain_in_cpupool( d, c )
- {
- for_each_vcpu ( d, p )
- {
- if ( (cpu = p->processor) >= nr_cpus )
- continue;
- if ( EDOM_INFO(p)->weight )
- {
- /* Interrupts already off */
- spinlock_t *lock = vcpu_schedule_lock(p);
-
- EDOM_INFO(p)->period_orig =
- EDOM_INFO(p)->period = WEIGHT_PERIOD;
- EDOM_INFO(p)->slice_orig =
- EDOM_INFO(p)->slice =
- (EDOM_INFO(p)->weight *
- (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu];
-
- vcpu_schedule_unlock(lock, p);
- }
- }
- }
- rcu_read_unlock(&domlist_read_lock);
-
- return 0;
-}
-
-
/* Set or fetch domain scheduling parameters */
static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct xen_domctl_scheduler_op *op)
{
struct sedf_priv_info *prv = SEDF_PRIV(ops);
unsigned long flags;
- unsigned int nr_cpus = cpumask_last(&cpu_online_map) + 1;
- int *sumw = xzalloc_array(int, nr_cpus);
- s_time_t *sumt = xzalloc_array(s_time_t, nr_cpus);
+ s_time_t now = NOW();
struct vcpu *v;
int rc = 0;
/*
* Serialize against the pluggable scheduler lock to protect from
* concurrent updates. We need to take the runq lock for the VCPUs
- * as well, since we are touching extraweight, weight, slice and
- * period. As in sched_credit2.c, runq locks nest inside the
- * pluggable scheduler lock.
+ * as well, since we are touching slice and period.
+ *
+ * As in sched_credit2.c, runq locks nest inside the pluggable scheduler
+ * lock.
*/
spin_lock_irqsave(&prv->lock, flags);
if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
{
- /*
- * These are used in sedf_adjust_weights() but have to be allocated in
- * this function, as we need to avoid nesting xmem_pool_alloc's lock
- * within our prv->lock.
- */
- if ( !sumw || !sumt )
+ /* Check for sane parameters */
+ if ( !op->u.sedf.period )
{
- /* Check for errors here, the _getinfo branch doesn't care */
- rc = -ENOMEM;
+ printk("Period Not set");
+ rc = -EINVAL;
goto out;
}
- /* Check for sane parameters */
- if ( !op->u.sedf.period && !op->u.sedf.weight )
+ /*
+ * Sanity checking
+ */
+ if ( (op->u.sedf.period > PERIOD_MAX) ||
+ (op->u.sedf.period < PERIOD_MIN) ||
+ (op->u.sedf.slice > op->u.sedf.period) ||
+ (op->u.sedf.slice < SLICE_MIN) )
{
+ printk("Insane Parameters: period: %lu\tbudget: %lu\n", op->u.sedf.period, op->u.sedf.slice);
rc = -EINVAL;
goto out;
}
- if ( op->u.sedf.weight )
+ /* Time-driven domains */
+ for_each_vcpu ( p, v )
{
- if ( (op->u.sedf.extratime & EXTRA_AWARE) &&
- (!op->u.sedf.period) )
+ spinlock_t *lock = vcpu_schedule_lock(v);
+
+ EDOM_INFO(v)->period = op->u.sedf.period;
+ EDOM_INFO(v)->slice = op->u.sedf.slice;
+ if(op->u.sedf.soft)
{
- /* Weight-driven domains with extratime only */
- for_each_vcpu ( p, v )
- {
- /* (Here and everywhere in the following) IRQs are already off,
- * hence vcpu_spin_lock() is the one. */
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- EDOM_INFO(v)->extraweight = op->u.sedf.weight;
- EDOM_INFO(v)->weight = 0;
- EDOM_INFO(v)->slice = 0;
- EDOM_INFO(v)->period = WEIGHT_PERIOD;
- vcpu_schedule_unlock(lock, v);
- }
+ EDOM_INFO(v)->status |= SEDF_SOFT_TASK;
}
else
{
- /* Weight-driven domains with real-time execution */
- for_each_vcpu ( p, v )
+ /* Correct deadline when switching from a soft to hard vcpu */
+ if( unlikely((EDOM_INFO(v)->deadl_abs - now) >= (EDOM_INFO(v)->period * 3)) )
{
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- EDOM_INFO(v)->weight = op->u.sedf.weight;
- vcpu_schedule_unlock(lock, v);
+ EDOM_INFO(v)->deadl_abs = (now - EDOM_INFO(v)->cputime) + (2 * EDOM_INFO(v)->period);
}
+
+ EDOM_INFO(v)->status &= (~SEDF_SOFT_TASK);
}
- }
- else
- {
- /*
- * Sanity checking: note that disabling extra weight requires
- * that we set a non-zero slice.
- */
- if ( (op->u.sedf.period > PERIOD_MAX) ||
- (op->u.sedf.period < PERIOD_MIN) ||
- (op->u.sedf.slice > op->u.sedf.period) ||
- (op->u.sedf.slice < SLICE_MIN) )
- {
- rc = -EINVAL;
- goto out;
- }
-
- /* Time-driven domains */
- for_each_vcpu ( p, v )
- {
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- EDOM_INFO(v)->weight = 0;
- EDOM_INFO(v)->extraweight = 0;
- EDOM_INFO(v)->period_orig =
- EDOM_INFO(v)->period = op->u.sedf.period;
- EDOM_INFO(v)->slice_orig =
- EDOM_INFO(v)->slice = op->u.sedf.slice;
- vcpu_schedule_unlock(lock, v);
- }
- }
-
- rc = sedf_adjust_weights(p->cpupool, nr_cpus, sumw, sumt);
- if ( rc )
- goto out;
-
- for_each_vcpu ( p, v )
- {
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- EDOM_INFO(v)->status =
- (EDOM_INFO(v)->status &
- ~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE);
- EDOM_INFO(v)->latency = op->u.sedf.latency;
- extraq_check(v);
vcpu_schedule_unlock(lock, v);
}
}
@@ -1485,17 +889,12 @@ static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct xen
op->u.sedf.period = EDOM_INFO(p->vcpu[0])->period;
op->u.sedf.slice = EDOM_INFO(p->vcpu[0])->slice;
- op->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE;
- op->u.sedf.latency = EDOM_INFO(p->vcpu[0])->latency;
- op->u.sedf.weight = EDOM_INFO(p->vcpu[0])->weight;
+ op->u.sedf.soft = sedf_soft(p->vcpu[0]);
}
out:
spin_unlock_irqrestore(&prv->lock, flags);
- xfree(sumt);
- xfree(sumw);
-
return rc;
}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 27+ messages in thread* Re: [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf
2014-06-13 19:58 ` [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf Josh Whitehead
@ 2014-06-17 15:43 ` Dario Faggioli
2014-06-26 20:17 ` Joshua Whitehead
2014-06-17 16:06 ` Dario Faggioli
1 sibling, 1 reply; 27+ messages in thread
From: Dario Faggioli @ 2014-06-17 15:43 UTC (permalink / raw)
To: Josh Whitehead
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer
[-- Attachment #1.1: Type: text/plain, Size: 3582 bytes --]
Again about patch splitting.
In this patch you are killing a lot of things, which I agree about
killing. But then you also add what's required to implement the CBS
algorithm. That is really really really really hard to review.
It's probably not possible to come down to a patch with only '-', but
still I think this patch should at least be split in two. :-)
In one (the first) you remove all the stuff we don't want to be part of the
algorithm any longer, like the extraqueue, the weight, the latency, that
super-ugly thing it does at vcpu wakeup time, etc.
In the other (the second) you implement the CBS algorithm on top of
what's remaining.
While doing this, be careful that, in order to avoid breaking bisectability of
the tree, the code base must always compile, even with only the first
patch, in the above example. Given how OSSTEST (and its bisector)
works, I think it's best if you can confirm that, whatever patch you've
got on top, the system boots.
On ven, 2014-06-13 at 15:58 -0400, Josh Whitehead wrote:
> ---
>
So, empty changelog.
I guess this is because it's an RFC, so, yeah, no big deal, especially
considering you really did a great job in the cover letter.
However, I really recommend to put something up there, even for very
early revisions.
Oh, BTW, even if you really don't want to put any description or
changelog of any sort (which you really should, though), we at least
need the 'Signed-off-by:' thing.
> diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c
> index 0c9011a..2ee4538 100644
> --- a/xen/common/sched_sedf.c
> +++ b/xen/common/sched_sedf.c
> @@ -58,24 +50,14 @@ struct sedf_priv_info {
> struct sedf_vcpu_info {
> struct vcpu *vcpu;
> struct list_head list;
> - struct list_head extralist[2];
>
> /* Parameters for EDF */
> s_time_t period; /* = relative deadline */
> s_time_t slice; /* = worst case execution time */
> -
> - /* Advaced Parameters */
> + /* Note: Server bandwidth = (slice / period) */
>
> - /* Latency Scaling */
> - s_time_t period_orig;
> - s_time_t slice_orig;
> - s_time_t latency;
> -
> /* Status of domain */
> int status;
> - /* Weights for "Scheduling for beginners/ lazy/ etc." ;) */
> - short weight;
> - short extraweight;
>
About these (so weight and related, latency and related), this is where
I, _FOR_NOW_, would put a bunch of /* TODOs */, with some explanation of
what's going on.
If it were me doing this, I would, most likely:
1. leave the parameters alone here
2. temporarily kill any handling and usage of them in the rest of the
file
3. stick a TODO right here explaining 2.
4. once the support and handling of the parameters will be back, come
back here, and remove the TODO
Of course, 4 depends on what we decide to do with 'weight' and
'latency'... which is one more reason to keep this in stand-by, but make
sure this does not stop further development.
Also, I'd say the series should remain an RFC, until the TODOs are gone.
As I said, it's very hard to review the patch like this... Let me know
if you agree in splitting it, in which case, I'd rather have a look at
that version when it'll be ready.
Regards,
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf
2014-06-17 15:43 ` Dario Faggioli
@ 2014-06-26 20:17 ` Joshua Whitehead
2014-06-28 2:19 ` Dario Faggioli
0 siblings, 1 reply; 27+ messages in thread
From: Joshua Whitehead @ 2014-06-26 20:17 UTC (permalink / raw)
To: Dario Faggioli
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer
On 6/17/2014 11:43 AM, Dario Faggioli wrote:
> Again about patch splitting.
>
> In this patch you are killing a lot of things, which I agree about
> killing. But then you also add what's required to implement the CBS
> algorithm. That is really really really really hard to review.
>
> It's probably not possible to come down to a patch with only '-', but
> still I think this patch should at least be split in two. :-)
>
> In one (the first) you remove all the stuff we won't to be part of the
> algorithm any longer, like the extraqueue, the weight, the latency, that
> super-ugly thing it does at vcpu wakeup time, etc.
>
> In the other (the second) you implement the CBS algorithm on top of
> what's remaining.
>
> While doing this, be careful that, in order to avoid bisectability of
> the tree, the code base must always compile, even with only the first
> patch, in the above example. Given how OSSTEST (and it's bisector)
> works, I think it's best if you can confirm that, whatever patch you've
> got on top, the system boots.
>
This is good to know- we actually have something very much along these lines in
our local repos. We have a separate branch containing only the removal of the
extraneous bits of SEDF which does compile, and is what we used to implement CBS
on in the first place. At some point we chose to combine the two operations for
the patch submission and in retrospect I'm not sure why. So we should be able
to add an additional patch to the series (essentially splitting this patch) in
V2 that does as you mentioned above- separating the removal of parts from SEDF
and the implementation of CBS.
> On ven, 2014-06-13 at 15:58 -0400, Josh Whitehead wrote:
>> ---
>>
> So, empty changelog.
>
> I guess this is because it's an RFC, so, yeah, no big deal, especially
> considering you really did a great job in the cover letter.
>
> However, I really recommend to put something up there, even for very
> early revisions.
>
> Oh, BTW, even if you really don't want to put any description or
> changelog of any sort (which you really should, though), we at least
> need the 'Signed-off-by:' thing.
>
I apologize for this, I actually had a small entry on each of these patches as
well as the "Signed-off-by:" lines, but in getting the patch ready to send out I
must have forgotten to put them in after my last "format-patch". This will be
corrected in V2.
>> diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c
>> index 0c9011a..2ee4538 100644
>> --- a/xen/common/sched_sedf.c
>> +++ b/xen/common/sched_sedf.c
>
>> @@ -58,24 +50,14 @@ struct sedf_priv_info {
>> struct sedf_vcpu_info {
>> struct vcpu *vcpu;
>> struct list_head list;
>> - struct list_head extralist[2];
>>
>> /* Parameters for EDF */
>> s_time_t period; /* = relative deadline */
>> s_time_t slice; /* = worst case execution time */
>> -
>> - /* Advaced Parameters */
>> + /* Note: Server bandwidth = (slice / period) */
>>
>> - /* Latency Scaling */
>> - s_time_t period_orig;
>> - s_time_t slice_orig;
>> - s_time_t latency;
>> -
>> /* Status of domain */
>> int status;
>> - /* Weights for "Scheduling for beginners/ lazy/ etc." ;) */
>> - short weight;
>> - short extraweight;
>>
> About these (so weight and related, latency and related), this is where
> I, _FOR_NOW_, would put a bunch of /* TODOs */, with some explanation of
> what's going on.
>
> If it were me doing this, I would, most likely:
> 1. leave the parameters alone here
> 2. temporarily kill any handling and usage of them in the rest of the
> file
> 3. stick a TODO right here explaining 2.
> 4. once the support and handling of the parameters will be back, come
> back here, and remove the TODO
>
> Of course, 4 depends on what we decide to do with 'weight' and
> 'latency'... which is one more reason to keep this in stand-by, but make
> sure this does not stop further development.
>
> Also, I'd say the series should remain an RFC, until the TODOs are gone.
>
> As I said, it's very hard to review the patch like this... Let me know
> if you agree in splitting it, in which case, I'd rather have a look at
> that version when it'll be ready.
>
Yes I think splitting (as well as some of the other reorganization mentioned
elsewhere) would be best and make it easier to review. Some of the above would
also depend on how we decided to handle the input from George as it sounds like
he doesn't feel it would cause much of an issue to change the parameters around.
I can't promise anything as we have other projects going on concurrently, but
maybe we'll shoot to have a reorganized and cleaned up V2 out by next week.
Thanks again for the feedback, this has been helpful in solidifying our goals here.
- Josh Whitehead
> Regards,
> Dario
>
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf
2014-06-26 20:17 ` Joshua Whitehead
@ 2014-06-28 2:19 ` Dario Faggioli
0 siblings, 0 replies; 27+ messages in thread
From: Dario Faggioli @ 2014-06-28 2:19 UTC (permalink / raw)
To: Joshua Whitehead
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer
[-- Attachment #1.1: Type: text/plain, Size: 1583 bytes --]
On gio, 2014-06-26 at 16:17 -0400, Joshua Whitehead wrote:
> On 6/17/2014 11:43 AM, Dario Faggioli wrote:
> > As I said, it's very hard to review the patch like this... Let me know
> > if you agree in splitting it, in which case, I'd rather have a look at
> > that version when it'll be ready.
> >
> Yes I think splitting (as well as some of the other reorganization mentioned
> elsewhere) would be best and make it easier to review. Some of the above would
> also depend on how we decided to handle the input from George as it sounds like
> he doesn't feel it would cause much of an issue to change the parameters around.
> I can't promise anything as we have other projects going on concurrently, but
> maybe we'll shoot to have a reorganized and cleaned up V2 out by next week.
>
Saying it once more, that would be great.
As per the renaming/removing, I'd say, for v2, do as George said. Change
in place, rename and remove the list (nothing?), at least from the
interface, and change the behavior whenever required.
Also, make sure you comply with the API stability requirement, at the
libxl level, as suggested also by George, by means of the proper
LIBXL_HAVE_xxx and/or LIBXL_API_VERSION_xxx macros.
There are usage examples of both, in libxl sources.
Thanks and Regards,
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 181 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf
2014-06-13 19:58 ` [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf Josh Whitehead
2014-06-17 15:43 ` Dario Faggioli
@ 2014-06-17 16:06 ` Dario Faggioli
2014-06-26 20:18 ` Joshua Whitehead
1 sibling, 1 reply; 27+ messages in thread
From: Dario Faggioli @ 2014-06-17 16:06 UTC (permalink / raw)
To: Josh Whitehead
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer
[-- Attachment #1.1: Type: text/plain, Size: 4553 bytes --]
Although very hard, I tried to have a look at the CBS implementation
(so, some of the '+' hunks):
On ven, 2014-06-13 at 15:58 -0400, Josh Whitehead wrote:
> ---
> @@ -410,49 +301,59 @@ static void desched_edf_dom(s_time_t now, struct vcpu* d)
>
> __del_from_queue(d);
>
> - /*
> - * Manage bookkeeping (i.e. calculate next deadline, memorise
> - * overrun-time of slice) of finished domains.
> - */
> +#ifdef SEDF_STATS
> + /* Manage deadline misses */
> + if ( unlikely(inf->deadl_abs < now) )
> + {
> + inf->miss_tot++;
> + inf->miss_time += inf->cputime;
> + }
> +#endif
> +
> + /* Manage overruns */
> if ( inf->cputime >= inf->slice )
> {
> inf->cputime -= inf->slice;
> -
> - if ( inf->period < inf->period_orig )
> - {
> - /* This domain runs in latency scaling or burst mode */
> - inf->period *= 2;
> - inf->slice *= 2;
> - if ( (inf->period > inf->period_orig) ||
> - (inf->slice > inf->slice_orig) )
> - {
> - /* Reset slice and period */
> - inf->period = inf->period_orig;
> - inf->slice = inf->slice_orig;
> - }
> - }
>
> /* Set next deadline */
> inf->deadl_abs += inf->period;
> +
> + /* Ensure that the cputime is always less than slice */
> + if ( unlikely(inf->cputime > inf->slice) )
> + {
> +#ifdef SEDF_STATS
> + inf->over_tot++;
> + inf->over_time += inf->cputime;
> +#endif
> +
> + /* Make up for the overage by pushing the deadline
> + into the future */
> + inf->deadl_abs += ((inf->cputime / inf->slice)
> + * inf->period) * 2;
> + inf->cputime -= (inf->cputime / inf->slice) * inf->slice;
> + }
>
Can you enlighten me a bit about the math here? I see what you're up to,
but I'm not sure I understand the '*2'...
> + /* Ensure that the start of the next period is in the future */
> + if ( unlikely(PERIOD_BEGIN(inf) < now) )
> + inf->deadl_abs +=
> + (DIV_UP(now - PERIOD_BEGIN(inf),
> + inf->period)) * inf->period;
> }
> @@ -1100,62 +663,65 @@ static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
> inf->block_tot++;
> #endif
>
> - if ( unlikely(now < PERIOD_BEGIN(inf)) )
> - {
> - /* Unblocking in extra-time! */
> - if ( inf->status & EXTRA_WANT_PEN_Q )
> + if ( sedf_soft(d) )
> + {
> + /* Apply CBS rule
> + * Where:
> + * c == Remaining server slice == (inf->slice - cpu_time)
> + * d == Server (vcpu) deadline == inf->deadl_abs
> + * r == Wake-up time of vcpu == now
> + * U == Server (vcpu) bandwidth == (inf->slice / inf->period)
> + *
> + * if c>=(d-r)*U --->
> + * (inf->slice - cputime) >= (inf->deadl_abs - now) * inf->period
> + *
Well, I think it's rather:
(inf->slice - cputime) >= (inf->deadl_abs - now) *
(inf->slice / inf->period)
It's only the comment that is wrong, though, the code is ok.
> + * If true, push deadline back by one period and refresh slice, else
> + * use current slice and deadline.
> + */
> + if((inf->slice - inf->cputime) >=
> + ((inf->deadl_abs - now) * (inf->slice / inf->period)))
> {
>
You can shuffle this a bit more, and avoid the '/'.
The condition above can be rewritten as:
c >= (d-r) * (inf->slice/inf->period)
i.e.:
c * inf->period >= (d-r) * inf->slice
and thus, the code can be rewritten as:
if ((inf->slice - inf->cputime) * inf->period >=
(inf->deadl_abs - now) * inf->slice)
which I think is better. One may worry about the fact that the
multiplication can overflow, but that's really unlikely, since all the
involved time values are relative (i.e., remaining runtime, time to
deadline, etc).
Anyway, let's cross that bridge when we get to it.
Regards,
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf
2014-06-17 16:06 ` Dario Faggioli
@ 2014-06-26 20:18 ` Joshua Whitehead
2014-06-28 2:27 ` Dario Faggioli
0 siblings, 1 reply; 27+ messages in thread
From: Joshua Whitehead @ 2014-06-26 20:18 UTC (permalink / raw)
To: Dario Faggioli
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer
On 6/17/2014 12:06 PM, Dario Faggioli wrote:
> Although very hard, I tried to have a look at the CBS implementation
> (so, some of the '+' hunks):
>
> On ven, 2014-06-13 at 15:58 -0400, Josh Whitehead wrote:
>> ---
>
>> @@ -410,49 +301,59 @@ static void desched_edf_dom(s_time_t now, struct vcpu* d)
>>
>> __del_from_queue(d);
>>
>> - /*
>> - * Manage bookkeeping (i.e. calculate next deadline, memorise
>> - * overrun-time of slice) of finished domains.
>> - */
>> +#ifdef SEDF_STATS
>> + /* Manage deadline misses */
>> + if ( unlikely(inf->deadl_abs < now) )
>> + {
>> + inf->miss_tot++;
>> + inf->miss_time += inf->cputime;
>> + }
>> +#endif
>> +
>> + /* Manage overruns */
>> if ( inf->cputime >= inf->slice )
>> {
>> inf->cputime -= inf->slice;
>> -
>> - if ( inf->period < inf->period_orig )
>> - {
>> - /* This domain runs in latency scaling or burst mode */
>> - inf->period *= 2;
>> - inf->slice *= 2;
>> - if ( (inf->period > inf->period_orig) ||
>> - (inf->slice > inf->slice_orig) )
>> - {
>> - /* Reset slice and period */
>> - inf->period = inf->period_orig;
>> - inf->slice = inf->slice_orig;
>> - }
>> - }
>>
>> /* Set next deadline */
>> inf->deadl_abs += inf->period;
>> +
>> + /* Ensure that the cputime is always less than slice */
>> + if ( unlikely(inf->cputime > inf->slice) )
>> + {
>> +#ifdef SEDF_STATS
>> + inf->over_tot++;
>> + inf->over_time += inf->cputime;
>> +#endif
>> +
>> + /* Make up for the overage by pushing the deadline
>> + into the future */
>> + inf->deadl_abs += ((inf->cputime / inf->slice)
>> + * inf->period) * 2;
>> + inf->cputime -= (inf->cputime / inf->slice) * inf->slice;
>> + }
>>
> Can you enlighten me a bit about the math here? I see what you're up to,
> but I'm not sure I understand the '*2'...
>
Ah, the '*2' is not necessary, it may be a leftover from an experiment we were
doing at one point. This was something we caught as well but apparently it got
overlooked before we submitted the series. I will make sure to update this for
the V2 patch.
>> + /* Ensure that the start of the next period is in the future */
>> + if ( unlikely(PERIOD_BEGIN(inf) < now) )
>> + inf->deadl_abs +=
>> + (DIV_UP(now - PERIOD_BEGIN(inf),
>> + inf->period)) * inf->period;
>> }
>
>> @@ -1100,62 +663,65 @@ static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
>> inf->block_tot++;
>> #endif
>>
>> - if ( unlikely(now < PERIOD_BEGIN(inf)) )
>> - {
>> - /* Unblocking in extra-time! */
>> - if ( inf->status & EXTRA_WANT_PEN_Q )
>> + if ( sedf_soft(d) )
>> + {
>> + /* Apply CBS rule
>> + * Where:
>> + * c == Remaining server slice == (inf->slice - cpu_time)
>> + * d == Server (vcpu) deadline == inf->deadl_abs
>> + * r == Wake-up time of vcpu == now
>> + * U == Server (vcpu) bandwidth == (inf->slice / inf->period)
>> + *
>> + * if c>=(d-r)*U --->
>> + * (inf->slice - cputime) >= (inf->deadl_abs - now) * inf->period
>> + *
> Well, I think it's rather:
>
> (inf->slice - cputime) >= (inf->deadl_abs - now) *
> (inf->slice / inf->period)
>
> It's only the comment that is wrong, though, the code is ok.
>
Yes, just a typo in the comments, we will correct that.
>> + * If true, push deadline back by one period and refresh slice, else
>> + * use current slice and deadline.
>> + */
>> + if((inf->slice - inf->cputime) >=
>> + ((inf->deadl_abs - now) * (inf->slice / inf->period)))
>> {
>>
> You can shuffle this a bit more, and avoid the '/'.
>
> The condition above can be rewritten as:
>
> c >= (d-r) * (inf->slice/inf->period)
>
> i.e.:
>
> c * inf->period >= (d-r) * inf->slice
>
> and this, the code can be rewritten as:
>
> if ((inf->slice - inf->cputime) * inf->period >=
> (inf->deadl_abs - now) * inf->slice)
>
> which I think it's better. One may worry about the fact that the
> multiplication can overflow, but that's really unlikely, since all the
> involved time values are relative (i.e., remaining runtime, time to
> deadline, etc).
>
> Anyway, let's cross that bridge when we get to it.
>
This is a good point- because of the new nature of the scheduler we had not made
any attempts at simplification yet, but rather attempted to keep it as apparent
and straightforward as possible until we were positive everything was working
correctly. This would certainly be a good simplification and I agree the
multiplication overflow is highly unlikely. If you would like I can make this
update in the V2, or we can leave it until we get any other bugs ironed out,
whichever you think would be easiest. Thanks!
- Josh Whitehead
> Regards,
> Dario
>
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf
2014-06-26 20:18 ` Joshua Whitehead
@ 2014-06-28 2:27 ` Dario Faggioli
0 siblings, 0 replies; 27+ messages in thread
From: Dario Faggioli @ 2014-06-28 2:27 UTC (permalink / raw)
To: Joshua Whitehead
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer
[-- Attachment #1.1: Type: text/plain, Size: 3039 bytes --]
On gio, 2014-06-26 at 16:18 -0400, Joshua Whitehead wrote:
> On 6/17/2014 12:06 PM, Dario Faggioli wrote:
> >> +
> >> + /* Make up for the overage by pushing the deadline
> >> + into the future */
> >> + inf->deadl_abs += ((inf->cputime / inf->slice)
> >> + * inf->period) * 2;
> >> + inf->cputime -= (inf->cputime / inf->slice) * inf->slice;
> >> + }
> >>
> > Can you enlighten me a bit about the math here? I see what you're up to,
> > but I'm not sure I understand the '*2'...
> >
> Ah, the '*2' is not necessary, it may be a leftover from an experiment we were
> doing at one point. This was something we caught as well but apparently it got
> overlooked before we submitted the series. I will make sure to update this for
> the V2 patch.
>
Ok. When I'll see v2, I'll tell more, as I can't right now. My
impression still is that what you're up to is sensible, but I think it
should be done differently. At least, I've always done it differently,
in all the EDF/CBS implementations I wrote, and I'm not sure I fully
understand this variant of yours.
But again, send v2, and I'll comment on it! :-)
> > You can shuffle this a bit more, and avoid the '/'.
> >
> > The condition above can be rewritten as:
> >
> > c >= (d-r) * (inf->slice/inf->period)
> >
> > i.e.:
> >
> > c * inf->period >= (d-r) * inf->slice
> >
> > and this, the code can be rewritten as:
> >
> > if ((inf->slice - inf->cputime) * inf->period >=
> > (inf->deadl_abs - now) * inf->slice)
> >
> > which I think it's better. One may worry about the fact that the
> > multiplication can overflow, but that's really unlikely, since all the
> > involved time values are relative (i.e., remaining runtime, time to
> > deadline, etc).
> >
> > Anyway, let's cross that bridge when we get to it.
> >
> This is a good point- because of the new nature of the scheduler we had not made
> any attempts at simplification yet, but rather attempted to keep it as apparent
> and straightforward as possible until we were positive everything was working
> correctly. This would certainly be a good simplification and I agree the
> multiplication overflow is highly unlikely. If you would like I can make this
> update in the V2, or we can leave it until we get any other bugs ironed out,
> whichever you think would be easiest.
>
It's rather basic math, IMO, and it does not really make the condition,
and hence the whole algorithm, any harder to understand.
I'd say, do it right away, in v2. Do put a comment on top, with the
equation in the 'plain' form, and specify that you're shuffling for
avoiding a div, and that should do.
Regards,
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 181 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* [RFC PATCH 2/4] Add cbs parameter support to xl tool stack, remove defunct sedf parameters
2014-06-13 19:58 [RFC PATCH 0/4] Repurpose SEDF Scheduler for Real-time use Josh Whitehead
2014-06-13 19:58 ` [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf Josh Whitehead
@ 2014-06-13 19:58 ` Josh Whitehead
2014-06-17 15:02 ` Dario Faggioli
2014-06-13 19:58 ` [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables Josh Whitehead
` (3 subsequent siblings)
5 siblings, 1 reply; 27+ messages in thread
From: Josh Whitehead @ 2014-06-13 19:58 UTC (permalink / raw)
To: Xen-devel
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Dario Faggioli,
Ian Jackson, Robert VanVossen, Nate Studer, Josh Whitehead
From: Robbie VanVossen <robert.vanvossen@dornerworks.com>
---
docs/man/xl.cfg.pod.5 | 9 ++----
tools/libxc/xc_sedf.c | 16 +++-------
tools/libxc/xenctrl.h | 12 +++----
tools/libxl/libxl.c | 31 ++++++-------------
tools/libxl/libxl.h | 3 +-
tools/libxl/libxl_create.c | 61 ------------------------------------
tools/libxl/libxl_types.idl | 3 +-
tools/libxl/xl_cmdimpl.c | 62 ++++++++++---------------------------
tools/libxl/xl_cmdtable.c | 8 ++---
tools/python/xen/lowlevel/xc/xc.c | 34 +++++++++-----------
xen/include/public/domctl.h | 4 +--
11 files changed, 58 insertions(+), 185 deletions(-)
mode change 100644 => 100755 docs/man/xl.cfg.pod.5
mode change 100644 => 100755 tools/libxc/xc_sedf.c
mode change 100644 => 100755 tools/libxc/xenctrl.h
mode change 100644 => 100755 tools/libxl/libxl.c
mode change 100644 => 100755 tools/libxl/libxl.h
mode change 100644 => 100755 tools/libxl/libxl_types.idl
mode change 100644 => 100755 tools/libxl/xl_cmdimpl.c
mode change 100644 => 100755 tools/libxl/xl_cmdtable.c
mode change 100644 => 100755 tools/python/xen/lowlevel/xc/xc.c
mode change 100644 => 100755 xen/include/public/domctl.h
diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
old mode 100644
new mode 100755
index a94d037..5c55298
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -212,14 +212,9 @@ The normal EDF scheduling usage in nanoseconds. it defines the time
a domain get every period time.
Honoured by the sedf scheduler.
-=item B<latency=N>
+=item B<soft=BOOLEAN>
-Scaled period if domain is doing heavy I/O.
-Honoured by the sedf scheduler.
-
-=item B<extratime=BOOLEAN>
-
-Flag for allowing domain to run in extra time.
+Flag for setting a domain or VCPU to run as a soft task.
Honoured by the sedf scheduler.
=back
diff --git a/tools/libxc/xc_sedf.c b/tools/libxc/xc_sedf.c
old mode 100644
new mode 100755
index db372ca..81ff133
--- a/tools/libxc/xc_sedf.c
+++ b/tools/libxc/xc_sedf.c
@@ -29,9 +29,7 @@ int xc_sedf_domain_set(
uint32_t domid,
uint64_t period,
uint64_t slice,
- uint64_t latency,
- uint16_t extratime,
- uint16_t weight)
+ uint16_t soft)
{
DECLARE_DOMCTL;
struct xen_domctl_sched_sedf *p = &domctl.u.scheduler_op.u.sedf;
@@ -43,9 +41,7 @@ int xc_sedf_domain_set(
p->period = period;
p->slice = slice;
- p->latency = latency;
- p->extratime = extratime;
- p->weight = weight;
+ p->soft = soft;
return do_domctl(xch, &domctl);
}
@@ -54,9 +50,7 @@ int xc_sedf_domain_get(
uint32_t domid,
uint64_t *period,
uint64_t *slice,
- uint64_t *latency,
- uint16_t *extratime,
- uint16_t *weight)
+ uint16_t *soft)
{
DECLARE_DOMCTL;
int ret;
@@ -71,8 +65,6 @@ int xc_sedf_domain_get(
*period = p->period;
*slice = p->slice;
- *latency = p->latency;
- *extratime = p->extratime;
- *weight = p->weight;
+ *soft = p->soft;
return ret;
}
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
old mode 100644
new mode 100755
index 02129f7..d5cfdb8
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -765,15 +765,15 @@ int xc_shadow_control(xc_interface *xch,
int xc_sedf_domain_set(xc_interface *xch,
uint32_t domid,
- uint64_t period, uint64_t slice,
- uint64_t latency, uint16_t extratime,
- uint16_t weight);
+ uint64_t period,
+ uint64_t slice,
+ uint16_t soft);
int xc_sedf_domain_get(xc_interface *xch,
uint32_t domid,
- uint64_t* period, uint64_t *slice,
- uint64_t *latency, uint16_t *extratime,
- uint16_t *weight);
+ uint64_t *period,
+ uint64_t *slice,
+ uint16_t *soft);
int xc_sched_credit_domain_set(xc_interface *xch,
uint32_t domid,
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
old mode 100644
new mode 100755
index 900b8d4..cea8af2
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -4932,13 +4932,10 @@ static int sched_sedf_domain_get(libxl__gc *gc, uint32_t domid,
{
uint64_t period;
uint64_t slice;
- uint64_t latency;
- uint16_t extratime;
- uint16_t weight;
+ uint16_t soft;
int rc;
- rc = xc_sedf_domain_get(CTX->xch, domid, &period, &slice, &latency,
- &extratime, &weight);
+ rc = xc_sedf_domain_get(CTX->xch, domid, &period, &slice, &soft);
if (rc != 0) {
LOGE(ERROR, "getting domain sched sedf");
return ERROR_FAIL;
@@ -4948,9 +4945,7 @@ static int sched_sedf_domain_get(libxl__gc *gc, uint32_t domid,
scinfo->sched = LIBXL_SCHEDULER_SEDF;
scinfo->period = period / 1000000;
scinfo->slice = slice / 1000000;
- scinfo->latency = latency / 1000000;
- scinfo->extratime = extratime;
- scinfo->weight = weight;
+ scinfo->soft = soft;
return 0;
}
@@ -4960,14 +4955,11 @@ static int sched_sedf_domain_set(libxl__gc *gc, uint32_t domid,
{
uint64_t period;
uint64_t slice;
- uint64_t latency;
- uint16_t extratime;
- uint16_t weight;
+ uint16_t soft;
int ret;
- ret = xc_sedf_domain_get(CTX->xch, domid, &period, &slice, &latency,
- &extratime, &weight);
+ ret = xc_sedf_domain_get(CTX->xch, domid, &period, &slice, &soft);
if (ret != 0) {
LOGE(ERROR, "getting domain sched sedf");
return ERROR_FAIL;
@@ -4977,15 +4969,10 @@ static int sched_sedf_domain_set(libxl__gc *gc, uint32_t domid,
period = (uint64_t)scinfo->period * 1000000;
if (scinfo->slice != LIBXL_DOMAIN_SCHED_PARAM_SLICE_DEFAULT)
slice = (uint64_t)scinfo->slice * 1000000;
- if (scinfo->latency != LIBXL_DOMAIN_SCHED_PARAM_LATENCY_DEFAULT)
- latency = (uint64_t)scinfo->latency * 1000000;
- if (scinfo->extratime != LIBXL_DOMAIN_SCHED_PARAM_EXTRATIME_DEFAULT)
- extratime = scinfo->extratime;
- if (scinfo->weight != LIBXL_DOMAIN_SCHED_PARAM_WEIGHT_DEFAULT)
- weight = scinfo->weight;
-
- ret = xc_sedf_domain_set(CTX->xch, domid, period, slice, latency,
- extratime, weight);
+ if (scinfo->soft != LIBXL_DOMAIN_SCHED_PARAM_SOFT_DEFAULT)
+ soft = scinfo->soft;
+
+ ret = xc_sedf_domain_set(CTX->xch, domid, period, slice, soft);
if ( ret < 0 ) {
LOGE(ERROR, "setting domain sched sedf");
return ERROR_FAIL;
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
old mode 100644
new mode 100755
index 80947c3..548d37e
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -1120,8 +1120,7 @@ int libxl_sched_credit_params_set(libxl_ctx *ctx, uint32_t poolid,
#define LIBXL_DOMAIN_SCHED_PARAM_CAP_DEFAULT -1
#define LIBXL_DOMAIN_SCHED_PARAM_PERIOD_DEFAULT -1
#define LIBXL_DOMAIN_SCHED_PARAM_SLICE_DEFAULT -1
-#define LIBXL_DOMAIN_SCHED_PARAM_LATENCY_DEFAULT -1
-#define LIBXL_DOMAIN_SCHED_PARAM_EXTRATIME_DEFAULT -1
+#define LIBXL_DOMAIN_SCHED_PARAM_SOFT_DEFAULT -1
int libxl_domain_sched_params_get(libxl_ctx *ctx, uint32_t domid,
libxl_domain_sched_params *params);
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index d015cf4..83b593b 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -44,61 +44,6 @@ int libxl__domain_create_info_setdefault(libxl__gc *gc,
return 0;
}
-static int sched_params_valid(libxl__gc *gc,
- uint32_t domid, libxl_domain_sched_params *scp)
-{
- int has_weight = scp->weight != LIBXL_DOMAIN_SCHED_PARAM_WEIGHT_DEFAULT;
- int has_period = scp->period != LIBXL_DOMAIN_SCHED_PARAM_PERIOD_DEFAULT;
- int has_slice = scp->slice != LIBXL_DOMAIN_SCHED_PARAM_SLICE_DEFAULT;
- int has_extratime =
- scp->extratime != LIBXL_DOMAIN_SCHED_PARAM_EXTRATIME_DEFAULT;
-
- /* The sedf scheduler needs some more consistency checking */
- if (libxl__domain_scheduler(gc, domid) == LIBXL_SCHEDULER_SEDF) {
- if (has_weight && (has_period || has_slice))
- return 0;
- /* If you want a real-time domain, with its own period and
- * slice, please, do provide both! */
- if (has_period != has_slice)
- return 0;
-
- /*
- * Idea is, if we specify a weight, then both period and
- * slice has to be zero. OTOH, if we do specify a period and
- * slice, it is weight that should be zeroed. See
- * docs/misc/sedf_scheduler_mini-HOWTO.txt for more details
- * on the meaningful combinations and their meanings.
- */
- if (has_weight) {
- scp->slice = 0;
- scp->period = 0;
- }
- else if (!has_period) {
- /* No weight nor slice/period means best effort. Parameters needs
- * some mangling in order to properly ask for that, though. */
-
- /*
- * Providing no weight does not make any sense if we do not allow
- * the domain to run in extra time. On the other hand, if we have
- * extra time, weight will be ignored (and zeroed) by Xen, but it
- * can't be zero here, or the call for setting the scheduling
- * parameters will fail. So, avoid the latter by setting a random
- * weight (namely, 1), as it will be ignored anyway.
- */
-
- /* We can setup a proper best effort domain (extra time only)
- * iff we either already have or are asking for some extra time. */
- scp->weight = has_extratime ? scp->extratime : 1;
- scp->period = 0;
- } else {
- /* Real-time domain: will get slice CPU time over every period */
- scp->weight = 0;
- }
- }
-
- return 1;
-}
-
int libxl__domain_build_info_setdefault(libxl__gc *gc,
libxl_domain_build_info *b_info)
{
@@ -760,12 +705,6 @@ static void initiate_domain_create(libxl__egc *egc,
ret = libxl__domain_build_info_setdefault(gc, &d_config->b_info);
if (ret) goto error_out;
- if (!sched_params_valid(gc, domid, &d_config->b_info.sched_params)) {
- LOG(ERROR, "Invalid scheduling parameters\n");
- ret = ERROR_INVAL;
- goto error_out;
- }
-
for (i = 0; i < d_config->num_disks; i++) {
ret = libxl__device_disk_setdefault(gc, &d_config->disks[i]);
if (ret) goto error_out;
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
old mode 100644
new mode 100755
index 52f1aa9..3ec2f80
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -292,8 +292,7 @@ libxl_domain_sched_params = Struct("domain_sched_params",[
("cap", integer, {'init_val': 'LIBXL_DOMAIN_SCHED_PARAM_CAP_DEFAULT'}),
("period", integer, {'init_val': 'LIBXL_DOMAIN_SCHED_PARAM_PERIOD_DEFAULT'}),
("slice", integer, {'init_val': 'LIBXL_DOMAIN_SCHED_PARAM_SLICE_DEFAULT'}),
- ("latency", integer, {'init_val': 'LIBXL_DOMAIN_SCHED_PARAM_LATENCY_DEFAULT'}),
- ("extratime", integer, {'init_val': 'LIBXL_DOMAIN_SCHED_PARAM_EXTRATIME_DEFAULT'}),
+ ("soft", integer, {'init_val': 'LIBXL_DOMAIN_SCHED_PARAM_SOFT_DEFAULT'}),
])
libxl_domain_build_info = Struct("domain_build_info",[
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
old mode 100644
new mode 100755
index 5195914..e06f924
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -838,10 +838,8 @@ static void parse_config_data(const char *config_source,
b_info->sched_params.period = l;
if (!xlu_cfg_get_long (config, "slice", &l, 0))
b_info->sched_params.slice = l;
- if (!xlu_cfg_get_long (config, "latency", &l, 0))
- b_info->sched_params.latency = l;
- if (!xlu_cfg_get_long (config, "extratime", &l, 0))
- b_info->sched_params.extratime = l;
+ if (!xlu_cfg_get_long (config, "soft", &l, 0))
+ b_info->sched_params.soft = l;
if (!xlu_cfg_get_long (config, "vcpus", &l, 0)) {
b_info->max_vcpus = l;
@@ -5181,22 +5179,20 @@ static int sched_sedf_domain_output(
int rc;
if (domid < 0) {
- printf("%-33s %4s %6s %-6s %7s %5s %6s\n", "Name", "ID", "Period",
- "Slice", "Latency", "Extra", "Weight");
+ printf("%-33s %4s %6s %-6s %5s\n", "Name", "ID", "Period",
+ "Slice", "Soft");
return 0;
}
rc = sched_domain_get(LIBXL_SCHEDULER_SEDF, domid, &scinfo);
if (rc)
return rc;
domname = libxl_domid_to_name(ctx, domid);
- printf("%-33s %4d %6d %6d %7d %5d %6d\n",
+ printf("%-33s %4d %6d %6d %5d\n",
domname,
domid,
scinfo.period,
scinfo.slice,
- scinfo.latency,
- scinfo.extratime,
- scinfo.weight);
+ scinfo.soft);
free(domname);
libxl_domain_sched_params_dispose(&scinfo);
return 0;
@@ -5466,22 +5462,18 @@ int main_sched_sedf(int argc, char **argv)
const char *cpupool = NULL;
int period = 0, opt_p = 0;
int slice = 0, opt_s = 0;
- int latency = 0, opt_l = 0;
- int extra = 0, opt_e = 0;
- int weight = 0, opt_w = 0;
+ int soft = 0, opt_t = 0;
int opt, rc;
static struct option opts[] = {
{"period", 1, 0, 'p'},
{"slice", 1, 0, 's'},
- {"latency", 1, 0, 'l'},
- {"extra", 1, 0, 'e'},
- {"weight", 1, 0, 'w'},
+ {"soft", 1, 0, 't'},
{"cpupool", 1, 0, 'c'},
COMMON_LONG_OPTS,
{0, 0, 0, 0}
};
- SWITCH_FOREACH_OPT(opt, "d:p:s:l:e:w:c:h", opts, "sched-sedf", 0) {
+ SWITCH_FOREACH_OPT(opt, "d:p:s:t:c:h", opts, "sched-sedf", 0) {
case 'd':
dom = optarg;
break;
@@ -5493,36 +5485,24 @@ int main_sched_sedf(int argc, char **argv)
slice = strtol(optarg, NULL, 10);
opt_s = 1;
break;
- case 'l':
- latency = strtol(optarg, NULL, 10);
- opt_l = 1;
- break;
- case 'e':
- extra = strtol(optarg, NULL, 10);
- opt_e = 1;
- break;
- case 'w':
- weight = strtol(optarg, NULL, 10);
- opt_w = 1;
+ case 't':
+ soft = strtol(optarg, NULL, 10);
+ opt_t = 1;
break;
case 'c':
cpupool = optarg;
break;
}
- if (cpupool && (dom || opt_p || opt_s || opt_l || opt_e || opt_w)) {
+ if (cpupool && (dom || opt_p || opt_s || opt_t)) {
fprintf(stderr, "Specifying a cpupool is not allowed with other "
"options.\n");
return 1;
}
- if (!dom && (opt_p || opt_s || opt_l || opt_e || opt_w)) {
+ if (!dom && (opt_p || opt_s || opt_t)) {
fprintf(stderr, "Must specify a domain.\n");
return 1;
}
- if (opt_w && (opt_p || opt_s)) {
- fprintf(stderr, "Specifying a weight AND period or slice is not "
- "allowed.\n");
- }
if (!dom) { /* list all domain's credit scheduler info */
return -sched_domain_output(LIBXL_SCHEDULER_SEDF,
@@ -5532,7 +5512,7 @@ int main_sched_sedf(int argc, char **argv)
} else {
uint32_t domid = find_domain(dom);
- if (!opt_p && !opt_s && !opt_l && !opt_e && !opt_w) {
+ if (!opt_p && !opt_s) {
/* output sedf scheduler info */
sched_sedf_domain_output(-1);
return -sched_sedf_domain_output(domid);
@@ -5543,20 +5523,12 @@ int main_sched_sedf(int argc, char **argv)
if (opt_p) {
scinfo.period = period;
- scinfo.weight = 0;
}
if (opt_s) {
scinfo.slice = slice;
- scinfo.weight = 0;
}
- if (opt_l)
- scinfo.latency = latency;
- if (opt_e)
- scinfo.extratime = extra;
- if (opt_w) {
- scinfo.weight = weight;
- scinfo.period = 0;
- scinfo.slice = 0;
+ if (opt_t) {
+ scinfo.soft = soft;
}
rc = sched_domain_set(domid, &scinfo);
libxl_domain_sched_params_dispose(&scinfo);
diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c
old mode 100644
new mode 100755
index 4279b9f..1226fb8
--- a/tools/libxl/xl_cmdtable.c
+++ b/tools/libxl/xl_cmdtable.c
@@ -269,12 +269,8 @@ struct cmd_spec cmd_table[] = {
"-p MS, --period=MS Relative deadline(ms)\n"
"-s MS, --slice=MS Worst-case execution time(ms).\n"
" (slice < period)\n"
- "-l MS, --latency=MS Scaled period (ms) when domain\n"
- " performs heavy I/O\n"
- "-e FLAG, --extra=FLAG Flag (0 or 1) controls if domain\n"
- " can run in extra time\n"
- "-w FLOAT, --weight=FLOAT CPU Period/slice (do not set with\n"
- " --period/--slice)\n"
+ "-t FLAG, --soft=FLAG Flag (0 or 1) controls if domain\n"
+ " can run as a soft task\n"
"-c CPUPOOL, --cpupool=CPUPOOL Restrict output to CPUPOOL"
},
{ "domid",
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
old mode 100644
new mode 100755
index cb34446..bf10165
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -1471,17 +1471,17 @@ static PyObject *pyxc_sedf_domain_set(XcObject *self,
PyObject *kwds)
{
uint32_t domid;
- uint64_t period, slice, latency;
- uint16_t extratime, weight;
- static char *kwd_list[] = { "domid", "period", "slice",
- "latency", "extratime", "weight",NULL };
+ uint64_t period, slice;
+ uint16_t soft;
+
+ static char *kwd_list[] = { "domid", "period", "slice", "soft",NULL };
- if( !PyArg_ParseTupleAndKeywords(args, kwds, "iLLLhh", kwd_list,
- &domid, &period, &slice,
- &latency, &extratime, &weight) )
+ if( !PyArg_ParseTupleAndKeywords(args, kwds, "iLLi", kwd_list,
+ &domid, &period, &slice, &soft) )
+
return NULL;
if ( xc_sedf_domain_set(self->xc_handle, domid, period,
- slice, latency, extratime,weight) != 0 )
+ slice, soft) != 0 )
return pyxc_error_to_exception(self->xc_handle);
Py_INCREF(zero);
@@ -1491,23 +1491,21 @@ static PyObject *pyxc_sedf_domain_set(XcObject *self,
static PyObject *pyxc_sedf_domain_get(XcObject *self, PyObject *args)
{
uint32_t domid;
- uint64_t period, slice,latency;
- uint16_t weight, extratime;
+ uint64_t period, slice;
+ uint16_t soft;
if(!PyArg_ParseTuple(args, "i", &domid))
return NULL;
if (xc_sedf_domain_get(self->xc_handle, domid, &period,
- &slice,&latency,&extratime,&weight))
+ &slice, &soft))
return pyxc_error_to_exception(self->xc_handle);
- return Py_BuildValue("{s:i,s:L,s:L,s:L,s:i,s:i}",
+ return Py_BuildValue("{s:i,s:L,s:L,s:i}",
"domid", domid,
"period", period,
"slice", slice,
- "latency", latency,
- "extratime", extratime,
- "weight", weight);
+ "soft", soft);
}
static PyObject *pyxc_shadow_control(PyObject *self,
@@ -2544,8 +2542,7 @@ static PyMethodDef pyxc_methods[] = {
" dom [int]: domain to set\n"
" period [long]: domain's scheduling period\n"
" slice [long]: domain's slice per period\n"
- " latency [long]: domain's wakeup latency hint\n"
- " extratime [int]: domain aware of extratime?\n"
+ " soft [int]: domain is a soft task?\n"
"Returns: [int] 0 on success; -1 on error.\n" },
{ "sedf_domain_get",
@@ -2558,8 +2555,7 @@ static PyMethodDef pyxc_methods[] = {
" domain [int]: domain ID\n"
" period [long]: scheduler period\n"
" slice [long]: CPU reservation per period\n"
- " latency [long]: domain's wakeup latency hint\n"
- " extratime [int]: domain aware of extratime?\n"},
+ " soft [int]: domain is a soft task?\n"},
{ "sched_credit_domain_set",
(PyCFunction)pyxc_sched_credit_domain_set,
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
old mode 100644
new mode 100755
index 565fa4c..6e143d3
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -331,9 +331,7 @@ struct xen_domctl_scheduler_op {
struct xen_domctl_sched_sedf {
uint64_aligned_t period;
uint64_aligned_t slice;
- uint64_aligned_t latency;
- uint32_t extratime;
- uint32_t weight;
+ uint32_t soft;
} sedf;
struct xen_domctl_sched_credit {
uint16_t weight;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 2/4] Add cbs parameter support to xl tool stack, remove defunct sedf parameters
2014-06-13 19:58 ` [RFC PATCH 2/4] Add cbs parameter support to xl tool stack, remove defunct sedf parameters Josh Whitehead
@ 2014-06-17 15:02 ` Dario Faggioli
2014-06-26 19:55 ` Joshua Whitehead
0 siblings, 1 reply; 27+ messages in thread
From: Dario Faggioli @ 2014-06-17 15:02 UTC (permalink / raw)
To: Josh Whitehead
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer
[-- Attachment #1.1: Type: text/plain, Size: 2538 bytes --]
The core of this patch, as well as of patches 3 and 4, is renaming
*_sedf to *_cbs, on which I think I commented already... let's see where
that discussion goes.
So, leaving the renaming part aside...
On ven, 2014-06-13 at 15:58 -0400, Josh Whitehead wrote:
> From: Robbie VanVossen <robert.vanvossen@dornerworks.com>
>
> ---
> docs/man/xl.cfg.pod.5 | 9 ++----
> tools/libxc/xc_sedf.c | 16 +++-------
> tools/libxc/xenctrl.h | 12 +++----
> tools/libxl/libxl.c | 31 ++++++-------------
> tools/libxl/libxl.h | 3 +-
> tools/libxl/libxl_create.c | 61 ------------------------------------
> tools/libxl/libxl_types.idl | 3 +-
> tools/libxl/xl_cmdimpl.c | 62 ++++++++++---------------------------
> tools/libxl/xl_cmdtable.c | 8 ++---
> tools/python/xen/lowlevel/xc/xc.c | 34 +++++++++-----------
> xen/include/public/domctl.h | 4 +--
>
What we usually do is split the series so that each patch crosses as few
component boundaries as possible.
That means, for instance, in this case, having one (or more) patch(es)
modifying libxc, one or more modifying libxl and, if possible, one or
more modifying xl.
That is mostly to make the review easier, but also because different
components may be under the umbrella of different maintainers and/or
committers.
When modifying libxl, please mind the issue of API stability that I
brought up in this thread already.
> 11 files changed, 58 insertions(+), 185 deletions(-)
> mode change 100644 => 100755 docs/man/xl.cfg.pod.5
> mode change 100644 => 100755 tools/libxc/xc_sedf.c
> mode change 100644 => 100755 tools/libxc/xenctrl.h
> mode change 100644 => 100755 tools/libxl/libxl.c
> mode change 100644 => 100755 tools/libxl/libxl.h
> mode change 100644 => 100755 tools/libxl/libxl_types.idl
> mode change 100644 => 100755 tools/libxl/xl_cmdimpl.c
> mode change 100644 => 100755 tools/libxl/xl_cmdtable.c
> mode change 100644 => 100755 tools/python/xen/lowlevel/xc/xc.c
> mode change 100644 => 100755 xen/include/public/domctl.h
>
What are these? I really don't think they should be here.
Regards,
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 2/4] Add cbs parameter support to xl tool stack, remove defunct sedf parameters
2014-06-17 15:02 ` Dario Faggioli
@ 2014-06-26 19:55 ` Joshua Whitehead
0 siblings, 0 replies; 27+ messages in thread
From: Joshua Whitehead @ 2014-06-26 19:55 UTC (permalink / raw)
To: Dario Faggioli
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer
On 6/17/2014 11:02 AM, Dario Faggioli wrote:
> The core of this patch, as well as of patches 3 and 4, is renaming
> *_sedf to *_cbs, on which I think I commented already... let's see where
> that discussion goes.
>
> So, leaving the renaming part aside...
>
> On ven, 2014-06-13 at 15:58 -0400, Josh Whitehead wrote:
>> From: Robbie VanVossen <robert.vanvossen@dornerworks.com>
>>
>> ---
>> docs/man/xl.cfg.pod.5 | 9 ++----
>> tools/libxc/xc_sedf.c | 16 +++-------
>> tools/libxc/xenctrl.h | 12 +++----
>> tools/libxl/libxl.c | 31 ++++++-------------
>> tools/libxl/libxl.h | 3 +-
>> tools/libxl/libxl_create.c | 61 ------------------------------------
>> tools/libxl/libxl_types.idl | 3 +-
>> tools/libxl/xl_cmdimpl.c | 62 ++++++++++---------------------------
>> tools/libxl/xl_cmdtable.c | 8 ++---
>> tools/python/xen/lowlevel/xc/xc.c | 34 +++++++++-----------
>> xen/include/public/domctl.h | 4 +--
>>
> What we usually do is split the series so that each patch crosses as few
> component boundaries as possible.
>
> That means, for instance, in this case, having one (or more) patch(es)
> modifying libxc, one or more modifying libxl and, if possible, one or
> more modifying xl.
>
> That is mostly to make the review easier, but also because different
> components may be under the umbrella of different maintainers and/or
> committers.
>
> When modifying libxl, please mind the issue of API stability that I
> brought up in this thread already.
>
Our intention had been to isolate components with each of the patches, but
apparently we did not subdivide those components as far as we needed. This
should be a simple enough fix for V2 of the patch to properly split along
component boundaries (as well as further reorganization of the patch that has
been discussed in other threads).
>> 11 files changed, 58 insertions(+), 185 deletions(-)
>> mode change 100644 => 100755 docs/man/xl.cfg.pod.5
>> mode change 100644 => 100755 tools/libxc/xc_sedf.c
>> mode change 100644 => 100755 tools/libxc/xenctrl.h
>> mode change 100644 => 100755 tools/libxl/libxl.c
>> mode change 100644 => 100755 tools/libxl/libxl.h
>> mode change 100644 => 100755 tools/libxl/libxl_types.idl
>> mode change 100644 => 100755 tools/libxl/xl_cmdimpl.c
>> mode change 100644 => 100755 tools/libxl/xl_cmdtable.c
>> mode change 100644 => 100755 tools/python/xen/lowlevel/xc/xc.c
>> mode change 100644 => 100755 xen/include/public/domctl.h
>>
> What are these? I really don't think they should be here.
>
Apologies for this. These permission changes were not intended to go out to the
list; they were a side effect of something else we were working on. We'll
remove them for V2 of the patch. Thanks.
- Josh Whitehead
> Regards,
> Dario
>
^ permalink raw reply [flat|nested] 27+ messages in thread
* [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables
2014-06-13 19:58 [RFC PATCH 0/4] Repurpose SEDF Scheduler for Real-time use Josh Whitehead
2014-06-13 19:58 ` [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf Josh Whitehead
2014-06-13 19:58 ` [RFC PATCH 2/4] Add cbs parameter support to xl tool stack, remove defunct sedf parameters Josh Whitehead
@ 2014-06-13 19:58 ` Josh Whitehead
2014-06-16 9:33 ` Jan Beulich
2014-06-13 19:58 ` [RFC PATCH 4/4] Changed filenames with sedf to cbs to reflect the actual scheduler Josh Whitehead
` (2 subsequent siblings)
5 siblings, 1 reply; 27+ messages in thread
From: Josh Whitehead @ 2014-06-13 19:58 UTC (permalink / raw)
To: Xen-devel
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Dario Faggioli,
Ian Jackson, Robert VanVossen, Nate Studer, Josh Whitehead
From: Robbie VanVossen <robert.vanvossen@dornerworks.com>
---
docs/man/xl.cfg.pod.5 | 12 +-
docs/man/xl.pod.1 | 4 +-
docs/man/xlcpupool.cfg.pod.5 | 4 +-
docs/misc/xen-command-line.markdown | 2 +-
tools/examples/cpupool | 2 +-
tools/libxc/xc_sedf.c | 24 +-
tools/libxc/xenctrl.h | 8 +-
tools/libxl/libxl.c | 36 +--
tools/libxl/libxl.h | 2 +-
tools/libxl/libxl_types.idl | 4 +-
tools/libxl/xl.h | 2 +-
tools/libxl/xl_cmdimpl.c | 60 ++--
tools/libxl/xl_cmdtable.c | 12 +-
tools/python/README.XendConfig | 4 +-
tools/python/xen/lowlevel/xc/xc.c | 34 +-
xen/common/sched_sedf.c | 599 +++++++++++++++++------------------
xen/common/schedule.c | 2 +-
xen/include/public/domctl.h | 8 +-
xen/include/public/trace.h | 2 +-
xen/include/xen/sched-if.h | 2 +-
20 files changed, 400 insertions(+), 423 deletions(-)
diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
index 5c55298..b18b157 100755
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -176,7 +176,7 @@ details.
A domain with a weight of 512 will get twice as much CPU as a domain
with a weight of 256 on a contended host.
Legal weights range from 1 to 65535 and the default is 256.
-Honoured by the credit, credit2 and sedf schedulers.
+Honoured by the credit, credit2 and cbs schedulers.
=item B<cap=N>
@@ -203,19 +203,19 @@ your BIOS.
=item B<period=NANOSECONDS>
The normal EDF scheduling usage in nanoseconds. This means every period
-the domain gets cpu time defined in slice.
-Honoured by the sedf scheduler.
+the domain gets cpu time defined in budget.
+Honoured by the cbs scheduler.
-=item B<slice=NANOSECONDS>
+=item B<budget=NANOSECONDS>
The normal EDF scheduling usage in nanoseconds. it defines the time
a domain get every period time.
-Honoured by the sedf scheduler.
+Honoured by the cbs scheduler.
=item B<soft=BOOLEAN>
Flag for setting a domain or VCPU to run as a soft task.
-Honoured by the sedf scheduler.
+Honoured by the cbs scheduler.
=back
diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index 30bd4bf..e960436 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -977,12 +977,12 @@ Restrict output to domains in the specified cpupool.
=back
-=item B<sched-sedf> [I<OPTIONS>]
+=item B<sched-cbs> [I<OPTIONS>]
Set or get Simple EDF (Earliest Deadline First) scheduler parameters. This
scheduler provides weighted CPU sharing in an intuitive way and uses
realtime-algorithms to ensure time guarantees. For more information see
-docs/misc/sedf_scheduler_mini-HOWTO.txt in the Xen distribution.
+docs/misc/cbs_scheduler_mini-HOWTO.txt in the Xen distribution.
B<OPTIONS>
diff --git a/docs/man/xlcpupool.cfg.pod.5 b/docs/man/xlcpupool.cfg.pod.5
index e32ce17..dd299ac 100644
--- a/docs/man/xlcpupool.cfg.pod.5
+++ b/docs/man/xlcpupool.cfg.pod.5
@@ -74,9 +74,9 @@ the credit scheduler
the credit2 scheduler
-=item B<sedf>
+=item B<cbs>
-the SEDF scheduler
+the CBS scheduler
=back
diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index a7ac53d..8bbc151 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -858,7 +858,7 @@ Specify the host reboot method.
`acpi` instructs Xen to reboot the host using RESET_REG in the ACPI FADT.
### sched
-> `= credit | credit2 | sedf | arinc653`
+> `= credit | credit2 | cbs | arinc653`
> Default: `sched=credit`
diff --git a/tools/examples/cpupool b/tools/examples/cpupool
index 01e62c8..22d0d84 100644
--- a/tools/examples/cpupool
+++ b/tools/examples/cpupool
@@ -9,7 +9,7 @@
# the name of the new cpupool
name = "Example-Cpupool"
-# the scheduler to use: valid are e.g. credit, sedf, credit2
+# the scheduler to use: valid are e.g. credit, cbs, credit2
sched = "credit"
# list of cpus to use
diff --git a/tools/libxc/xc_sedf.c b/tools/libxc/xc_sedf.c
index 81ff133..3b578d1 100755
--- a/tools/libxc/xc_sedf.c
+++ b/tools/libxc/xc_sedf.c
@@ -1,7 +1,7 @@
/******************************************************************************
- * xc_sedf.c
+ * xc_cbs.c
*
- * API for manipulating parameters of the Simple EDF scheduler.
+ * API for manipulating parameters of the CBS scheduler.
*
* changes by Stephan Diestelhorst
* based on code
@@ -24,47 +24,47 @@
#include "xc_private.h"
-int xc_sedf_domain_set(
+int xc_cbs_domain_set(
xc_interface *xch,
uint32_t domid,
uint64_t period,
- uint64_t slice,
+ uint64_t budget,
uint16_t soft)
{
DECLARE_DOMCTL;
- struct xen_domctl_sched_sedf *p = &domctl.u.scheduler_op.u.sedf;
+ struct xen_domctl_sched_cbs *p = &domctl.u.scheduler_op.u.cbs;
domctl.cmd = XEN_DOMCTL_scheduler_op;
domctl.domain = (domid_t)domid;
- domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_SEDF;
+ domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_CBS;
domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_putinfo;
p->period = period;
- p->slice = slice;
+ p->budget = budget;
p->soft = soft;
return do_domctl(xch, &domctl);
}
-int xc_sedf_domain_get(
+int xc_cbs_domain_get(
xc_interface *xch,
uint32_t domid,
uint64_t *period,
- uint64_t *slice,
+ uint64_t *budget,
uint16_t *soft)
{
DECLARE_DOMCTL;
int ret;
- struct xen_domctl_sched_sedf *p = &domctl.u.scheduler_op.u.sedf;
+ struct xen_domctl_sched_cbs *p = &domctl.u.scheduler_op.u.cbs;
domctl.cmd = XEN_DOMCTL_scheduler_op;
domctl.domain = (domid_t)domid;
- domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_SEDF;
+ domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_CBS;
domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_getinfo;
ret = do_domctl(xch, &domctl);
*period = p->period;
- *slice = p->slice;
+ *budget = p->budget;
*soft = p->soft;
return ret;
}
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index d5cfdb8..af2cdb2 100755
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -763,16 +763,16 @@ int xc_shadow_control(xc_interface *xch,
uint32_t mode,
xc_shadow_op_stats_t *stats);
-int xc_sedf_domain_set(xc_interface *xch,
+int xc_cbs_domain_set(xc_interface *xch,
uint32_t domid,
uint64_t period,
- uint64_t slice,
+ uint64_t budget,
uint16_t soft);
-int xc_sedf_domain_get(xc_interface *xch,
+int xc_cbs_domain_get(xc_interface *xch,
uint32_t domid,
uint64_t *period,
- uint64_t *slice,
+ uint64_t *budget,
uint16_t *soft);
int xc_sched_credit_domain_set(xc_interface *xch,
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index cea8af2..a862cb5 100755
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -4927,54 +4927,54 @@ static int sched_credit2_domain_set(libxl__gc *gc, uint32_t domid,
return 0;
}
-static int sched_sedf_domain_get(libxl__gc *gc, uint32_t domid,
+static int sched_cbs_domain_get(libxl__gc *gc, uint32_t domid,
libxl_domain_sched_params *scinfo)
{
uint64_t period;
- uint64_t slice;
+ uint64_t budget;
uint16_t soft;
int rc;
- rc = xc_sedf_domain_get(CTX->xch, domid, &period, &slice, &soft);
+ rc = xc_cbs_domain_get(CTX->xch, domid, &period, &budget, &soft);
if (rc != 0) {
- LOGE(ERROR, "getting domain sched sedf");
+ LOGE(ERROR, "getting domain sched cbs");
return ERROR_FAIL;
}
libxl_domain_sched_params_init(scinfo);
- scinfo->sched = LIBXL_SCHEDULER_SEDF;
+ scinfo->sched = LIBXL_SCHEDULER_CBS;
scinfo->period = period / 1000000;
- scinfo->slice = slice / 1000000;
+ scinfo->budget = budget / 1000000;
scinfo->soft = soft;
return 0;
}
-static int sched_sedf_domain_set(libxl__gc *gc, uint32_t domid,
+static int sched_cbs_domain_set(libxl__gc *gc, uint32_t domid,
const libxl_domain_sched_params *scinfo)
{
uint64_t period;
- uint64_t slice;
+ uint64_t budget;
uint16_t soft;
int ret;
- ret = xc_sedf_domain_get(CTX->xch, domid, &period, &slice, &soft);
+ ret = xc_cbs_domain_get(CTX->xch, domid, &period, &budget, &soft);
if (ret != 0) {
- LOGE(ERROR, "getting domain sched sedf");
+ LOGE(ERROR, "getting domain sched cbs");
return ERROR_FAIL;
}
if (scinfo->period != LIBXL_DOMAIN_SCHED_PARAM_PERIOD_DEFAULT)
period = (uint64_t)scinfo->period * 1000000;
- if (scinfo->slice != LIBXL_DOMAIN_SCHED_PARAM_SLICE_DEFAULT)
- slice = (uint64_t)scinfo->slice * 1000000;
+ if (scinfo->budget != LIBXL_DOMAIN_SCHED_PARAM_BUDGET_DEFAULT)
+ budget = (uint64_t)scinfo->budget * 1000000;
if (scinfo->soft != LIBXL_DOMAIN_SCHED_PARAM_SOFT_DEFAULT)
soft = scinfo->soft;
- ret = xc_sedf_domain_set(CTX->xch, domid, period, slice, soft);
+ ret = xc_cbs_domain_set(CTX->xch, domid, period, budget, soft);
if ( ret < 0 ) {
- LOGE(ERROR, "setting domain sched sedf");
+ LOGE(ERROR, "setting domain sched cbs");
return ERROR_FAIL;
}
@@ -4992,8 +4992,8 @@ int libxl_domain_sched_params_set(libxl_ctx *ctx, uint32_t domid,
sched = libxl__domain_scheduler(gc, domid);
switch (sched) {
- case LIBXL_SCHEDULER_SEDF:
- ret=sched_sedf_domain_set(gc, domid, scinfo);
+ case LIBXL_SCHEDULER_CBS:
+ ret=sched_cbs_domain_set(gc, domid, scinfo);
break;
case LIBXL_SCHEDULER_CREDIT:
ret=sched_credit_domain_set(gc, domid, scinfo);
@@ -5025,8 +5025,8 @@ int libxl_domain_sched_params_get(libxl_ctx *ctx, uint32_t domid,
scinfo->sched = libxl__domain_scheduler(gc, domid);
switch (scinfo->sched) {
- case LIBXL_SCHEDULER_SEDF:
- ret=sched_sedf_domain_get(gc, domid, scinfo);
+ case LIBXL_SCHEDULER_CBS:
+ ret=sched_cbs_domain_get(gc, domid, scinfo);
break;
case LIBXL_SCHEDULER_CREDIT:
ret=sched_credit_domain_get(gc, domid, scinfo);
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 548d37e..8b26643 100755
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -1119,7 +1119,7 @@ int libxl_sched_credit_params_set(libxl_ctx *ctx, uint32_t poolid,
#define LIBXL_DOMAIN_SCHED_PARAM_WEIGHT_DEFAULT -1
#define LIBXL_DOMAIN_SCHED_PARAM_CAP_DEFAULT -1
#define LIBXL_DOMAIN_SCHED_PARAM_PERIOD_DEFAULT -1
-#define LIBXL_DOMAIN_SCHED_PARAM_SLICE_DEFAULT -1
+#define LIBXL_DOMAIN_SCHED_PARAM_BUDGET_DEFAULT -1
#define LIBXL_DOMAIN_SCHED_PARAM_SOFT_DEFAULT -1
int libxl_domain_sched_params_get(libxl_ctx *ctx, uint32_t domid,
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 3ec2f80..a212e33 100755
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -134,7 +134,7 @@ libxl_bios_type = Enumeration("bios_type", [
# Except unknown which we have made up
libxl_scheduler = Enumeration("scheduler", [
(0, "unknown"),
- (4, "sedf"),
+ (4, "cbs"),
(5, "credit"),
(6, "credit2"),
(7, "arinc653"),
@@ -291,7 +291,7 @@ libxl_domain_sched_params = Struct("domain_sched_params",[
("weight", integer, {'init_val': 'LIBXL_DOMAIN_SCHED_PARAM_WEIGHT_DEFAULT'}),
("cap", integer, {'init_val': 'LIBXL_DOMAIN_SCHED_PARAM_CAP_DEFAULT'}),
("period", integer, {'init_val': 'LIBXL_DOMAIN_SCHED_PARAM_PERIOD_DEFAULT'}),
- ("slice", integer, {'init_val': 'LIBXL_DOMAIN_SCHED_PARAM_SLICE_DEFAULT'}),
+ ("budget", integer, {'init_val': 'LIBXL_DOMAIN_SCHED_PARAM_BUDGET_DEFAULT'}),
("soft", integer, {'init_val': 'LIBXL_DOMAIN_SCHED_PARAM_SOFT_DEFAULT'}),
])
diff --git a/tools/libxl/xl.h b/tools/libxl/xl.h
index 10a2e66..f7c73cc 100644
--- a/tools/libxl/xl.h
+++ b/tools/libxl/xl.h
@@ -66,7 +66,7 @@ int main_memmax(int argc, char **argv);
int main_memset(int argc, char **argv);
int main_sched_credit(int argc, char **argv);
int main_sched_credit2(int argc, char **argv);
-int main_sched_sedf(int argc, char **argv);
+int main_sched_cbs(int argc, char **argv);
int main_domid(int argc, char **argv);
int main_domname(int argc, char **argv);
int main_rename(int argc, char **argv);
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index e06f924..0c1959c 100755
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -836,8 +836,8 @@ static void parse_config_data(const char *config_source,
b_info->sched_params.cap = l;
if (!xlu_cfg_get_long (config, "period", &l, 0))
b_info->sched_params.period = l;
- if (!xlu_cfg_get_long (config, "slice", &l, 0))
- b_info->sched_params.slice = l;
+ if (!xlu_cfg_get_long (config, "budget", &l, 0))
+ b_info->sched_params.budget = l;
if (!xlu_cfg_get_long (config, "soft", &l, 0))
b_info->sched_params.soft = l;
@@ -5171,7 +5171,7 @@ static int sched_credit2_domain_output(
return 0;
}
-static int sched_sedf_domain_output(
+static int sched_cbs_domain_output(
int domid)
{
char *domname;
@@ -5180,10 +5180,10 @@ static int sched_sedf_domain_output(
if (domid < 0) {
printf("%-33s %4s %6s %-6s %5s\n", "Name", "ID", "Period",
- "Slice", "Soft");
+ "Budget", "Soft");
return 0;
}
- rc = sched_domain_get(LIBXL_SCHEDULER_SEDF, domid, &scinfo);
+ rc = sched_domain_get(LIBXL_SCHEDULER_CBS, domid, &scinfo);
if (rc)
return rc;
domname = libxl_domid_to_name(ctx, domid);
@@ -5191,7 +5191,7 @@ static int sched_sedf_domain_output(
domname,
domid,
scinfo.period,
- scinfo.slice,
+ scinfo.budget,
scinfo.soft);
free(domname);
libxl_domain_sched_params_dispose(&scinfo);
@@ -5456,24 +5456,24 @@ int main_sched_credit2(int argc, char **argv)
return 0;
}
-int main_sched_sedf(int argc, char **argv)
+int main_sched_cbs(int argc, char **argv)
{
const char *dom = NULL;
const char *cpupool = NULL;
int period = 0, opt_p = 0;
- int slice = 0, opt_s = 0;
- int soft = 0, opt_t = 0;
+ int budget = 0, opt_b = 0;
+ int soft = 0, opt_s = 0;
int opt, rc;
static struct option opts[] = {
{"period", 1, 0, 'p'},
- {"slice", 1, 0, 's'},
- {"soft", 1, 0, 't'},
+ {"budget", 1, 0, 'b'},
+ {"soft", 1, 0, 's'},
{"cpupool", 1, 0, 'c'},
COMMON_LONG_OPTS,
{0, 0, 0, 0}
};
- SWITCH_FOREACH_OPT(opt, "d:p:s:t:c:h", opts, "sched-sedf", 0) {
+ SWITCH_FOREACH_OPT(opt, "d:p:b:s:c:h", opts, "sched-cbs", 0) {
case 'd':
dom = optarg;
break;
@@ -5481,53 +5481,53 @@ int main_sched_sedf(int argc, char **argv)
period = strtol(optarg, NULL, 10);
opt_p = 1;
break;
- case 's':
- slice = strtol(optarg, NULL, 10);
- opt_s = 1;
+ case 'b':
+ budget = strtol(optarg, NULL, 10);
+ opt_b = 1;
break;
- case 't':
+ case 's':
soft = strtol(optarg, NULL, 10);
- opt_t = 1;
+ opt_s = 1;
break;
case 'c':
cpupool = optarg;
break;
}
- if (cpupool && (dom || opt_p || opt_s || opt_t)) {
+ if (cpupool && (dom || opt_p || opt_b || opt_s)) {
fprintf(stderr, "Specifying a cpupool is not allowed with other "
"options.\n");
return 1;
}
- if (!dom && (opt_p || opt_s || opt_t)) {
+ if (!dom && (opt_p || opt_b || opt_s)) {
fprintf(stderr, "Must specify a domain.\n");
return 1;
}
if (!dom) { /* list all domain's credit scheduler info */
- return -sched_domain_output(LIBXL_SCHEDULER_SEDF,
- sched_sedf_domain_output,
+ return -sched_domain_output(LIBXL_SCHEDULER_CBS,
+ sched_cbs_domain_output,
sched_default_pool_output,
cpupool);
} else {
uint32_t domid = find_domain(dom);
- if (!opt_p && !opt_s) {
- /* output sedf scheduler info */
- sched_sedf_domain_output(-1);
- return -sched_sedf_domain_output(domid);
- } else { /* set sedf scheduler paramaters */
+ if (!opt_p && !opt_b) {
+ /* output cbs scheduler info */
+ sched_cbs_domain_output(-1);
+ return -sched_cbs_domain_output(domid);
+ } else { /* set cbs scheduler parameters */
libxl_domain_sched_params scinfo;
libxl_domain_sched_params_init(&scinfo);
- scinfo.sched = LIBXL_SCHEDULER_SEDF;
+ scinfo.sched = LIBXL_SCHEDULER_CBS;
if (opt_p) {
scinfo.period = period;
}
- if (opt_s) {
- scinfo.slice = slice;
+ if (opt_b) {
+ scinfo.budget = budget;
}
- if (opt_t) {
+ if (opt_s) {
scinfo.soft = soft;
}
rc = sched_domain_set(domid, &scinfo);
diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c
index 1226fb8..3507468 100755
--- a/tools/libxl/xl_cmdtable.c
+++ b/tools/libxl/xl_cmdtable.c
@@ -261,15 +261,15 @@ struct cmd_spec cmd_table[] = {
"-w WEIGHT, --weight=WEIGHT Weight (int)\n"
"-p CPUPOOL, --cpupool=CPUPOOL Restrict output to CPUPOOL"
},
- { "sched-sedf",
- &main_sched_sedf, 0, 1,
- "Get/set sedf scheduler parameters",
+ { "sched-cbs",
+ &main_sched_cbs, 0, 1,
+ "Get/set cbs scheduler parameters",
"[options]",
"-d DOMAIN, --domain=DOMAIN Domain to modify\n"
"-p MS, --period=MS Relative deadline(ms)\n"
- "-s MS, --slice=MS Worst-case execution time(ms).\n"
- " (slice < period)\n"
- "-t FLAG, --soft=FLAG Flag (0 or 1) controls if domain\n"
+ "-b MS, --budget=MS Constant bandwidth server budget(ms).\n"
+ " (budget < period)\n"
+ "-s FLAG, --soft=FLAG Flag (0 or 1) controls if domain\n"
" can run as a soft task\n"
"-c CPUPOOL, --cpupool=CPUPOOL Restrict output to CPUPOOL"
},
diff --git a/tools/python/README.XendConfig b/tools/python/README.XendConfig
index 338715b..fbf3867 100644
--- a/tools/python/README.XendConfig
+++ b/tools/python/README.XendConfig
@@ -32,8 +32,8 @@ memory_static_max maxmem
memory_actual
memory_dynamic_min
memory_dynamic_max
-vcpus_policy !set_credit/set_sedf
-vcpus_params !set_credit/set_sedf
+vcpus_policy !set_credit/set_cbs
+vcpus_params !set_credit/set_cbs
vcpus_number vcpus
vcpus_utilisation
vcpus_features_required
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index bf10165..198bfe2 100755
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -1466,45 +1466,45 @@ static PyObject *pyxc_xeninfo(XcObject *self)
}
-static PyObject *pyxc_sedf_domain_set(XcObject *self,
+static PyObject *pyxc_cbs_domain_set(XcObject *self,
PyObject *args,
PyObject *kwds)
{
uint32_t domid;
- uint64_t period, slice;
+ uint64_t period, budget;
uint16_t soft;
- static char *kwd_list[] = { "domid", "period", "slice", "soft",NULL };
+ static char *kwd_list[] = { "domid", "period", "budget", "soft",NULL };
if( !PyArg_ParseTupleAndKeywords(args, kwds, "iLLi", kwd_list,
- &domid, &period, &slice, &soft) )
+ &domid, &period, &budget, &soft) )
return NULL;
- if ( xc_sedf_domain_set(self->xc_handle, domid, period,
- slice, soft) != 0 )
+ if ( xc_cbs_domain_set(self->xc_handle, domid, period,
+ budget, soft) != 0 )
return pyxc_error_to_exception(self->xc_handle);
Py_INCREF(zero);
return zero;
}
-static PyObject *pyxc_sedf_domain_get(XcObject *self, PyObject *args)
+static PyObject *pyxc_cbs_domain_get(XcObject *self, PyObject *args)
{
uint32_t domid;
- uint64_t period, slice;
+ uint64_t period, budget;
uint16_t soft;
if(!PyArg_ParseTuple(args, "i", &domid))
return NULL;
- if (xc_sedf_domain_get(self->xc_handle, domid, &period,
- &slice, &soft))
+ if (xc_cbs_domain_get(self->xc_handle, domid, &period,
+ &budget, &soft))
return pyxc_error_to_exception(self->xc_handle);
return Py_BuildValue("{s:i,s:L,s:L,s:i}",
- "domid", domid,
+ "domid", domid,
"period", period,
- "slice", slice,
+ "budget", budget,
"soft", soft);
}
@@ -2535,8 +2535,8 @@ static PyMethodDef pyxc_methods[] = {
"Get the current scheduler type in use.\n"
"Returns: [int] sched_id.\n" },
- { "sedf_domain_set",
- (PyCFunction)pyxc_sedf_domain_set,
+ { "cbs_domain_set",
+ (PyCFunction)pyxc_cbs_domain_set,
METH_KEYWORDS, "\n"
"Set the scheduling parameters for a domain when running with Atropos.\n"
" dom [int]: domain to set\n"
@@ -2545,8 +2545,8 @@ static PyMethodDef pyxc_methods[] = {
" soft [int]: domain is a soft task?\n"
"Returns: [int] 0 on success; -1 on error.\n" },
- { "sedf_domain_get",
- (PyCFunction)pyxc_sedf_domain_get,
+ { "cbs_domain_get",
+ (PyCFunction)pyxc_cbs_domain_get,
METH_VARARGS, "\n"
"Get the current scheduling parameters for a domain when running with\n"
"the Atropos scheduler."
@@ -3076,7 +3076,7 @@ PyMODINIT_FUNC initxc(void)
PyModule_AddObject(m, "Error", xc_error_obj);
/* Expose some libxc constants to Python */
- PyModule_AddIntConstant(m, "XEN_SCHEDULER_SEDF", XEN_SCHEDULER_SEDF);
+ PyModule_AddIntConstant(m, "XEN_SCHEDULER_CBS", XEN_SCHEDULER_CBS);
PyModule_AddIntConstant(m, "XEN_SCHEDULER_CREDIT", XEN_SCHEDULER_CREDIT);
PyModule_AddIntConstant(m, "XEN_SCHEDULER_CREDIT2", XEN_SCHEDULER_CREDIT2);
diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c
index 2ee4538..5df4825 100644
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -1,8 +1,29 @@
/******************************************************************************
- * Simple EDF scheduler for xen
+ * Constant Bandwidth Server Scheduler for Xen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * by DornerWorks Ltd. (C) 2014 Grand Rapids, MI
+ *
+ * Adapted from code by Stephan Diestelhorst (C) 2004 Cambridge University
+ * and Mark Williamson (C) 2004 Intel Research Cambridge
*
- * by Stephan Diestelhorst (C) 2004 Cambridge University
- * based on code by Mark Williamson (C) 2004 Intel Research Cambridge
*/
#include <xen/lib.h>
@@ -14,7 +35,7 @@
#include <xen/errno.h>
#ifndef NDEBUG
-#define SEDF_STATS
+#define CBS_STATS
#define CHECK(_p) \
do { \
if ( !(_p) ) \
@@ -25,48 +46,49 @@
#define CHECK(_p) ((void)0)
#endif
-#define SEDF_SOFT_TASK (1)
-#define SEDF_ASLEEP (16)
+#define CBS_SOFT_TASK (1)
+#define CBS_ASLEEP (16)
#define DEFAULT_PERIOD (MILLISECS(20))
-#define DEFAULT_SLICE (MILLISECS(10))
+#define DEFAULT_BUDGET (MILLISECS(10))
#define PERIOD_MAX MILLISECS(10000) /* 10s */
#define PERIOD_MIN (MICROSECS(10)) /* 10us */
-#define SLICE_MIN (MICROSECS(5)) /* 5us */
+#define BUDGET_MIN (MICROSECS(5)) /* 5us */
-#define EQ(a, b) ((!!(a)) == (!!(b)))
+#define EQ(_A, _B) ((!!(_A)) == (!!(_B)))
-struct sedf_dom_info {
+struct cbs_dom_info {
struct domain *domain;
};
-struct sedf_priv_info {
+struct cbs_priv_info {
/* lock for the whole pluggable scheduler, nests inside cpupool_lock */
spinlock_t lock;
};
-struct sedf_vcpu_info {
+struct cbs_vcpu_info {
struct vcpu *vcpu;
struct list_head list;
- /* Parameters for EDF */
- s_time_t period; /* = relative deadline */
- s_time_t slice; /* = worst case execution time */
- /* Note: Server bandwidth = (slice / period) */
- /* Status of domain */
+ /* Parameters for EDF-CBS */
+ s_time_t period; /* = Server scheduling period */
+ s_time_t budget; /* = Guaranteed minimum CPU time per period */
+ /* Note: Server bandwidth = (budget / period) */
+
+ /* Status of vcpu */
int status;
/* Bookkeeping */
s_time_t deadl_abs;
s_time_t sched_start_abs;
s_time_t cputime;
- /* Times the domain un-/blocked */
+ /* Times the vcpu un-/blocked */
s_time_t block_abs;
s_time_t unblock_abs;
-#ifdef SEDF_STATS
+#ifdef CBS_STATS
s_time_t block_time_tot;
int block_tot;
int short_block_tot;
@@ -78,45 +100,44 @@ struct sedf_vcpu_info {
#endif
};
-struct sedf_cpu_info {
+struct cbs_cpu_info {
struct list_head runnableq;
struct list_head waitq;
- s_time_t current_slice_expires;
+ s_time_t current_budget_expires;
};
-#define SEDF_PRIV(_ops) \
- ((struct sedf_priv_info *)((_ops)->sched_data))
-#define EDOM_INFO(d) ((struct sedf_vcpu_info *)((d)->sched_priv))
-#define CPU_INFO(cpu) \
- ((struct sedf_cpu_info *)per_cpu(schedule_data, cpu).sched_priv)
-#define LIST(d) (&EDOM_INFO(d)->list)
-#define RUNQ(cpu) (&CPU_INFO(cpu)->runnableq)
-#define WAITQ(cpu) (&CPU_INFO(cpu)->waitq)
-#define IDLETASK(cpu) (idle_vcpu[cpu])
+#define CBS_PRIV(_ops) \
+ ((struct cbs_priv_info *)((_ops)->sched_data))
+#define CBS_VCPU(_vcpu) ((struct cbs_vcpu_info *)((_vcpu)->sched_priv))
+#define CBS_PCPU(_cpu) \
+ ((struct cbs_cpu_info *)per_cpu(schedule_data, _cpu).sched_priv)
+#define LIST(_vcpu) (&CBS_VCPU(_vcpu)->list)
+#define RUNQ(_cpu) (&CBS_PCPU(_cpu)->runnableq)
+#define WAITQ(_cpu) (&CBS_PCPU(_cpu)->waitq)
+#define IDLETASK(_cpu) (idle_vcpu[_cpu])
#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
-#define DIV_UP(x,y) (((x) + (y) - 1) / y)
+#define DIV_UP(_X, _Y) (((_X) + (_Y) - 1) / (_Y)) /* ceil(_X / _Y) */
-#define sedf_runnable(edom) (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
+#define cbs_runnable(edom) (!(CBS_VCPU(edom)->status & CBS_ASLEEP))
-#define sedf_soft(edom) (EDOM_INFO(edom)->status & SEDF_SOFT_TASK)
+#define cbs_soft(edom) (CBS_VCPU(edom)->status & CBS_SOFT_TASK)
+static void cbs_dump_cpu_state(const struct scheduler *ops, int cpu);
-static void sedf_dump_cpu_state(const struct scheduler *ops, int i);
-
-static inline int __task_on_queue(struct vcpu *d)
+static inline int __task_on_queue(struct vcpu *v)
{
- return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d)));
+ return (((LIST(v))->next != NULL) && (LIST(v)->next != LIST(v)));
}
-static inline void __del_from_queue(struct vcpu *d)
+static inline void __del_from_queue(struct vcpu *v)
{
- struct list_head *list = LIST(d);
- ASSERT(__task_on_queue(d));
+ struct list_head *list = LIST(v);
+ ASSERT(__task_on_queue(v));
list_del(list);
list->next = NULL;
- ASSERT(!__task_on_queue(d));
+ ASSERT(!__task_on_queue(v));
}
typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2);
@@ -135,12 +156,12 @@ static inline void list_insert_sort(
list_add(element, cur->prev);
}
-#define DOMAIN_COMPARER(name, field, comp1, comp2) \
+#define VCPU_COMPARER(name, field, comp1, comp2) \
static int name##_comp(struct list_head* el1, struct list_head* el2) \
{ \
- struct sedf_vcpu_info *d1, *d2; \
- d1 = list_entry(el1,struct sedf_vcpu_info, field); \
- d2 = list_entry(el2,struct sedf_vcpu_info, field); \
+ struct cbs_vcpu_info *v1, *v2; \
+ v1 = list_entry(el1, struct cbs_vcpu_info, field); \
+ v2 = list_entry(el2, struct cbs_vcpu_info, field); \
if ( (comp1) == (comp2) ) \
return 0; \
if ( (comp1) < (comp2) ) \
@@ -150,11 +171,11 @@ static int name##_comp(struct list_head* el1, struct list_head* el2) \
}
/*
- * Adds a domain to the queue of processes which wait for the beginning of the
+ * Adds a vcpu to the queue of processes which wait for the beginning of the
* next period; this list is therefore sortet by this time, which is simply
* absol. deadline - period.
*/
-DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1), PERIOD_BEGIN(d2));
+VCPU_COMPARER(waitq, list, PERIOD_BEGIN(v1), PERIOD_BEGIN(v2));
static inline void __add_to_waitqueue_sort(struct vcpu *v)
{
ASSERT(!__task_on_queue(v));
@@ -163,32 +184,32 @@ static inline void __add_to_waitqueue_sort(struct vcpu *v)
}
/*
- * Adds a domain to the queue of processes which have started their current
+ * Adds a vcpu to the queue of processes which have started their current
* period and are runnable (i.e. not blocked, dieing,...). The first element
* on this list is running on the processor, if the list is empty the idle
* task will run. As we are implementing EDF, this list is sorted by deadlines.
*/
-DOMAIN_COMPARER(runq, list, d1->deadl_abs, d2->deadl_abs);
+VCPU_COMPARER(runq, list, v1->deadl_abs, v2->deadl_abs);
static inline void __add_to_runqueue_sort(struct vcpu *v)
{
list_insert_sort(RUNQ(v->processor), LIST(v), runq_comp);
}
-static void sedf_insert_vcpu(const struct scheduler *ops, struct vcpu *v)
+static void cbs_insert_vcpu(const struct scheduler *ops, struct vcpu *v)
{
if ( is_idle_vcpu(v) )
{
- EDOM_INFO(v)->deadl_abs = 0;
- EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
+ CBS_VCPU(v)->deadl_abs = 0;
+ CBS_VCPU(v)->status &= ~CBS_ASLEEP;
}
}
-static void *sedf_alloc_vdata(const struct scheduler *ops, struct vcpu *v, void *dd)
+static void *cbs_alloc_vdata(const struct scheduler *ops, struct vcpu *v, void *dd)
{
- struct sedf_vcpu_info *inf;
+ struct cbs_vcpu_info *inf;
- inf = xzalloc(struct sedf_vcpu_info);
+ inf = xzalloc(struct cbs_vcpu_info);
if ( inf == NULL )
return NULL;
@@ -196,18 +217,18 @@ static void *sedf_alloc_vdata(const struct scheduler *ops, struct vcpu *v, void
inf->deadl_abs = 0;
inf->cputime = 0;
- inf->status = SEDF_ASLEEP;
+ inf->status = CBS_ASLEEP;
if (v->domain->domain_id == 0)
{
- /* Domain 0, needs a slice to boot the machine */
- inf->period = DEFAULT_PERIOD;
- inf->slice = DEFAULT_SLICE;
+ /* Domain 0, needs a budget to boot the machine */
+ inf->period = DEFAULT_PERIOD;
+ inf->budget = DEFAULT_BUDGET;
}
else
{
- inf->period = DEFAULT_PERIOD;
- inf->slice = 0;
+ inf->period = DEFAULT_PERIOD;
+ inf->budget = 0;
}
INIT_LIST_HEAD(&(inf->list));
@@ -218,11 +239,11 @@ static void *sedf_alloc_vdata(const struct scheduler *ops, struct vcpu *v, void
}
static void *
-sedf_alloc_pdata(const struct scheduler *ops, int cpu)
+cbs_alloc_pdata(const struct scheduler *ops, int cpu)
{
- struct sedf_cpu_info *spc;
+ struct cbs_cpu_info *spc;
- spc = xzalloc(struct sedf_cpu_info);
+ spc = xzalloc(struct cbs_cpu_info);
BUG_ON(spc == NULL);
INIT_LIST_HEAD(&spc->waitq);
INIT_LIST_HEAD(&spc->runnableq);
@@ -231,7 +252,7 @@ sedf_alloc_pdata(const struct scheduler *ops, int cpu)
}
static void
-sedf_free_pdata(const struct scheduler *ops, void *spc, int cpu)
+cbs_free_pdata(const struct scheduler *ops, void *spc, int cpu)
{
if ( spc == NULL )
return;
@@ -239,37 +260,37 @@ sedf_free_pdata(const struct scheduler *ops, void *spc, int cpu)
xfree(spc);
}
-static void sedf_free_vdata(const struct scheduler *ops, void *priv)
+static void cbs_free_vdata(const struct scheduler *ops, void *priv)
{
xfree(priv);
}
static void *
-sedf_alloc_domdata(const struct scheduler *ops, struct domain *d)
+cbs_alloc_domdata(const struct scheduler *ops, struct domain *d)
{
- return xzalloc(struct sedf_dom_info);
+ return xzalloc(struct cbs_dom_info);
}
-static int sedf_init_domain(const struct scheduler *ops, struct domain *d)
+static int cbs_init_domain(const struct scheduler *ops, struct domain *d)
{
- d->sched_priv = sedf_alloc_domdata(ops, d);
+ d->sched_priv = cbs_alloc_domdata(ops, d);
if ( d->sched_priv == NULL )
return -ENOMEM;
return 0;
}
-static void sedf_free_domdata(const struct scheduler *ops, void *data)
+static void cbs_free_domdata(const struct scheduler *ops, void *data)
{
xfree(data);
}
-static void sedf_destroy_domain(const struct scheduler *ops, struct domain *d)
+static void cbs_destroy_domain(const struct scheduler *ops, struct domain *d)
{
- sedf_free_domdata(ops, d->sched_priv);
+ cbs_free_domdata(ops, d->sched_priv);
}
-static int sedf_pick_cpu(const struct scheduler *ops, struct vcpu *v)
+static int cbs_pick_cpu(const struct scheduler *ops, struct vcpu *v)
{
cpumask_t online_affinity;
cpumask_t *online;
@@ -281,27 +302,27 @@ static int sedf_pick_cpu(const struct scheduler *ops, struct vcpu *v)
}
/*
- * Handles the rescheduling & bookkeeping of domains running in their
- * guaranteed timeslice.
+ * Handles the rescheduling & bookkeeping of vcpus running in their
+ * guaranteed time budget.
*/
-static void desched_edf_dom(s_time_t now, struct vcpu* d)
+static void desched_edf_vcpu(s_time_t now, struct vcpu *v)
{
- struct sedf_vcpu_info* inf = EDOM_INFO(d);
+ struct cbs_vcpu_info* inf = CBS_VCPU(v);
- /* Current domain is running in real time mode */
- ASSERT(__task_on_queue(d));
+ /* Current vcpu is running in real time mode */
+ ASSERT(__task_on_queue(v));
- /* Update the domain's cputime */
+ /* Update the vcpu's cputime */
inf->cputime += now - inf->sched_start_abs;
- /* Scheduling decisions which don't remove the running domain from
+ /* Scheduling decisions which don't remove the running vcpu from
* the runq */
- if ( (inf->cputime < inf->slice) && sedf_runnable(d) )
+ if ( (inf->cputime < inf->budget) && cbs_runnable(v) )
return;
- __del_from_queue(d);
+ __del_from_queue(v);
-#ifdef SEDF_STATS
+#ifdef CBS_STATS
/* Manage deadline misses */
if ( unlikely(inf->deadl_abs < now) )
{
@@ -311,26 +332,27 @@ static void desched_edf_dom(s_time_t now, struct vcpu* d)
#endif
/* Manage overruns */
- if ( inf->cputime >= inf->slice )
+ if ( inf->cputime >= inf->budget )
{
- inf->cputime -= inf->slice;
+ inf->cputime -= inf->budget;
+
/* Set next deadline */
inf->deadl_abs += inf->period;
- /* Ensure that the cputime is always less than slice */
- if ( unlikely(inf->cputime > inf->slice) )
+ /* Ensure that the cputime is always less than budget */
+ if ( unlikely(inf->cputime > inf->budget) )
{
-#ifdef SEDF_STATS
+#ifdef CBS_STATS
inf->over_tot++;
inf->over_time += inf->cputime;
#endif
/* Make up for the overage by pushing the deadline
into the future */
- inf->deadl_abs += ((inf->cputime / inf->slice)
+ inf->deadl_abs += ((inf->cputime / inf->budget)
* inf->period) * 2;
- inf->cputime -= (inf->cputime / inf->slice) * inf->slice;
+ inf->cputime -= (inf->cputime / inf->budget) * inf->budget;
}
/* Ensure that the start of the next period is in the future */
@@ -340,20 +362,20 @@ static void desched_edf_dom(s_time_t now, struct vcpu* d)
inf->period)) * inf->period;
}
- /* Add a runnable domain to the waitqueue */
- if ( sedf_runnable(d) )
+ /* Add a runnable vcpu to the appropriate queue */
+ if ( cbs_runnable(v) )
{
- if( sedf_soft(d) )
+ if( cbs_soft(v) )
{
- __add_to_runqueue_sort(d);
+ __add_to_runqueue_sort(v);
}
else
{
- __add_to_waitqueue_sort(d);
+ __add_to_waitqueue_sort(v);
}
}
- ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
+ ASSERT(EQ(cbs_runnable(v), __task_on_queue(v)));
}
@@ -362,7 +384,7 @@ static void update_queues(
s_time_t now, struct list_head *runq, struct list_head *waitq)
{
struct list_head *cur, *tmp;
- struct sedf_vcpu_info *curinf;
+ struct cbs_vcpu_info *curinf;
/*
* Check for the first elements of the waitqueue, whether their
@@ -370,21 +392,21 @@ static void update_queues(
*/
list_for_each_safe ( cur, tmp, waitq )
{
- curinf = list_entry(cur, struct sedf_vcpu_info, list);
+ curinf = list_entry(cur, struct cbs_vcpu_info, list);
if ( PERIOD_BEGIN(curinf) > now )
break;
__del_from_queue(curinf->vcpu);
__add_to_runqueue_sort(curinf->vcpu);
}
- /* Process the runq, find domains that are on the runq that shouldn't */
+ /* Process the runq, find vcpus that are on the runq that shouldn't */
list_for_each_safe ( cur, tmp, runq )
{
- curinf = list_entry(cur,struct sedf_vcpu_info,list);
+ curinf = list_entry(cur, struct cbs_vcpu_info, list);
- if ( unlikely(curinf->slice == 0) )
+ if ( unlikely(curinf->budget == 0) )
{
- /* Ignore domains with empty slice */
+ /* Ignore vcpus with empty budget */
__del_from_queue(curinf->vcpu);
/* Move them to their next period */
@@ -399,17 +421,18 @@ static void update_queues(
/* Put them back into the queue */
__add_to_waitqueue_sort(curinf->vcpu);
}
+
else
break;
}
}
-static int sedf_init(struct scheduler *ops)
+static int cbs_init(struct scheduler *ops)
{
- struct sedf_priv_info *prv;
+ struct cbs_priv_info *prv;
- prv = xzalloc(struct sedf_priv_info);
+ prv = xzalloc(struct cbs_priv_info);
if ( prv == NULL )
return -ENOMEM;
@@ -420,11 +443,11 @@ static int sedf_init(struct scheduler *ops)
}
-static void sedf_deinit(const struct scheduler *ops)
+static void cbs_deinit(const struct scheduler *ops)
{
- struct sedf_priv_info *prv;
+ struct cbs_priv_info *prv;
- prv = SEDF_PRIV(ops);
+ prv = CBS_PRIV(ops);
if ( prv != NULL )
xfree(prv);
}
@@ -433,43 +456,43 @@ static void sedf_deinit(const struct scheduler *ops)
/*
* Main scheduling function
* Reasons for calling this function are:
- * -timeslice for the current period used up
- * -domain on waitqueue has started it's period
- * -and various others ;) in general: determine which domain to run next
+ * -budget for the current server is used up
+ * -vcpu on waitqueue has started its period
+ * -and various others ;) in general: determine which vcpu to run next
*/
-static struct task_slice sedf_do_schedule(
+static struct task_slice cbs_do_schedule(
const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled)
{
int cpu = smp_processor_id();
struct list_head *runq = RUNQ(cpu);
struct list_head *waitq = WAITQ(cpu);
- struct sedf_vcpu_info *inf = EDOM_INFO(current);
- struct sedf_vcpu_info *runinf, *waitinf;
+ struct cbs_vcpu_info *inf = CBS_VCPU(current);
+ struct cbs_vcpu_info *runinf, *waitinf;
struct task_slice ret;
SCHED_STAT_CRANK(schedule);
- /* Idle tasks don't need any of the following stuf */
+ /* Idle tasks don't need any of the following stuff */
if ( is_idle_vcpu(current) )
goto check_waitq;
/*
- * Create local state of the status of the domain, in order to avoid
+ * Create local state of the status of the vcpu, in order to avoid
* inconsistent state during scheduling decisions, because data for
* vcpu_runnable is not protected by the scheduling lock!
*/
if ( !vcpu_runnable(current) )
- inf->status |= SEDF_ASLEEP;
+ inf->status |= CBS_ASLEEP;
- if ( inf->status & SEDF_ASLEEP )
+ if ( inf->status & CBS_ASLEEP )
inf->block_abs = now;
- desched_edf_dom(now, current);
+ desched_edf_vcpu(now, current);
check_waitq:
update_queues(now, runq, waitq);
/*
- * Now simply pick the first domain from the runqueue, which has the
+ * Now simply pick the first vcpu from the runqueue, which has the
* earliest deadline, because the list is sorted
*
* Tasklet work (which runs in idle VCPU context) overrides all else.
@@ -484,28 +507,28 @@ static struct task_slice sedf_do_schedule(
}
else if ( !list_empty(runq) )
{
- runinf = list_entry(runq->next,struct sedf_vcpu_info,list);
+ runinf = list_entry(runq->next, struct cbs_vcpu_info, list);
ret.task = runinf->vcpu;
if ( !list_empty(waitq) )
{
waitinf = list_entry(waitq->next,
- struct sedf_vcpu_info,list);
+ struct cbs_vcpu_info, list);
/*
- * Rerun scheduler, when scheduled domain reaches it's
- * end of slice or the first domain from the waitqueue
+ * Rerun scheduler, when scheduled vcpu consumes
+ * its budget or the first vcpu from the waitqueue
* gets ready.
*/
- ret.time = MIN(now + runinf->slice - runinf->cputime,
+ ret.time = MIN(now + runinf->budget - runinf->cputime,
PERIOD_BEGIN(waitinf)) - now;
}
else
{
- ret.time = runinf->slice - runinf->cputime;
+ ret.time = runinf->budget - runinf->cputime;
}
}
else
{
- waitinf = list_entry(waitq->next,struct sedf_vcpu_info, list);
+ waitinf = list_entry(waitq->next, struct cbs_vcpu_info, list);
ret.task = IDLETASK(cpu);
ret.time = PERIOD_BEGIN(waitinf) - now;
@@ -521,35 +544,35 @@ static struct task_slice sedf_do_schedule(
ret.migrated = 0;
- EDOM_INFO(ret.task)->sched_start_abs = now;
+ CBS_VCPU(ret.task)->sched_start_abs = now;
CHECK(ret.time > 0);
- ASSERT(sedf_runnable(ret.task));
- CPU_INFO(cpu)->current_slice_expires = now + ret.time;
+ ASSERT(cbs_runnable(ret.task));
+ CBS_PCPU(cpu)->current_budget_expires = now + ret.time;
return ret;
}
-static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
+static void cbs_sleep(const struct scheduler *ops, struct vcpu *v)
{
- if ( is_idle_vcpu(d) )
+ if ( is_idle_vcpu(v) )
return;
- EDOM_INFO(d)->status |= SEDF_ASLEEP;
+ CBS_VCPU(v)->status |= CBS_ASLEEP;
- if ( per_cpu(schedule_data, d->processor).curr == d )
+ if ( per_cpu(schedule_data, v->processor).curr == v )
{
- cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
+ cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
}
else
{
- if ( __task_on_queue(d) )
- __del_from_queue(d);
+ if ( __task_on_queue(v) )
+ __del_from_queue(v);
}
}
/*
- * Compares two domains in the relation of whether the one is allowed to
+ * Compares two vcpus in the relation of whether the one is allowed to
* interrupt the others execution.
- * It returns true (!=0) if a switch to the other domain is good.
+ * It returns true (!=0) if a switch to the other vcpu is good.
* Priority scheme is as follows:
* EDF: early deadline > late deadline
*/
@@ -557,129 +580,82 @@ static inline int should_switch(struct vcpu *cur,
struct vcpu *other,
s_time_t now)
{
- struct sedf_vcpu_info *cur_inf, *other_inf;
- cur_inf = EDOM_INFO(cur);
- other_inf = EDOM_INFO(other);
+ struct cbs_vcpu_info *cur_inf, *other_inf;
+ cur_inf = CBS_VCPU(cur);
+ other_inf = CBS_VCPU(other);
- /* Always interrupt idle domain. */
+ /* Always interrupt idle vcpu. */
if ( is_idle_vcpu(cur) )
return 1;
/* Check whether we need to make an earlier scheduling decision */
if ( PERIOD_BEGIN(other_inf) <
- CPU_INFO(other->processor)->current_slice_expires )
+ CBS_PCPU(other->processor)->current_budget_expires )
return 1;
return 0;
}
/*
- * This function wakes up a domain, i.e. moves them into the waitqueue
- * things to mention are: admission control is taking place nowhere at
- * the moment, so we can't be sure, whether it is safe to wake the domain
- * up at all. Anyway, even if it is safe (total cpu usage <=100%) there are
- * some considerations on when to allow the domain to wake up and have it's
- * first deadline...
- * I detected 3 cases, which could describe the possible behaviour of the
- * scheduler,
- * and I'll try to make them more clear:
+ * This function wakes up a vcpu, i.e. moves them into the appropriate queue
*
- * 1. Very conservative
- * -when a blocked domain unblocks, it is allowed to start execution at
+ * For Hard Real-Time vcpus (soft = 0):
+ * -When a blocked vcpu unblocks, it is allowed to start execution at
* the beginning of the next complete period
* (D..deadline, R..running, B..blocking/sleeping, U..unblocking/waking up
*
* DRRB_____D__U_____DRRRRR___D________ ...
*
- * -this causes the domain to miss a period (and a deadlline)
- * -doesn't disturb the schedule at all
- * -deadlines keep occuring isochronous
- *
- * 2. Conservative Part 1: Short Unblocking
- * -when a domain unblocks in the same period as it was blocked it
- * unblocks and may consume the rest of it's original time-slice minus
- * the time it was blocked
- * (assume period=9, slice=5)
- *
- * DRB_UR___DRRRRR___D...
- *
- * -this also doesn't disturb scheduling, but might lead to the fact, that
- * the domain can't finish it's workload in the period
- * -addition: experiments have shown that this may have a HUGE impact on
- * performance of other domains, becaus it can lead to excessive context
- * switches
+ * -This causes the vcpu to miss a period (and a deadline)
+ * -Doesn't disturb the schedule at all
+ * -Deadlines keep occurring isochronously
*
- * Part2: Long Unblocking
- * Part 2a
- * -it is obvious that such accounting of block time, applied when
- * unblocking is happening in later periods, works fine aswell
- * -the domain is treated as if it would have been running since the start
- * of its new period
+ * For Soft Real-Time vcpus (soft = 1):
+ * -Deadlines are set and updated according to the Constant Bandwidth Server
+ * rule and vcpus are moved immediately to the run queue.
*
- * DRB______D___UR___D...
- *
- * Part 2b
- * -if one needs the full slice in the next period, it is necessary to
- * treat the unblocking time as the start of the new period, i.e. move
- * the deadline further back (later)
- * -this doesn't disturb scheduling as well, because for EDF periods can
- * be treated as minimal inter-release times and scheduling stays
- * correct, when deadlines are kept relative to the time the process
- * unblocks
- *
- * DRB______D___URRRR___D...
- * (D) <- old deadline was here
- * -problem: deadlines don't occur isochronous anymore
- *
- * 3. Unconservative (i.e. incorrect)
- * -to boost the performance of I/O dependent domains it would be possible
- * to put the domain into the runnable queue immediately, and let it run
- * for the remainder of the slice of the current period
- * (or even worse: allocate a new full slice for the domain)
- * -either behaviour can lead to missed deadlines in other domains as
- * opposed to approaches 1,2a,2b
*/
-static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
+static void cbs_wake(const struct scheduler *ops, struct vcpu *v)
{
s_time_t now = NOW();
- struct sedf_vcpu_info* inf = EDOM_INFO(d);
+ struct cbs_vcpu_info* inf = CBS_VCPU(v);
- if ( unlikely(is_idle_vcpu(d)) )
+ if ( unlikely(is_idle_vcpu(v)) )
return;
- if ( unlikely(__task_on_queue(d)) )
+ if ( unlikely(__task_on_queue(v)) )
return;
- ASSERT(!sedf_runnable(d));
- inf->status &= ~SEDF_ASLEEP;
+ ASSERT(!cbs_runnable(v));
+ inf->status &= ~CBS_ASLEEP;
if ( unlikely(inf->deadl_abs == 0) )
{
/* Initial setup of the deadline */
- inf->deadl_abs = now + inf->slice;
+ inf->deadl_abs = now + inf->budget;
}
-#ifdef SEDF_STATS
+#ifdef CBS_STATS
inf->block_tot++;
#endif
- if ( sedf_soft(d) )
+ if ( cbs_soft(v) )
{
/* Apply CBS rule
* Where:
- * c == Remaining server slice == (inf->slice - cpu_time)
+ * c == Remaining server budget == (inf->budget - cpu_time)
* d == Server (vcpu) deadline == inf->deadl_abs
* r == Wake-up time of vcpu == now
- * U == Server (vcpu) bandwidth == (inf->slice / inf->period)
+ * U == Server (vcpu) bandwidth == (inf->budget / inf->period)
*
* if c>=(d-r)*U --->
- * (inf->slice - cputime) >= (inf->deadl_abs - now) * inf->period
+ * (inf->budget - cputime) >= (inf->deadl_abs - now) * (inf->budget / inf->period)
*
- * If true, push deadline back by one period and refresh slice, else
- * use current slice and deadline.
+ * If true, push deadline back by one period and refresh budget, else
+ * use current budget and deadline.
*/
- if((inf->slice - inf->cputime) >=
- ((inf->deadl_abs - now) * (inf->slice / inf->period)))
+ if((inf->budget - inf->cputime) >=
+ ((inf->deadl_abs - now) * (inf->budget / inf->period)))
{
/* Push back deadline by one period */
inf->deadl_abs += inf->period;
@@ -688,14 +664,14 @@ static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
/* In CBS we don't care if the period has begun,
* the task doesn't have to wait for its period
- * because it'll never request more than its slice
+ * because it'll never request more than its budget
* for any given period.
*/
- __add_to_runqueue_sort(d);
+ __add_to_runqueue_sort(v);
}
else {
/* Task is a hard task, treat accordingly */
-#ifdef SEDF_STATS
+#ifdef CBS_STATS
if ( now < inf->deadl_abs )
{
/* Short blocking */
@@ -709,12 +685,12 @@ static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
#endif
if ( PERIOD_BEGIN(inf) > now )
- __add_to_waitqueue_sort(d);
+ __add_to_waitqueue_sort(v);
else
- __add_to_runqueue_sort(d);
+ __add_to_runqueue_sort(v);
}
-#ifdef SEDF_STATS
+#ifdef CBS_STATS
/* Do some statistics here... */
if ( inf->block_abs != 0 )
{
@@ -722,74 +698,75 @@ static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
}
#endif
- ASSERT(__task_on_queue(d));
+ ASSERT(__task_on_queue(v));
/*
* Check whether the awakened task needs to invoke the do_schedule
* routine. Try to avoid unnecessary runs but:
- * Save approximation: Always switch to scheduler!
+ * Safe approximation: Always switch to scheduler!
*/
- ASSERT(d->processor >= 0);
- ASSERT(d->processor < nr_cpu_ids);
- ASSERT(per_cpu(schedule_data, d->processor).curr);
+ ASSERT(v->processor >= 0);
+ ASSERT(v->processor < nr_cpu_ids);
+ ASSERT(per_cpu(schedule_data, v->processor).curr);
- if ( should_switch(per_cpu(schedule_data, d->processor).curr, d, now) )
- cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
+ if ( should_switch(per_cpu(schedule_data, v->processor).curr, v, now) )
+ cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
}
-/* Print a lot of useful information about a domains in the system */
-static void sedf_dump_domain(struct vcpu *d)
+/* Print a lot of useful information about a vcpu in the system */
+static void cbs_dump_vcpu(struct vcpu *v)
{
- printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
- d->is_running ? 'T':'F');
+ printk("%i.%i has=%c ", v->domain->domain_id, v->vcpu_id,
+ v->is_running ? 'T':'F');
printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64,
- EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs);
+ CBS_VCPU(v)->period, CBS_VCPU(v)->budget, CBS_VCPU(v)->deadl_abs);
-#ifdef SEDF_STATS
+#ifdef CBS_STATS
printk(" m=%u mt=%"PRIu64"o=%u ot=%"PRIu64,
- EDOM_INFO(d)->miss_tot, EDOM_INFO(d)->miss_time,
- EDOM_INFO(d)->over_tot, EDOM_INFO(d)->over_time);
+ CBS_VCPU(v)->miss_tot, CBS_VCPU(v)->miss_time,
+ CBS_VCPU(v)->over_tot, CBS_VCPU(v)->over_time);
- if ( EDOM_INFO(d)->block_tot != 0 )
+ if ( CBS_VCPU(v)->block_tot != 0 )
printk("\n blks=%u sh=%u (%u%%) "\
"l=%u (%u%%) avg: b=%"PRIu64,
- EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
- (EDOM_INFO(d)->short_block_tot * 100) / EDOM_INFO(d)->block_tot,
- EDOM_INFO(d)->long_block_tot,
- (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
- (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot);
+ CBS_VCPU(v)->block_tot, CBS_VCPU(v)->short_block_tot,
+ (CBS_VCPU(v)->short_block_tot * 100) / CBS_VCPU(v)->block_tot,
+ CBS_VCPU(v)->long_block_tot,
+ (CBS_VCPU(v)->long_block_tot * 100) / CBS_VCPU(v)->block_tot,
+ (CBS_VCPU(v)->block_time_tot) / CBS_VCPU(v)->block_tot);
#endif
printk("\n");
}
-/* Dumps all domains on the specified cpu */
-static void sedf_dump_cpu_state(const struct scheduler *ops, int i)
+/* Dumps all vcpus on the specified cpu */
+static void cbs_dump_cpu_state(const struct scheduler *ops, int cpu)
{
struct list_head *list, *queue, *tmp;
- struct sedf_vcpu_info *d_inf;
+ struct cbs_vcpu_info *v_inf;
struct domain *d;
- struct vcpu *ed;
+ struct vcpu *v;
int loop = 0;
- printk("now=%"PRIu64"\n",NOW());
- queue = RUNQ(i);
+ printk("now=%"PRIu64"\n", NOW());
+ queue = RUNQ(cpu);
printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
(unsigned long) queue->next, (unsigned long) queue->prev);
list_for_each_safe ( list, tmp, queue )
{
- printk("%3d: ",loop++);
- d_inf = list_entry(list, struct sedf_vcpu_info, list);
- sedf_dump_domain(d_inf->vcpu);
+ printk("%3d: ", loop++);
+ v_inf = list_entry(list, struct cbs_vcpu_info, list);
+ cbs_dump_vcpu(v_inf->vcpu);
}
- queue = WAITQ(i); loop = 0;
+ queue = WAITQ(cpu);
+ loop = 0;
printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
(unsigned long) queue->next, (unsigned long) queue->prev);
list_for_each_safe ( list, tmp, queue )
{
- printk("%3d: ",loop++);
- d_inf = list_entry(list, struct sedf_vcpu_info, list);
- sedf_dump_domain(d_inf->vcpu);
+ printk("%3d: ", loop++);
+ v_inf = list_entry(list, struct cbs_vcpu_info, list);
+ cbs_dump_vcpu(v_inf->vcpu);
}
loop = 0;
@@ -798,14 +775,14 @@ static void sedf_dump_cpu_state(const struct scheduler *ops, int i)
rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
{
- if ( (d->cpupool ? d->cpupool->sched : &sched_sedf_def) != ops )
+ if ( (d->cpupool ? d->cpupool->sched : &sched_cbs_def) != ops )
continue;
- for_each_vcpu(d, ed)
+ for_each_vcpu(d, v)
{
- if ( !__task_on_queue(ed) && (ed->processor == i) )
+ if ( !__task_on_queue(v) && (v->processor == cpu) )
{
- printk("%3d: ",loop++);
- sedf_dump_domain(ed);
+ printk("%3d: ", loop++);
+ cbs_dump_vcpu(v);
}
}
}
@@ -814,9 +791,9 @@ static void sedf_dump_cpu_state(const struct scheduler *ops, int i)
/* Set or fetch domain scheduling parameters */
-static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct xen_domctl_scheduler_op *op)
+static int cbs_adjust(const struct scheduler *ops, struct domain *d, struct xen_domctl_scheduler_op *op)
{
- struct sedf_priv_info *prv = SEDF_PRIV(ops);
+ struct cbs_priv_info *prv = CBS_PRIV(ops);
unsigned long flags;
s_time_t now = NOW();
struct vcpu *v;
@@ -825,7 +802,7 @@ static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct xen
/*
* Serialize against the pluggable scheduler lock to protect from
* concurrent updates. We need to take the runq lock for the VCPUs
- * as well, since we are touching slice and period.
+ * as well, since we are touching budget and period.
*
* As in sched_credit2.c, runq locks nest inside the pluggable scheduler
* lock.
@@ -835,7 +812,7 @@ static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct xen
if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
{
/* Check for sane parameters */
- if ( !op->u.sedf.period )
+ if ( !op->u.cbs.period )
{
printk("Period Not set");
rc = -EINVAL;
@@ -845,51 +822,51 @@ static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct xen
/*
* Sanity checking
*/
- if ( (op->u.sedf.period > PERIOD_MAX) ||
- (op->u.sedf.period < PERIOD_MIN) ||
- (op->u.sedf.slice > op->u.sedf.period) ||
- (op->u.sedf.slice < SLICE_MIN) )
+ if ( (op->u.cbs.period > PERIOD_MAX) ||
+ (op->u.cbs.period < PERIOD_MIN) ||
+ (op->u.cbs.budget > op->u.cbs.period) ||
+ (op->u.cbs.budget < BUDGET_MIN) )
{
- printk("Insane Parameters: period: %lu\tbudget: %lu\n", op->u.sedf.period, op->u.sedf.slice);
+ printk("Insane Parameters: period: %lu\tbudget: %lu\n", op->u.cbs.period, op->u.cbs.budget);
rc = -EINVAL;
goto out;
}
/* Time-driven domains */
- for_each_vcpu ( p, v )
+ for_each_vcpu ( d, v )
{
spinlock_t *lock = vcpu_schedule_lock(v);
- EDOM_INFO(v)->period = op->u.sedf.period;
- EDOM_INFO(v)->slice = op->u.sedf.slice;
- if(op->u.sedf.soft)
+ CBS_VCPU(v)->period = op->u.cbs.period;
+ CBS_VCPU(v)->budget = op->u.cbs.budget;
+ if(op->u.cbs.soft)
{
- EDOM_INFO(v)->status |= SEDF_SOFT_TASK;
+ CBS_VCPU(v)->status |= CBS_SOFT_TASK;
}
else
{
/* Correct deadline when switching from a soft to hard vcpu */
- if( unlikely((EDOM_INFO(v)->deadl_abs - now) >= (EDOM_INFO(v)->period * 3)) )
+ if( unlikely((CBS_VCPU(v)->deadl_abs - now) >= (CBS_VCPU(v)->period * 3)) )
{
- EDOM_INFO(v)->deadl_abs = (now - EDOM_INFO(v)->cputime) + (2 * EDOM_INFO(v)->period);
+ CBS_VCPU(v)->deadl_abs = (now - CBS_VCPU(v)->cputime) + (2 * CBS_VCPU(v)->period);
}
- EDOM_INFO(v)->status &= (~SEDF_SOFT_TASK);
+ CBS_VCPU(v)->status &= (~CBS_SOFT_TASK);
}
vcpu_schedule_unlock(lock, v);
}
}
else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
{
- if ( p->vcpu[0] == NULL )
+ if ( d->vcpu[0] == NULL )
{
rc = -EINVAL;
goto out;
}
- op->u.sedf.period = EDOM_INFO(p->vcpu[0])->period;
- op->u.sedf.slice = EDOM_INFO(p->vcpu[0])->slice;
- op->u.sedf.soft = sedf_soft(p->vcpu[0]);
+ op->u.cbs.period = CBS_VCPU(d->vcpu[0])->period;
+ op->u.cbs.budget = CBS_VCPU(d->vcpu[0])->budget;
+ op->u.cbs.soft = cbs_soft(d->vcpu[0]);
}
out:
@@ -898,35 +875,35 @@ out:
return rc;
}
-static struct sedf_priv_info _sedf_priv;
+static struct cbs_priv_info _cbs_priv;
-const struct scheduler sched_sedf_def = {
- .name = "Simple EDF Scheduler",
- .opt_name = "sedf",
- .sched_id = XEN_SCHEDULER_SEDF,
- .sched_data = &_sedf_priv,
+const struct scheduler sched_cbs_def = {
+ .name = "Constant Bandwidth Server Scheduler",
+ .opt_name = "cbs",
+ .sched_id = XEN_SCHEDULER_CBS,
+ .sched_data = &_cbs_priv,
- .init_domain = sedf_init_domain,
- .destroy_domain = sedf_destroy_domain,
-
- .insert_vcpu = sedf_insert_vcpu,
-
- .alloc_vdata = sedf_alloc_vdata,
- .free_vdata = sedf_free_vdata,
- .alloc_pdata = sedf_alloc_pdata,
- .free_pdata = sedf_free_pdata,
- .alloc_domdata = sedf_alloc_domdata,
- .free_domdata = sedf_free_domdata,
-
- .init = sedf_init,
- .deinit = sedf_deinit,
-
- .do_schedule = sedf_do_schedule,
- .pick_cpu = sedf_pick_cpu,
- .dump_cpu_state = sedf_dump_cpu_state,
- .sleep = sedf_sleep,
- .wake = sedf_wake,
- .adjust = sedf_adjust,
+ .init_domain = cbs_init_domain,
+ .destroy_domain = cbs_destroy_domain,
+
+ .insert_vcpu = cbs_insert_vcpu,
+
+ .alloc_vdata = cbs_alloc_vdata,
+ .free_vdata = cbs_free_vdata,
+ .alloc_pdata = cbs_alloc_pdata,
+ .free_pdata = cbs_free_pdata,
+ .alloc_domdata = cbs_alloc_domdata,
+ .free_domdata = cbs_free_domdata,
+
+ .init = cbs_init,
+ .deinit = cbs_deinit,
+
+ .do_schedule = cbs_do_schedule,
+ .pick_cpu = cbs_pick_cpu,
+ .dump_cpu_state = cbs_dump_cpu_state,
+ .sleep = cbs_sleep,
+ .wake = cbs_wake,
+ .adjust = cbs_adjust,
};
/*
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index c174c41..bcb430d 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -65,7 +65,7 @@ DEFINE_PER_CPU(struct schedule_data, schedule_data);
DEFINE_PER_CPU(struct scheduler *, scheduler);
static const struct scheduler *schedulers[] = {
- &sched_sedf_def,
+ &sched_cbs_def,
&sched_credit_def,
&sched_credit2_def,
&sched_arinc653_def,
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 6e143d3..adf9e83 100755
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -317,7 +317,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t);
/* XEN_DOMCTL_scheduler_op */
/* Scheduler types. */
-#define XEN_SCHEDULER_SEDF 4
+#define XEN_SCHEDULER_CBS 4
#define XEN_SCHEDULER_CREDIT 5
#define XEN_SCHEDULER_CREDIT2 6
#define XEN_SCHEDULER_ARINC653 7
@@ -328,11 +328,11 @@ struct xen_domctl_scheduler_op {
uint32_t sched_id; /* XEN_SCHEDULER_* */
uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */
union {
- struct xen_domctl_sched_sedf {
+ struct xen_domctl_sched_cbs {
uint64_aligned_t period;
- uint64_aligned_t slice;
+ uint64_aligned_t budget;
uint32_t soft;
- } sedf;
+ } cbs;
struct xen_domctl_sched_credit {
uint16_t weight;
uint16_t cap;
diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h
index cfcf4aa..bd8d00b 100644
--- a/xen/include/public/trace.h
+++ b/xen/include/public/trace.h
@@ -75,7 +75,7 @@
/* Per-scheduler IDs, to identify scheduler specific events */
#define TRC_SCHED_CSCHED 0
#define TRC_SCHED_CSCHED2 1
-#define TRC_SCHED_SEDF 2
+#define TRC_SCHED_CBS 2
#define TRC_SCHED_ARINC653 3
/* Per-scheduler tracing */
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index d95e254..6bdbf47 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -167,7 +167,7 @@ struct scheduler {
void (*tick_resume) (const struct scheduler *, unsigned int);
};
-extern const struct scheduler sched_sedf_def;
+extern const struct scheduler sched_cbs_def;
extern const struct scheduler sched_credit_def;
extern const struct scheduler sched_credit2_def;
extern const struct scheduler sched_arinc653_def;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables
2014-06-13 19:58 ` [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables Josh Whitehead
@ 2014-06-16 9:33 ` Jan Beulich
2014-06-16 15:29 ` George Dunlap
0 siblings, 1 reply; 27+ messages in thread
From: Jan Beulich @ 2014-06-16 9:33 UTC (permalink / raw)
To: Josh Whitehead
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Dario Faggioli,
Ian Jackson, Robert VanVossen, Xen-devel, Nate Studer
>>> On 13.06.14 at 21:58, <josh.whitehead@dornerworks.com> wrote:
> --- a/xen/include/public/trace.h
> +++ b/xen/include/public/trace.h
> @@ -75,7 +75,7 @@
> /* Per-scheduler IDs, to identify scheduler specific events */
> #define TRC_SCHED_CSCHED 0
> #define TRC_SCHED_CSCHED2 1
> -#define TRC_SCHED_SEDF 2
> +#define TRC_SCHED_CBS 2
While the change to domctl.h is fine, I'm not sure we can allow simple
renaming elsewhere in the public headers (i.e. the old name may need
to remain there, guarded with a __XEN_INTERFACE_VERSION__
conditional).
Jan
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables
2014-06-16 9:33 ` Jan Beulich
@ 2014-06-16 15:29 ` George Dunlap
2014-06-17 16:11 ` Dario Faggioli
0 siblings, 1 reply; 27+ messages in thread
From: George Dunlap @ 2014-06-16 15:29 UTC (permalink / raw)
To: Jan Beulich, Josh Whitehead
Cc: Ian Campbell, Stefano Stabellini, Dario Faggioli, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer
On 06/16/2014 10:33 AM, Jan Beulich wrote:
>>>> On 13.06.14 at 21:58, <josh.whitehead@dornerworks.com> wrote:
>> --- a/xen/include/public/trace.h
>> +++ b/xen/include/public/trace.h
>> @@ -75,7 +75,7 @@
>> /* Per-scheduler IDs, to identify scheduler specific events */
>> #define TRC_SCHED_CSCHED 0
>> #define TRC_SCHED_CSCHED2 1
>> -#define TRC_SCHED_SEDF 2
>> +#define TRC_SCHED_CBS 2
> While the change to domctl.h is fine, I'm not sure we can allow simple
> renaming elsewhere in the public headers (i.e. the old name may need
> to remain there, guarded with a __XEN_INTERFACE_VERSION__
> conditional).
I think the tracing stuff is fine too -- we've always considered that
non-stable (and have made incompatible changes across versions).
But the libxl interfaces *do* need to have something sensible done with
them.
Given that, I think it would probably be better to make this patch series:
1/N: Add sched_cbs.c to Xen
2/N: Add cbs to toolstack
3/N: Remove sedf scheduler (with appropriate backwards-compatibility bits)
I think that would make it a bit easier to review as well.
-George
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables
2014-06-16 15:29 ` George Dunlap
@ 2014-06-17 16:11 ` Dario Faggioli
2014-06-17 17:28 ` Dario Faggioli
` (2 more replies)
0 siblings, 3 replies; 27+ messages in thread
From: Dario Faggioli @ 2014-06-17 16:11 UTC (permalink / raw)
To: George Dunlap
Cc: Ian Campbell, Stefano Stabellini, Ian Jackson, Robert VanVossen,
Xen-devel, Nate Studer, Josh Whitehead, Jan Beulich
[-- Attachment #1.1: Type: text/plain, Size: 3119 bytes --]
On lun, 2014-06-16 at 16:29 +0100, George Dunlap wrote:
> On 06/16/2014 10:33 AM, Jan Beulich wrote:
> >>>> On 13.06.14 at 21:58, <josh.whitehead@dornerworks.com> wrote:
> >> --- a/xen/include/public/trace.h
> >> +++ b/xen/include/public/trace.h
> >> @@ -75,7 +75,7 @@
> >> /* Per-scheduler IDs, to identify scheduler specific events */
> >> #define TRC_SCHED_CSCHED 0
> >> #define TRC_SCHED_CSCHED2 1
> >> -#define TRC_SCHED_SEDF 2
> >> +#define TRC_SCHED_CBS 2
> > While the change to domctl.h is fine, I'm not sure we can allow simple
> > renaming elsewhere in the public headers (i.e. the old name may need
> > to remain there, guarded with a __XEN_INTERFACE_VERSION__
> > conditional).
>
> I think the tracing stuff is fine too -- we've always considered that
> non-stable (and have made incompatible changes across versions).
>
> But the libxl interfaces *do* need to have something sensible done with
> them.
>
> Given that, I think it would probably be better to make this patch series:
>
> 1/N: Add sched_cbs.c to Xen
> 2/N: Add cbs to toolstack
> 3/N: Remove sedf scheduler (with appropriate backwards-compatibility bits)
>
> I think that would make it a bit easier to review as well.
>
As far as this patch is concerned, I agree with George.
However... Is removing SEDF an option? Is radically changing, if not
its behavior (as it's known to be pretty broken), the expectations of
a user, e.g., of an old application being compiled with a new version
of xen+libxl an option?
If yes, what's the process to do that?
Personally, I'm all for having a really working real-time scheduling
solution, and you all know that. :-) However, especially considering
Josh's and Robbie's series, I think I would not remove or rename SEDF, I
rather "just" amend the implementation.
In future, it would be interesting to introduce more advanced real-time
scheduling features and capabilities, like the ones coming from RT-Xen
(and the RT-Xen guys are working on doing that), but I think that can be
done step-by-step, and without any massive renaming or removal.
So, I'm asking, mostly to George, about the overall scheduling aspects
and implications, and to the tools maintainer, as that's where API
stability is to be enforced: should this be a concern? In what sense API
stability applies here? Can we, for example, start to ignore one or more
SEDF scheduling parameters?
I'm asking explicitly about the parameters because, although I think
that most of the changes in this series do not actually call for much
renaming, at least the 'weight' and, to a certain extent, the 'extra',
parameters are a bit difficult to deal with (mostly because they're a
remnant from when SEDF was meant as a general purpose scheduler too!).
Thoughts?
Thanks and Regards,
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables
2014-06-17 16:11 ` Dario Faggioli
@ 2014-06-17 17:28 ` Dario Faggioli
2014-06-25 20:13 ` Meng Xu
2014-06-26 21:24 ` Joshua Whitehead
2014-06-18 11:18 ` George Dunlap
2014-06-26 21:23 ` Joshua Whitehead
2 siblings, 2 replies; 27+ messages in thread
From: Dario Faggioli @ 2014-06-17 17:28 UTC (permalink / raw)
To: George Dunlap
Cc: Lars Kurth, xumengpanda, Ian Campbell, xisisu, Stefano Stabellini,
Ian Jackson, Robert VanVossen, Xen-devel, Nate Studer,
Josh Whitehead, andrii.anisov, Jan Beulich, lichong659, Meng Xu
[-- Attachment #1.1: Type: text/plain, Size: 2996 bytes --]
[Adding RT-Xen people to the discussion, as well as Lars and Andrii from
GlobalLogic]
On mar, 2014-06-17 at 18:11 +0200, Dario Faggioli wrote:
> On lun, 2014-06-16 at 16:29 +0100, George Dunlap wrote:
> So, I'm asking, mostly to George, about the overall scheduling aspects
> and implications, and to the tools maintainer, as that's where API
> stability is to be enforced: should this be a concern? In what sense API
> stability applies here? Can we, for example, start to ignore one or more
> SEDF scheduling parameters?
>
> I'm asking explicitly about the parameters because, although I think
> that most of the changes in this series does not actually call for much
> renaming, at least the 'weight' and, to certain extent the 'extra',
> parameters are a bit difficult to deal with (mostly because they're a
> remnant from when SEDF was meant as a general purpose scheduler too!).
>
> Thoughts?
>
Related to this, there are basically two groups of people working on
real-time scheduling:
1) Josh and Robbie @ Dornerworks (+ Nate), working on fixing SEDF;
2) RT-Xen developers, working at isolating the upstreamable bits of
RT-Xen itself (although they've not sent patches here in public
yet).
The very nice part of 1) is that it (in the long run) fixes SEDF, and if
we are to keep it in the tree, I really think it should be fixed!
The very nice part of 2) is that it is already a more advanced and
mature real-time scheduling solution (for example, it already deals with
SMPs correctly, unlike current SEDF and Josh's RFC), but it really does
not have much to do with SEDF (either broken or fixed), from either an
interface or an implementation (i.e., code sharing) point of view.
So, again, depending on whether or not we want to keep SEDF, and how
hard we want for its interface not only to compile, but to remain
meaningful, the way forward changes. That's why I'm asking what we want
to do with SEDF. :-/
Assuming that we *do* want to keep it, my personally preferred way to
proceed would be:
- we take the implementation changes, but not the renaming, from Josh's
effort
- we merge the sched_sedf.c resulting from above with sched_rtglobal.c
from RT-Xen, so to have a solution that works well on SMP systems
and also implements the existing SEDF interface
The merge can happen 'either way', i.e., we can try to borrow the global
scheduling bits (i.e., the SMP support) from RT-Xen's sched_rtglobal.c
into sched_sedf.c or, vice-versa, import Josh's SEDF/CBS code inside
sched_rtglobal.c... In either case, I'd need Josh and Meng, and their
respective fellows, to collaborate... Are you guys up for that? :-)
Regards,
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables
2014-06-17 17:28 ` Dario Faggioli
@ 2014-06-25 20:13 ` Meng Xu
2014-06-26 21:24 ` Joshua Whitehead
1 sibling, 0 replies; 27+ messages in thread
From: Meng Xu @ 2014-06-25 20:13 UTC (permalink / raw)
To: Dario Faggioli
Cc: Lars Kurth, Ian Campbell, Sisu Xi, Stefano Stabellini,
George Dunlap, Ian Jackson, Robert VanVossen, Xen-devel,
Nate Studer, Josh Whitehead, Andrii Anisov, Jan Beulich, Chong Li,
Meng Xu
[-- Attachment #1.1: Type: text/plain, Size: 4929 bytes --]
Hi Dario,
[Adding RT-Xen people to the discussion, as well as Lars and Andrii from
> GlobalLogic]
>
Thank you very much for adding us to the discussion! :-)
Related to this, there are basically two groups of people working on
> real-time scheduling:
>
> 1) Josh and Robbie @ Dornerworks (+ Nate), working on fixing SEDF;
>
> 2) RT-Xen developers, working at isolating the upstreamable bits of
> RT-Xen itself (although they've not sent patches here in public
> yet).
>
> The very nice part of 1) is that it (in the long run) fixes SEDF, and if
> we are to keep it in the tree, I really think it should be fixed!
>
> The very nice part of 2) is that it is already a more advanced and
> mature real-time scheduling solution (for example, it already deals with
> SMPs correctly, unlike current SEDF and Josh's RFC), but it really does
> not have much to do with SEDF (either broken or fixed), both at an
> interface and implementation (i.e., code sharing) points of view either.
>
Right now, our scheduler (we call it rtglobal, but the name can be changed
to SEDF easily) supports the following functions:
1) global EDF scheduling based on each VCPU's deadline;
2) set/get *each* VCPU's parameter of each domain scheduled by the real
time scheduler;
3) supports cpupool.
Here is a simple scenario to show the above functions of our scheduler:
//list each vcpu's parameters of each domain in cpu pools using rtglobal
scheduler
#xl sched-rtglobal
Cpupool Pool-0: sched=EDF
Name ID Period Budget Vcpu
Domain-0 0 10 10 0
Domain-0 0 20 20 1
Domain-0 0 30 15 2
Domain-0 0 10 10 3
litmus1 1 10 4 0
litmus1 1 10 7 1
//set domain litmus1's vcpu 1's parameters:
# xl sched-rtglobal -d litmus1 -v 1 -p 20 -b 10
//domain litmus1's vcpu 1's parameters are changed, display each VCPU's
parameters separately:
#xl sched-rtglobal -d litmus1
Name ID Period Budget Vcpu
litmus1 1 10 4 0
litmus1 1 20 10 1
//list cpupool's information (cpupool test has credit scheduler and has a
domain litmus2 already)
#xl cpupool-list -c
Name CPU list
Pool-0 0,1,2,3,4,5,6,7
test 10,11
//migrate litmus1 from cpupool Pool-0 to cpupool test.
#xl cpupool-migrate litmus1 test
//now litmus1 is in cpupool test
# xl sched-credit
Cpupool test: tslice=30ms ratelimit=1000us
Name ID Weight Cap
litmus1 1 256 0
litmus2 2 256 0
By reading the mailing list, I think Josh is working on modifying SEDF to
achieve 1) and 2). (Correct me if I'm wrong. :-) ) So I'm thinking if we
can just merge code from both sides to simply get a better SEDF instead of
doing duplicate work in both sides?
I compared our rtglobal scheduler with the simplified SEDF Josh and Nate
are proposing; the only difference between these two algorithms is the budget
replenishment mechanism: Josh and Nate use the Constant
Bandwidth Server mechanism, whereas we use the Deferrable Server mechanism.
Both of these server mechanisms have been proven workable for real-time
applications, and we can actually support both later without affecting the user
interface.
> So, again, depending on whether or not we want to keep SEDF, and how
> hard we want for it's interface not only to compile, but to remain
> meaningful, the way forward changes. That's why I'm asking what we want
> to do with SEDF. :-/
>
>
> Assuming that we *do* want to keep it, my personally preferred way to
> proceed would be:
> - we take the implementation changes, but not the renaming, from Josh's
> effort
> - we merge the sched_sedf.c resulting from above with sched_rtglobal.c
> from RT-Xen, so to have a solution that works well on SMP systems
> and also implements the existing SEDF interface
>
> The merge can happen 'either way', i.e., we can try to borrow the global
> scheduling bits (i.e., the SMP support) from RT-Xen's sched_rtglobal.c
> into sched_sedf.c or, vice-versa, import Josh's SEDF/CBS code inside
> sched_rtglobal.c... In either case, I'd need Josh and Meng, and their
> respective fellows, to collaborate... Are you guys up for that? :-)
>
I totally agree with you, Dario! I'd like to collaborate with Josh to get
a better real-time scheduler in Xen. :-)
Thanks,
Meng
-----------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
[-- Attachment #1.2: Type: text/html, Size: 10722 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables
2014-06-17 17:28 ` Dario Faggioli
2014-06-25 20:13 ` Meng Xu
@ 2014-06-26 21:24 ` Joshua Whitehead
2014-06-28 2:13 ` Dario Faggioli
1 sibling, 1 reply; 27+ messages in thread
From: Joshua Whitehead @ 2014-06-26 21:24 UTC (permalink / raw)
To: Dario Faggioli, George Dunlap
Cc: Lars Kurth, Ian Campbell, xisisu, Stefano Stabellini, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer, xumengpanda, Meng Xu,
Jan Beulich, lichong659
On 6/17/2014 1:28 PM, Dario Faggioli wrote:
> [Adding RT-Xen people to the discussion, as well as Lars and Andrii from
> GlobalLogic]
>
> On mar, 2014-06-17 at 18:11 +0200, Dario Faggioli wrote:
>> On lun, 2014-06-16 at 16:29 +0100, George Dunlap wrote:
>
>> So, I'm asking, mostly to George, about the overall scheduling aspects
>> and implications, and to the tools maintainer, as that's where API
>> stability is to be enforced: should this be a concern? In what sense API
>> stability applies here? Can we, for example, start to ignore one or more
>> SEDF scheduling parameters?
>>
>> I'm asking explicitly about the parameters because, although I think
>> that most of the changes in this series does not actually call for much
>> renaming, at least the 'weight' and, to certain extent the 'extra',
>> parameters are a bit difficult to deal with (mostly because they're a
>> remnant from when SEDF was meant as a general purpose scheduler too!).
>>
>> Thoughts?
>>
> Related to this, there are basically two groups of people working on
> real-time scheduling:
>
> 1) Josh and Robbie @ Dornerworks (+ Nate), working on fixing SEDF;
>
> 2) RT-Xen developers, working at isolating the upstreamable bits of
> RT-Xen itself (although they've not sent patches here in public
> yet).
>
> The very nice part of 1) is that it (in the long run) fixes SEDF, and if
> we are to keep it in the tree, I really think it should be fixed!
>
> The very nice part of 2) is that it is already a more advanced and
> mature real-time scheduling solution (for example, it already deals with
> SMPs correctly, unlike current SEDF and Josh's RFC), but it really does
> not have much to do with SEDF (either broken or fixed), both at an
> interface and implementation (i.e., code sharing) points of view either.
>
> So, again, depending on whether or not we want to keep SEDF, and how
> hard we want for it's interface not only to compile, but to remain
> meaningful, the way forward changes. That's why I'm asking what we want
> to do with SEDF. :-/
>
>
> Assuming that we *do* want to keep it, my personally preferred way to
> proceed would be:
> - we take the implementation changes, but not the renaming, from Josh's
> effort
> - we merge the sched_sedf.c resulting from above with sched_rtglobal.c
> from RT-Xen, so to have a solution that works well on SMP systems
> and also implements the existing SEDF interface
>
> The merge can happen 'either way', i.e., we can try to borrow the global
> scheduling bits (i.e., the SMP support) from RT-Xen's sched_rtglobal.c
> into sched_sedf.c or, vice-versa, import Josh's SEDF/CBS code inside
> sched_rtglobal.c... In either case, I'd need Josh and Meng, and their
> respective fellows, to collaborate... Are you guys up for that? :-)
>
Reading over this again in combination with some of the other discussion that
has gone on (and having seen the e-mails from Meng/RT-Xen, thanks for that!) we
think this is probably a good solution.
To this end I think our next version of the patch series will focus on the
reorganization of the patch order as discussed with the separation between the
various areas. We will keep our cuts to SEDF and our implementation details on
CBS, but we'll keep the renaming/parameter changes very limited to only
what's necessary to properly reflect the state of the scheduler. (Similar to
what George proposed in his last e-mail, i.e. changing the scheduler and
interface in place rather than introducing a new scheduler entirely)
At a minimum this would put the series in a good state so that, if it were
upstreamed, RT-Xen and/or DornerWorks could easily patch against the scheduler
to add the additional features discussed. These patches could then come
directly from RT-Xen, DornerWorks, or some other hybrid of RT-Xen and
DornerWorks code, all while occurring incrementally and leaving a working
version of the scheduler in the tree at all times.
Everyone let us know what you think, we'll get to work on our V2 to get that out
soon to give everyone a better idea of where that puts us. Thanks,
- Josh Whitehead
> Regards,
> Dario
>
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables
2014-06-26 21:24 ` Joshua Whitehead
@ 2014-06-28 2:13 ` Dario Faggioli
0 siblings, 0 replies; 27+ messages in thread
From: Dario Faggioli @ 2014-06-28 2:13 UTC (permalink / raw)
To: Joshua Whitehead
Cc: Lars Kurth, Ian Campbell, xisisu, Stefano Stabellini,
George Dunlap, Ian Jackson, Robert VanVossen, Xen-devel,
Nate Studer, xumengpanda, Meng Xu, Jan Beulich, lichong659
[-- Attachment #1.1: Type: text/plain, Size: 2425 bytes --]
On gio, 2014-06-26 at 17:24 -0400, Joshua Whitehead wrote:
> On 6/17/2014 1:28 PM, Dario Faggioli wrote:
> > The merge can happen 'either way', i.e., we can try to borrow the global
> > scheduling bits (i.e., the SMP support) from RT-Xen's sched_rtglobal.c
> > into sched_sedf.c or, vice-versa, import Josh's SEDF/CBS code inside
> > sched_rtglobal.c... In either case, I'd need Josh and Meng, and their
> > respective fellows, to collaborate... Are you guys up for that? :-)
> >
> Reading over this again in combination with some of the other discussion that
> has gone on (and having seen the e-mails from Meng/RT-Xen, thanks for that!) we
> think this is probably a good solution.
>
> To this end I think our next version of the patch series will focus on the
> reorganization of the patch order as discussed with the separation between the
> various areas. We will keep our cuts to SEDF and our implementation details on
> CBS, but we'll keep the renaming/parameter changes very limited to only
> what's necessary to properly reflect the state of the scheduler. (Similar to
> what George proposed in his last e-mail, i.e. changing the scheduler and
> interface in place rather than introducing a new scheduler entirely)
>
Yes, that, I think, would help. As said, the series is very hard to
review like this. Having it in a better split and more 'focused' (i.e.,
less massive killing/renaming) would help (me) a lot!
> At a minimum this would put the series in a good state so that, if it were
> upstreamed, RT-Xen and/or DornerWorks could easily patch against the scheduler
> to add the additional features discussed. These patches could then come
> directly from RT-Xen, DornerWorks, or some other hybrid of RT-Xen and
> DornerWorks code, all while occurring incrementally and leaving a working
> version of the scheduler in the tree at all times.
>
Right... That's the spirit!! :-D :-D
> Everyone let us know what you think, we'll get to work on our V2 to get that out
> soon to give everyone a better idea of where that puts us. Thanks,
>
Really, it would be super-cool. And thanks to you. :-)
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 181 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables
2014-06-17 16:11 ` Dario Faggioli
2014-06-17 17:28 ` Dario Faggioli
@ 2014-06-18 11:18 ` George Dunlap
2014-06-26 21:30 ` Joshua Whitehead
2014-06-26 21:23 ` Joshua Whitehead
2 siblings, 1 reply; 27+ messages in thread
From: George Dunlap @ 2014-06-18 11:18 UTC (permalink / raw)
To: Dario Faggioli
Cc: Ian Campbell, Stefano Stabellini, Ian Jackson, Robert VanVossen,
Xen-devel, Nate Studer, Josh Whitehead, Jan Beulich
On 06/17/2014 05:11 PM, Dario Faggioli wrote:
> On lun, 2014-06-16 at 16:29 +0100, George Dunlap wrote:
>> On 06/16/2014 10:33 AM, Jan Beulich wrote:
>>>>>> On 13.06.14 at 21:58, <josh.whitehead@dornerworks.com> wrote:
>>>> --- a/xen/include/public/trace.h
>>>> +++ b/xen/include/public/trace.h
>>>> @@ -75,7 +75,7 @@
>>>> /* Per-scheduler IDs, to identify scheduler specific events */
>>>> #define TRC_SCHED_CSCHED 0
>>>> #define TRC_SCHED_CSCHED2 1
>>>> -#define TRC_SCHED_SEDF 2
>>>> +#define TRC_SCHED_CBS 2
>>> While the change to domctl.h is fine, I'm not sure we can allow simple
>>> renaming elsewhere in the public headers (i.e. the old name may need
>>> to remain there, guarded with a __XEN_INTERFACE_VERSION__
>>> conditional).
>> I think the tracing stuff is fine too -- we've always considered that
>> non-stable (and have made incompatible changes across versions).
>>
>> But the libxl interfaces *do* need to have something sensible done with
>> them.
>>
>> Given that, I think it would probably be better to make this patch series:
>>
>> 1/N: Add sched_cbs.c to Xen
>> 2/N: Add cbs to toolstack
>> 3/N: Remove sedf scheduler (with appropriate backwards-compatibility bits)
>>
>> I think that would make it a bit easier to review as well.
>>
> As far as this patch is concerned, I agree with George.
>
> However... Is removing SEDF an option? Is radically changing, if not
> its behavior (as it's known to be pretty broken), the expectations of
> a user, e.g., of an old application being compiled with a new version
> of xen+libxl an option?
>
> If yes, what's the process to do that?
> Personally, I'm all for having a really working real-time scheduling
> solution, and you all know that. :-) However, especially considering
> Josh's and Robbie's series, I think I would not remove or rename SEDF, I
> rather "just" amend the implementation.
>
> In future, it would be interesting to introduce more advanced real-time
> scheduling features and capabilities, like the ones coming from RT-Xen
> (and the RT-Xen guys are working on doing that), but I think that can be
> done step-by-step, and without any massive renaming or removal.
>
> So, I'm asking, mostly to George, about the overall scheduling aspects
> and implications, and to the tools maintainer, as that's where API
> stability is to be enforced: should this be a concern? In what sense API
> stability applies here? Can we, for example, start to ignore one or more
> SEDF scheduling parameters?
Well the primary thing is that programs using the old interface still
need to compile. This could be done with the libxl interface version
#ifdef-ery.
One of the things that makes this handy is that the only way a user can
*set* SEDF via libxl is when they create a cpupool; all the other uses
are just recognizing SEDF if it's actually the scheduler already.
Re the functionality: for one, if nobody is actually using it, we don't
need to keep it around. One advantage of adding a CBS scheduler and
just removing the SEDF scheduler is that it should make it pretty clear
who those people are, and make putting it back pretty easy.
If someone is using it -- I think asking them to switch to the
(presumably) better CBS scheduler makes sense, since it accomplishes
much the same thing.
On the whole, I'm inclined to say we should just go ahead and change the
SEDF interface in place rather than introducing a new scheduler.
The question then comes with the parameters: whether it's better to
attempt to keep working the best you can, or whether it's better to
alert the admin to the fact that the scheduler has changed significantly
and it's time to re-tweak the system.
I'm inclined to say that we should:
* Leave the name and number as sedf
* Remove parameters no longer being used, gated with the LIBXL_API_VERSION.
* For all schedulers, if parameters that are not used are set to
anything other than LIBXL_DOMAIN_SCHED_PARAM_{FOO}_DEFAULT,
libxl_domain_set_sched_params() should return an error.
Alternately, we could have libxl throw a warning that we're ignoring a
parameter in most cases; and in the case of no longer used parameters,
warn specifically that it's no longer implemented. We could maybe also
point them to some documentation, and in the documentation say to
contact the xen-devel list if you're still using it.
If people complain, then we can try to accommodate them. But at very
least we get an idea if anyone actually cares. :-)
-George
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables
2014-06-18 11:18 ` George Dunlap
@ 2014-06-26 21:30 ` Joshua Whitehead
0 siblings, 0 replies; 27+ messages in thread
From: Joshua Whitehead @ 2014-06-26 21:30 UTC (permalink / raw)
To: George Dunlap, Dario Faggioli
Cc: Ian Campbell, Stefano Stabellini, Ian Jackson, Robert VanVossen,
Xen-devel, Nate Studer, Jan Beulich
On 6/18/2014 7:18 AM, George Dunlap wrote:
> On 06/17/2014 05:11 PM, Dario Faggioli wrote:
>> On lun, 2014-06-16 at 16:29 +0100, George Dunlap wrote:
>>> On 06/16/2014 10:33 AM, Jan Beulich wrote:
>>>>>>> On 13.06.14 at 21:58, <josh.whitehead@dornerworks.com> wrote:
>>>>> --- a/xen/include/public/trace.h
>>>>> +++ b/xen/include/public/trace.h
>>>>> @@ -75,7 +75,7 @@
>>>>> /* Per-scheduler IDs, to identify scheduler specific events */
>>>>> #define TRC_SCHED_CSCHED 0
>>>>> #define TRC_SCHED_CSCHED2 1
>>>>> -#define TRC_SCHED_SEDF 2
>>>>> +#define TRC_SCHED_CBS 2
>>>> While the change to domctl.h is fine, I'm not sure we can allow simple
>>>> renaming elsewhere in the public headers (i.e. the old name may need
>>>> to remain there, guarded with a __XEN_INTERFACE_VERSION__
>>>> conditional).
>>> I think the tracing stuff is fine too -- we've always considered that
>>> non-stable (and have made incompatible changes across versions).
>>>
>>> But the libxl interfaces *do* need to have something sensible done with
>>> them.
>>>
>>> Given that, I think it would probably be better to make this patch series:
>>>
>>> 1/N: Add sched_cbs.c to Xen
>>> 2/N: Add cbs to toolstack
>>> 3/N: Remove sedf scheduler (with appropriate backwards-compatibility bits)
>>>
>>> I think that would make it a bit easier to review as well.
>>>
>> As far as this patch is concerned, I agree with George.
>>
>> However... Is removing SEDF an option? Is radically changing, if not
>> it's behavior (as it's known to be pretty broken), the expectations of
>> an user, e.g., of an old application being compiled with a new version
>> of xen+libxl an option?
>>
>> If yes, what's the process to do that?
>> Personally, I'm all for having a really working real-time scheduling
>> solution, and you all know that. :-) However, especially considering
>> Josh's and Robbie's series, I think I would not remove or rename SEDF, I
>> rather "just" amend the implementation.
>>
>> In future, it would be interesting to introduce more advanced real-time
>> scheduling features an capabilities, like the ones coming from RT-Xen
>> (and the RT-Xen guys are working on doing that), but I think that can be
>> done step-by-step, and without any massive renaming or removal.
>>
>> So, I'm asking, mostly to George, about the overall scheduling aspects
>> and implications, and to the tools maintainer, as that's where API
>> stability is to be enforced: should this be a concern? In what sense API
>> stability applies here? Can we, for example, start to ignore one or more
>> SEDF scheduling parameters?
Just wanted to throw in a few comments:
>
> Well the primary thing is that programs using the old interface still
> need to compile. This could be done with the libxl interface version
> #ifdef-ery.
>
This should be simple to add, we'll try to take a look at that for V2, but it
may be better to wait until V3 so we can get some more feedback on the series
before making too many of those types of changes and can see what's actually
necessary.
> One of the things that makes this handy is that the only way a user can
> *set* SEDF via libxl is when they create a cpupool; all the other uses
> are just recognizing SEDF if it's actually the scheduler already.
>
> Re the functionality: for one, if nobody is actually using it, we don't
> need to keep it around. One advantage of adding a CBS scheduler and
> just removing the SEDF scheduler is that it should make it pretty clear
> who those people are, and make putting it back pretty easy.
>
> If someone is using it -- I think asking them to switch to the
> (presumably) better CBS scheduler makes sense, since it accomplishes
> much the same thing.
>
One of our motivations behind modifying the SEDF was our general impression that
it was no longer in use in any serious way by anyone in the Xen community as
there were better general purpose schedulers available (credit/credit2) and the
modifications to it made it unsuitable for real-time use.
> On the whole, I'm inclined to say we should just go ahead and change the
> SEDF interface in place rather than introducing a new scheduler.
>
> The question then comes with the parameters: whether it's better to
> attempt to keep working the best you can, or whether it's better to
> alert the admin to the fact that the scheduler has changed significantly
> and it's time to re-tweak the system.
>
> I'm inclined to say that we should:
> * Leave the name and number as sedf
> * Remove parameters no longer being used, gated with the LIBXL_API_VERSION.
> * For all schedulers, if parameters that are not used are set to
> anything other than LIBXL_DOMAIN_SCHED_PARAM_{FOO}_DEFAULT,
> libxl_domain_set_sched_params() should return an error.
>
I commented on it in an earlier e-mail as well, but I thought I'd put it here
too- I think this is the path our V2 of the patch series will focus on. Keep
the SEDF naming scheme in general, update names and parameters only on a limited
basis ("change the SEDF interface in place" as George said), and reorganize the
patch to be easier to review. This should also put it in a good position to add
functionality in the future (from RT-Xen or otherwise).
> Alternately, we could have libxl throw a warning that we're ignoring a
> parameter in most cases; and in the case of no longer used parameters,
> warn specifically that it's no longer implemented. We could maybe also
> point them to some documentation, and in the documentation say to
> contact the xen-devel list if you're still using it.
>
We would also be willing to add this as desired :-)
Thanks again George for your input as well!
- Josh Whitehead
> If people complain, then we can try to accomodate them. But at very
> least we get an idea if anyone actually cares. :-)
>
> -George
>
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables
2014-06-17 16:11 ` Dario Faggioli
2014-06-17 17:28 ` Dario Faggioli
2014-06-18 11:18 ` George Dunlap
@ 2014-06-26 21:23 ` Joshua Whitehead
2014-06-28 2:09 ` Dario Faggioli
2 siblings, 1 reply; 27+ messages in thread
From: Joshua Whitehead @ 2014-06-26 21:23 UTC (permalink / raw)
To: Dario Faggioli, George Dunlap
Cc: Ian Campbell, Stefano Stabellini, Ian Jackson, Robert VanVossen,
Xen-devel, Nate Studer, Jan Beulich
On 6/17/2014 12:11 PM, Dario Faggioli wrote:
> On lun, 2014-06-16 at 16:29 +0100, George Dunlap wrote:
>> On 06/16/2014 10:33 AM, Jan Beulich wrote:
>>>>>> On 13.06.14 at 21:58, <josh.whitehead@dornerworks.com> wrote:
>>>> --- a/xen/include/public/trace.h
>>>> +++ b/xen/include/public/trace.h
>>>> @@ -75,7 +75,7 @@
>>>> /* Per-scheduler IDs, to identify scheduler specific events */
>>>> #define TRC_SCHED_CSCHED 0
>>>> #define TRC_SCHED_CSCHED2 1
>>>> -#define TRC_SCHED_SEDF 2
>>>> +#define TRC_SCHED_CBS 2
>>> While the change to domctl.h is fine, I'm not sure we can allow simple
>>> renaming elsewhere in the public headers (i.e. the old name may need
>>> to remain there, guarded with a __XEN_INTERFACE_VERSION__
>>> conditional).
>>
>> I think the tracing stuff is fine too -- we've always considered that
>> non-stable (and have made incompatible changes across versions).
>>
>> But the libxl interfaces *do* need to have something sensible done with
>> them.
>>
>> Given that, I think it would probably be better to make this patch series:
>>
>> 1/N: Add sched_cbs.c to Xen
>> 2/N: Add cbs to toolstack
>> 3/N: Remove sedf scheduler (with appropriate backwards-compatibility bits)
>>
>> I think that would make it a bit easier to review as well.
>>
> As far as this patch is concerned, I agree with George.
>
> However... Is removing SEDF an option? Is radically changing, if not
> it's behavior (as it's known to be pretty broken), the expectations of
> an user, e.g., of an old application being compiled with a new version
> of xen+libxl an option?
>
> If yes, what's the process to do that?
>
> Personally, I'm all for having a really working real-time scheduling
> solution, and you all know that. :-) However, especially considering
> Josh's and Robbie's series, I think I would not remove or rename SEDF, I
> rather "just" amend the implementation.
>
I'll let George comment on this again, but it sounds like from his e-mails that
removing SEDF isn't *that* big of a problem, however as discussed elsewhere,
keeping the name and changing the "guts" of it sounds like a better option.
> In future, it would be interesting to introduce more advanced real-time
> scheduling features an capabilities, like the ones coming from RT-Xen
> (and the RT-Xen guys are working on doing that), but I think that can be
> done step-by-step, and without any massive renaming or removal.
>
This is another point for splitting the patch as we discussed in the earlier
e-mail. Having that separation would give us more flexibility in perhaps
merging and splicing in functionality from others if desired. We haven't had
the chance to fully review the stuff from Meng/RT-Xen, so we'll have to see
what's applicable, but that could certainly be an option. If we upstream this
patch series it should be relatively easy to then incrementally add features
from other sources over time and for DornerWorks to maintain the scheduler in
the future.
> So, I'm asking, mostly to George, about the overall scheduling aspects
> and implications, and to the tools maintainer, as that's where API
> stability is to be enforced: should this be a concern? In what sense API
> stability applies here? Can we, for example, start to ignore one or more
> SEDF scheduling parameters?
>
> I'm asking explicitly about the parameters because, although I think
> that most of the changes in this series does not actually call for much
> renaming, at least the 'weight' and, to certain extent the 'extra',
> parameters are a bit difficult to deal with (mostly because they're a
> remnant from when SEDF was meant as a general purpose scheduler too!).
>
Just a quick comment on this - our view of the changes to SEDF is to return it
to something that's suitable for real-time applications which almost by
definition makes it unsuitable as a general purpose scheduler, it was the
conversion to general purpose that made SEDF so ugly in the first place. So
there may be things about our changes that may cause problems for someone trying
to run a normal computer on SEDF but make perfect sense in the
embedded/real-time world. Thanks,
- Josh Whitehead
> Thoughts?
>
> Thanks and Regards,
> Dario
>
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables
2014-06-26 21:23 ` Joshua Whitehead
@ 2014-06-28 2:09 ` Dario Faggioli
0 siblings, 0 replies; 27+ messages in thread
From: Dario Faggioli @ 2014-06-28 2:09 UTC (permalink / raw)
To: Joshua Whitehead
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer, Jan Beulich
[-- Attachment #1.1: Type: text/plain, Size: 4599 bytes --]
On gio, 2014-06-26 at 17:23 -0400, Joshua Whitehead wrote:
> On 6/17/2014 12:11 PM, Dario Faggioli wrote:
> > Personally, I'm all for having a really working real-time scheduling
> > solution, and you all know that. :-) However, especially considering
> > Josh's and Robbie's series, I think I would not remove or rename SEDF, I
> > rather "just" amend the implementation.
> >
> I'll let George comment on this again, but it sounds like from his e-mails that
> removing SEDF isn't *that* big of a problem, however as discussed elsewhere,
> keeping the name and changing the "guts" of it sounds like a better option.
>
Indeed.
> > In future, it would be interesting to introduce more advanced real-time
> > scheduling features an capabilities, like the ones coming from RT-Xen
> > (and the RT-Xen guys are working on doing that), but I think that can be
> > done step-by-step, and without any massive renaming or removal.
> >
> This is another point for splitting the patch as we discussed in the earlier
> e-mail. Having that separation would give us more flexibility in perhaps
> merging and splicing in functionality from others if desired. We haven't had
> the chance to fully review the stuff from Meng/RT-Xen, so we'll have to see
> what's applicable, but that could certainly be an option. If we upstream this
> patch series it should be relatively easy to then incrementally add features
> from other sources over time and for DornerWorks to maintain the scheduler in
> the future.
>
I wanted to re-review and look as close as possible to both your and
RT-Xen's guys' series, but couldn't today. I'll do this on Monday, and
let you know what my feeling is about what we should use as a basis and
what should be added on top of that incrementally.
> > I'm asking explicitly about the parameters because, although I think
> > that most of the changes in this series does not actually call for much
> > renaming, at least the 'weight' and, to certain extent the 'extra',
> > parameters are a bit difficult to deal with (mostly because they're a
> > remnant from when SEDF was meant as a general purpose scheduler too!).
> >
> Just a quick comment on this - our view of the changes to SEDF is to return it
> to something that's suitable for real-time applications which almost by
> definition makes it unsuitable as a general purpose scheduler, it was the
> conversion to general purpose that made SEDF so ugly in the first place. So
> there may be things about our changes that may cause problems for someone trying
> to run a normal computer on SEDF but make perfect sense in the
> embedded/real-time world.
>
I have no intention to keep/make SEDF suitable for GP scheduling. I'm
well aware of the different needs of the two domains (RT and non-RT)
and, in general, I'm no fan of "one scheduler to rule them all"
approach, as you may well end up in making everyone really unhappy about
the service they get.
That being said, there are two reasons for my comments above. For one,
have a look at what Linus Torvalds usually says about kernel changes
that break userspace. Sure, we are not Linux, sure SEDF is already
broken, etc., but still I don't think it's nice to completely subvert
some user's world (provided there are any users, which may be false, I
have to admit). As George said, not breaking the compilation at libxl
level is something we really must do. About the functionality, I was not
so sure. I'm not so sure yet, but I guess I fundamentally concur with
him.
From a completely different perspective, as I at least partially already
pointed out, 'extra' really looks very similar to your 'soft' to me, and
as for 'weight', since we're talking about CBS, ever heard of GRUB (not the
bootloader: Greedy Reclaiming of Unused Bandwidth) and its further
evolution, SHRUB? They are both enhanced versions of the CBS, where a
weight may come in handy. Of course, I'm not suggesting rushing to
implement those right away; I was just wondering what would be best
done, from an interface point of view, right now, knowing that we may get
to it (i.e., to using 'weight' again, and in a very similar way, _without_
turning SEDF back into a non-RT scheduler!).
Anyway, there are more pressing decisions to make right now, I
think. :-)
Regards,
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 181 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* [RFC PATCH 4/4] Changed filenames with sedf to cbs to reflect the actual scheduler
2014-06-13 19:58 [RFC PATCH 0/4] Repurpose SEDF Scheduler for Real-time use Josh Whitehead
` (2 preceding siblings ...)
2014-06-13 19:58 ` [RFC PATCH 3/4] Updated comments/variables to reflect cbs, fixed formatting and confusing comments/variables Josh Whitehead
@ 2014-06-13 19:58 ` Josh Whitehead
2014-06-16 7:25 ` [RFC PATCH 0/4] Repurpose SEDF Scheduler for Real-time use Dario Faggioli
2014-06-17 14:44 ` Dario Faggioli
5 siblings, 0 replies; 27+ messages in thread
From: Josh Whitehead @ 2014-06-13 19:58 UTC (permalink / raw)
To: Xen-devel
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Dario Faggioli,
Ian Jackson, Robert VanVossen, Nate Studer, Josh Whitehead
From: Robbie VanVossen <robert.vanvossen@dornerworks.com>
---
tools/libxc/Makefile | 2 +-
tools/libxc/xc_cbs.c | 70 ++++
tools/libxc/xc_sedf.c | 70 ----
xen/common/Makefile | 2 +-
xen/common/sched_cbs.c | 917 +++++++++++++++++++++++++++++++++++++++++++++++
xen/common/sched_sedf.c | 917 -----------------------------------------------
6 files changed, 989 insertions(+), 989 deletions(-)
create mode 100755 tools/libxc/xc_cbs.c
delete mode 100755 tools/libxc/xc_sedf.c
create mode 100644 xen/common/sched_cbs.c
delete mode 100644 xen/common/sched_sedf.c
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index a74b19e..dc06e59 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -16,7 +16,7 @@ CTRL_SRCS-y += xc_misc.c
CTRL_SRCS-y += xc_flask.c
CTRL_SRCS-y += xc_physdev.c
CTRL_SRCS-y += xc_private.c
-CTRL_SRCS-y += xc_sedf.c
+CTRL_SRCS-y += xc_cbs.c
CTRL_SRCS-y += xc_csched.c
CTRL_SRCS-y += xc_csched2.c
CTRL_SRCS-y += xc_arinc653.c
diff --git a/tools/libxc/xc_cbs.c b/tools/libxc/xc_cbs.c
new file mode 100755
index 0000000..3b578d1
--- /dev/null
+++ b/tools/libxc/xc_cbs.c
@@ -0,0 +1,70 @@
+/******************************************************************************
+ * xc_cbs.c
+ *
+ * API for manipulating parameters of the CBS scheduler.
+ *
+ * changes by Stephan Diestelhorst
+ * based on code
+ * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "xc_private.h"
+
+int xc_cbs_domain_set(
+ xc_interface *xch,
+ uint32_t domid,
+ uint64_t period,
+ uint64_t budget,
+ uint16_t soft)
+{
+ DECLARE_DOMCTL;
+ struct xen_domctl_sched_cbs *p = &domctl.u.scheduler_op.u.cbs;
+
+ domctl.cmd = XEN_DOMCTL_scheduler_op;
+ domctl.domain = (domid_t)domid;
+ domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_CBS;
+ domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_putinfo;
+
+ p->period = period;
+ p->budget = budget;
+ p->soft = soft;
+ return do_domctl(xch, &domctl);
+}
+
+int xc_cbs_domain_get(
+ xc_interface *xch,
+ uint32_t domid,
+ uint64_t *period,
+ uint64_t *budget,
+ uint16_t *soft)
+{
+ DECLARE_DOMCTL;
+ int ret;
+ struct xen_domctl_sched_cbs *p = &domctl.u.scheduler_op.u.cbs;
+
+ domctl.cmd = XEN_DOMCTL_scheduler_op;
+ domctl.domain = (domid_t)domid;
+ domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_CBS;
+ domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_getinfo;
+
+ ret = do_domctl(xch, &domctl);
+
+ *period = p->period;
+ *budget = p->budget;
+ *soft = p->soft;
+ return ret;
+}
diff --git a/tools/libxc/xc_sedf.c b/tools/libxc/xc_sedf.c
deleted file mode 100755
index 3b578d1..0000000
--- a/tools/libxc/xc_sedf.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/******************************************************************************
- * xc_cbs.c
- *
- * API for manipulating parameters of the CBS scheduler.
- *
- * changes by Stephan Diestelhorst
- * based on code
- * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "xc_private.h"
-
-int xc_cbs_domain_set(
- xc_interface *xch,
- uint32_t domid,
- uint64_t period,
- uint64_t budget,
- uint16_t soft)
-{
- DECLARE_DOMCTL;
- struct xen_domctl_sched_cbs *p = &domctl.u.scheduler_op.u.cbs;
-
- domctl.cmd = XEN_DOMCTL_scheduler_op;
- domctl.domain = (domid_t)domid;
- domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_CBS;
- domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_putinfo;
-
- p->period = period;
- p->budget = budget;
- p->soft = soft;
- return do_domctl(xch, &domctl);
-}
-
-int xc_cbs_domain_get(
- xc_interface *xch,
- uint32_t domid,
- uint64_t *period,
- uint64_t *budget,
- uint16_t *soft)
-{
- DECLARE_DOMCTL;
- int ret;
- struct xen_domctl_sched_cbs *p = &domctl.u.scheduler_op.u.cbs;
-
- domctl.cmd = XEN_DOMCTL_scheduler_op;
- domctl.domain = (domid_t)domid;
- domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_CBS;
- domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_getinfo;
-
- ret = do_domctl(xch, &domctl);
-
- *period = p->period;
- *budget = p->budget;
- *soft = p->soft;
- return ret;
-}
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 3683ae3..f2cb709 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -24,7 +24,7 @@ obj-y += random.o
obj-y += rangeset.o
obj-y += sched_credit.o
obj-y += sched_credit2.o
-obj-y += sched_sedf.o
+obj-y += sched_cbs.o
obj-y += sched_arinc653.o
obj-y += schedule.o
obj-y += shutdown.o
diff --git a/xen/common/sched_cbs.c b/xen/common/sched_cbs.c
new file mode 100644
index 0000000..5df4825
--- /dev/null
+++ b/xen/common/sched_cbs.c
@@ -0,0 +1,917 @@
+/******************************************************************************
+ * Constant Bandwidth Server Scheduler for Xen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * by DornerWorks Ltd. (C) 2014 Grand Rapids, MI
+ *
+ * Adapted from code by Stephan Diestelhorst (C) 2004 Cambridge University
+ * and Mark Williamson (C) 2004 Intel Research Cambridge
+ *
+ */
+
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
+#include <xen/timer.h>
+#include <xen/softirq.h>
+#include <xen/time.h>
+#include <xen/errno.h>
+
+#ifndef NDEBUG
+#define CBS_STATS
+#define CHECK(_p) \
+ do { \
+ if ( !(_p) ) \
+ printk("Check '%s' failed, line %d, file %s\n", \
+ #_p , __LINE__, __FILE__); \
+ } while ( 0 )
+#else
+#define CHECK(_p) ((void)0)
+#endif
+
+#define CBS_SOFT_TASK (1)
+#define CBS_ASLEEP (16)
+
+#define DEFAULT_PERIOD (MILLISECS(20))
+#define DEFAULT_BUDGET (MILLISECS(10))
+
+#define PERIOD_MAX MILLISECS(10000) /* 10s */
+#define PERIOD_MIN (MICROSECS(10)) /* 10us */
+#define BUDGET_MIN (MICROSECS(5)) /* 5us */
+
+#define EQ(_A, _B) ((!!(_A)) == (!!(_B)))
+
+
+struct cbs_dom_info {
+ struct domain *domain;
+};
+
+struct cbs_priv_info {
+ /* lock for the whole pluggable scheduler, nests inside cpupool_lock */
+ spinlock_t lock;
+};
+
+struct cbs_vcpu_info {
+ struct vcpu *vcpu; /* vcpu this record belongs to (set in cbs_alloc_vdata) */
+ struct list_head list; /* link used for both the per-cpu runq and waitq */
+
+
+ /* Parameters for EDF-CBS */
+ s_time_t period; /* = Server scheduling period */
+ s_time_t budget; /* = Guaranteed minimum CPU time per period */
+ /* Note: Server bandwidth = (budget / period) */
+
+ /* Status of vcpu */
+ int status; /* bitmask of CBS_SOFT_TASK and CBS_ASLEEP */
+ /* Bookkeeping */
+ s_time_t deadl_abs; /* absolute deadline; PERIOD_BEGIN() is deadl_abs - period */
+ s_time_t sched_start_abs; /* 'now' when cbs_do_schedule() last picked this vcpu */
+ s_time_t cputime; /* run time charged against budget in desched_edf_vcpu() */
+ /* Times the vcpu un-/blocked */
+ s_time_t block_abs; /* set in cbs_do_schedule() when the vcpu is CBS_ASLEEP */
+ s_time_t unblock_abs; /* NOTE(review): not referenced elsewhere in this file */
+
+#ifdef CBS_STATS
+ s_time_t block_time_tot; /* sum of (wake time - block_abs), see cbs_wake() */
+ int block_tot; /* number of wake-ups counted in cbs_wake() */
+ int short_block_tot; /* hard-vcpu wake-ups that happened before deadl_abs */
+ int long_block_tot; /* hard-vcpu wake-ups at or after deadl_abs */
+ s_time_t miss_time; /* cputime accumulated at each counted deadline miss */
+ s_time_t over_time; /* leftover cputime accumulated at each counted overrun */
+ int miss_tot; /* deadline misses counted in desched_edf_vcpu() */
+ int over_tot; /* budget overruns counted in desched_edf_vcpu() */
+#endif
+};
+
+struct cbs_cpu_info {
+ struct list_head runnableq;
+ struct list_head waitq;
+ s_time_t current_budget_expires;
+};
+
+#define CBS_PRIV(_ops) \
+ ((struct cbs_priv_info *)((_ops)->sched_data))
+#define CBS_VCPU(_vcpu) ((struct cbs_vcpu_info *)((_vcpu)->sched_priv))
+#define CBS_PCPU(_cpu) \
+ ((struct cbs_cpu_info *)per_cpu(schedule_data, _cpu).sched_priv)
+#define LIST(_vcpu) (&CBS_VCPU(_vcpu)->list)
+#define RUNQ(_cpu) (&CBS_PCPU(_cpu)->runnableq)
+#define WAITQ(_cpu) (&CBS_PCPU(_cpu)->waitq)
+#define IDLETASK(_cpu) (idle_vcpu[_cpu])
+
+#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
+
+#define DIV_UP(_X, _Y) (((_X) + (_Y) - 1) / (_Y)) /* ceil(_X / _Y); _Y is evaluated twice */
+
+#define cbs_runnable(edom) (!(CBS_VCPU(edom)->status & CBS_ASLEEP))
+
+#define cbs_soft(edom) (CBS_VCPU(edom)->status & CBS_SOFT_TASK)
+
+static void cbs_dump_cpu_state(const struct scheduler *ops, int cpu);
+
+static inline int __task_on_queue(struct vcpu *v)
+{
+ return (((LIST(v))->next != NULL) && (LIST(v)->next != LIST(v)));
+}
+
+static inline void __del_from_queue(struct vcpu *v)
+{
+ struct list_head *list = LIST(v);
+ ASSERT(__task_on_queue(v));
+ list_del(list);
+ list->next = NULL;
+ ASSERT(!__task_on_queue(v));
+}
+
+typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2);
+
+static inline void list_insert_sort(
+ struct list_head *list, struct list_head *element, list_comparer comp)
+{
+ struct list_head *cur;
+
+ /* Iterate through all elements to find our "hole" */
+ list_for_each( cur, list )
+ if ( comp(element, cur) < 0 )
+ break;
+
+ /* cur now contains the element, before which we'll enqueue */
+ list_add(element, cur->prev);
+}
+
+#define VCPU_COMPARER(name, field, comp1, comp2) \
+static int name##_comp(struct list_head* el1, struct list_head* el2) \
+{ \
+ struct cbs_vcpu_info *v1, *v2; \
+ v1 = list_entry(el1, struct cbs_vcpu_info, field); \
+ v2 = list_entry(el2, struct cbs_vcpu_info, field); \
+ if ( (comp1) == (comp2) ) \
+ return 0; \
+ if ( (comp1) < (comp2) ) \
+ return -1; \
+ else \
+ return 1; \
+}
+
+/*
+ * Adds a vcpu to the queue of processes which wait for the beginning of the
+ * next period; this list is therefore sorted by this time, which is simply
+ * absolute deadline - period.
+ */
+VCPU_COMPARER(waitq, list, PERIOD_BEGIN(v1), PERIOD_BEGIN(v2));
+static inline void __add_to_waitqueue_sort(struct vcpu *v)
+{
+ ASSERT(!__task_on_queue(v));
+ list_insert_sort(WAITQ(v->processor), LIST(v), waitq_comp);
+ ASSERT(__task_on_queue(v));
+}
+
+/*
+ * Adds a vcpu to the queue of processes which have started their current
+ * period and are runnable (i.e. not blocked, dying,...). The first element
+ * on this list is running on the processor, if the list is empty the idle
+ * task will run. As we are implementing EDF, this list is sorted by deadlines.
+ */
+VCPU_COMPARER(runq, list, v1->deadl_abs, v2->deadl_abs);
+static inline void __add_to_runqueue_sort(struct vcpu *v)
+{
+ list_insert_sort(RUNQ(v->processor), LIST(v), runq_comp);
+}
+
+
+static void cbs_insert_vcpu(const struct scheduler *ops, struct vcpu *v)
+{
+ if ( is_idle_vcpu(v) )
+ {
+ CBS_VCPU(v)->deadl_abs = 0;
+ CBS_VCPU(v)->status &= ~CBS_ASLEEP;
+ }
+}
+
+static void *cbs_alloc_vdata(const struct scheduler *ops, struct vcpu *v, void *dd)
+{
+ struct cbs_vcpu_info *inf;
+
+ inf = xzalloc(struct cbs_vcpu_info);
+ if ( inf == NULL )
+ return NULL;
+
+ inf->vcpu = v;
+
+ inf->deadl_abs = 0;
+ inf->cputime = 0;
+ inf->status = CBS_ASLEEP;
+
+ if (v->domain->domain_id == 0)
+ {
+ /* Domain 0, needs a budget to boot the machine */
+ inf->period = DEFAULT_PERIOD;
+ inf->budget = DEFAULT_BUDGET;
+ }
+ else
+ {
+ inf->period = DEFAULT_PERIOD;
+ inf->budget = 0;
+ }
+
+ INIT_LIST_HEAD(&(inf->list));
+
+ SCHED_STAT_CRANK(vcpu_init);
+
+ return inf;
+}
+
+static void *
+cbs_alloc_pdata(const struct scheduler *ops, int cpu)
+{
+ struct cbs_cpu_info *spc;
+
+ spc = xzalloc(struct cbs_cpu_info);
+ BUG_ON(spc == NULL);
+ INIT_LIST_HEAD(&spc->waitq);
+ INIT_LIST_HEAD(&spc->runnableq);
+
+ return (void *)spc;
+}
+
+static void
+cbs_free_pdata(const struct scheduler *ops, void *spc, int cpu)
+{
+ if ( spc == NULL )
+ return;
+
+ xfree(spc);
+}
+
+static void cbs_free_vdata(const struct scheduler *ops, void *priv)
+{
+ xfree(priv);
+}
+
+static void *
+cbs_alloc_domdata(const struct scheduler *ops, struct domain *d)
+{
+ return xzalloc(struct cbs_dom_info);
+}
+
+static int cbs_init_domain(const struct scheduler *ops, struct domain *d)
+{
+ d->sched_priv = cbs_alloc_domdata(ops, d);
+ if ( d->sched_priv == NULL )
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void cbs_free_domdata(const struct scheduler *ops, void *data)
+{
+ xfree(data);
+}
+
+static void cbs_destroy_domain(const struct scheduler *ops, struct domain *d)
+{
+ cbs_free_domdata(ops, d->sched_priv);
+}
+
+static int cbs_pick_cpu(const struct scheduler *ops, struct vcpu *v)
+{
+ cpumask_t online_affinity;
+ cpumask_t *online;
+
+ online = cpupool_scheduler_cpumask(v->domain->cpupool);
+ cpumask_and(&online_affinity, v->cpu_affinity, online);
+ return cpumask_cycle(v->vcpu_id % cpumask_weight(&online_affinity) - 1,
+ &online_affinity);
+}
+
+/*
+ * Handles the rescheduling & bookkeeping of vcpus running in their
+ * guaranteed time budget.
+ */
+static void desched_edf_vcpu(s_time_t now, struct vcpu *v)
+{
+ struct cbs_vcpu_info* inf = CBS_VCPU(v);
+
+ /* Current vcpu is running in real time mode */
+ ASSERT(__task_on_queue(v));
+
+ /* Update the vcpu's cputime */
+ inf->cputime += now - inf->sched_start_abs;
+
+ /* Scheduling decisions which don't remove the running vcpu from
+ * the runq */
+ if ( (inf->cputime < inf->budget) && cbs_runnable(v) )
+ return;
+
+ __del_from_queue(v);
+
+#ifdef CBS_STATS
+ /* Manage deadline misses */
+ if ( unlikely(inf->deadl_abs < now) )
+ {
+ inf->miss_tot++;
+ inf->miss_time += inf->cputime;
+ }
+#endif
+
+ /* Manage overruns */
+ if ( inf->cputime >= inf->budget )
+ {
+ inf->cputime -= inf->budget;
+
+
+ /* Set next deadline */
+ inf->deadl_abs += inf->period;
+
+ /* Ensure that the cputime is always less than budget */
+ if ( unlikely(inf->cputime > inf->budget) )
+ {
+#ifdef CBS_STATS
+ inf->over_tot++;
+ inf->over_time += inf->cputime;
+#endif
+
+ /* Make up for the overage by pushing the deadline
+ into the future */
+ inf->deadl_abs += ((inf->cputime / inf->budget)
+ * inf->period) * 2;
+ inf->cputime -= (inf->cputime / inf->budget) * inf->budget;
+ }
+
+ /* Ensure that the start of the next period is in the future */
+ if ( unlikely(PERIOD_BEGIN(inf) < now) )
+ inf->deadl_abs +=
+ (DIV_UP(now - PERIOD_BEGIN(inf),
+ inf->period)) * inf->period;
+ }
+
+ /* Add a runnable vcpu to the appropriate queue */
+ if ( cbs_runnable(v) )
+ {
+ if( cbs_soft(v) )
+ {
+ __add_to_runqueue_sort(v);
+ }
+ else
+ {
+ __add_to_waitqueue_sort(v);
+ }
+ }
+
+ ASSERT(EQ(cbs_runnable(v), __task_on_queue(v)));
+}
+
+
+/* Update all elements on the queues */
+static void update_queues(
+ s_time_t now, struct list_head *runq, struct list_head *waitq)
+{
+ struct list_head *cur, *tmp;
+ struct cbs_vcpu_info *curinf;
+
+ /*
+ * Check for the first elements of the waitqueue, whether their
+ * next period has already started.
+ */
+ list_for_each_safe ( cur, tmp, waitq )
+ {
+ curinf = list_entry(cur, struct cbs_vcpu_info, list);
+ if ( PERIOD_BEGIN(curinf) > now )
+ break;
+ __del_from_queue(curinf->vcpu);
+ __add_to_runqueue_sort(curinf->vcpu);
+ }
+
+ /* Process the runq, find vcpus that are on the runq that shouldn't */
+ list_for_each_safe ( cur, tmp, runq )
+ {
+ curinf = list_entry(cur, struct cbs_vcpu_info, list);
+
+ if ( unlikely(curinf->budget == 0) )
+ {
+ /* Ignore vcpus with empty budget */
+ __del_from_queue(curinf->vcpu);
+
+ /* Move them to their next period */
+ curinf->deadl_abs += curinf->period;
+
+ /* Ensure that the start of the next period is in the future */
+ if ( unlikely(PERIOD_BEGIN(curinf) < now) )
+ curinf->deadl_abs +=
+ (DIV_UP(now - PERIOD_BEGIN(curinf),
+ curinf->period)) * curinf->period;
+
+ /* Put them back into the queue */
+ __add_to_waitqueue_sort(curinf->vcpu);
+ }
+
+ else
+ break;
+ }
+}
+
+
+static int cbs_init(struct scheduler *ops)
+{
+ struct cbs_priv_info *prv;
+
+ prv = xzalloc(struct cbs_priv_info);
+ if ( prv == NULL )
+ return -ENOMEM;
+
+ ops->sched_data = prv;
+ spin_lock_init(&prv->lock);
+
+ return 0;
+}
+
+
+static void cbs_deinit(const struct scheduler *ops)
+{
+ struct cbs_priv_info *prv;
+
+ prv = CBS_PRIV(ops);
+ if ( prv != NULL )
+ xfree(prv);
+}
+
+
+/*
+ * Main scheduling function
+ * Reasons for calling this function are:
+ * -budget for the current server is used up
+ * -vcpu on waitqueue has started its period
+ * -and various others ;) in general: determine which vcpu to run next
+ */
+static struct task_slice cbs_do_schedule(
+ const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled)
+{
+ int cpu = smp_processor_id();
+ struct list_head *runq = RUNQ(cpu);
+ struct list_head *waitq = WAITQ(cpu);
+ struct cbs_vcpu_info *inf = CBS_VCPU(current);
+ struct cbs_vcpu_info *runinf, *waitinf;
+ struct task_slice ret;
+
+ SCHED_STAT_CRANK(schedule);
+
+ /* Idle tasks don't need any of the following stuff */
+ if ( is_idle_vcpu(current) )
+ goto check_waitq;
+
+ /*
+ * Create local state of the status of the vcpu, in order to avoid
+ * inconsistent state during scheduling decisions, because data for
+ * vcpu_runnable is not protected by the scheduling lock!
+ */
+ if ( !vcpu_runnable(current) )
+ inf->status |= CBS_ASLEEP;
+
+ if ( inf->status & CBS_ASLEEP )
+ inf->block_abs = now;
+
+ desched_edf_vcpu(now, current);
+ check_waitq:
+ update_queues(now, runq, waitq);
+
+ /*
+ * Now simply pick the first vcpu from the runqueue, which has the
+ * earliest deadline, because the list is sorted
+ *
+ * Tasklet work (which runs in idle VCPU context) overrides all else.
+ */
+ if ( tasklet_work_scheduled ||
+ (list_empty(runq) && list_empty(waitq)) ||
+ unlikely(!cpumask_test_cpu(cpu,
+ cpupool_scheduler_cpumask(per_cpu(cpupool, cpu)))) )
+ {
+ ret.task = IDLETASK(cpu);
+ ret.time = SECONDS(1);
+ }
+ else if ( !list_empty(runq) )
+ {
+ runinf = list_entry(runq->next, struct cbs_vcpu_info, list);
+ ret.task = runinf->vcpu;
+ if ( !list_empty(waitq) )
+ {
+ waitinf = list_entry(waitq->next,
+ struct cbs_vcpu_info, list);
+ /*
+ * Rerun scheduler, when scheduled vcpu consumes
+ * its budget or the first vcpu from the waitqueue
+ * gets ready.
+ */
+ ret.time = MIN(now + runinf->budget - runinf->cputime,
+ PERIOD_BEGIN(waitinf)) - now;
+ }
+ else
+ {
+ ret.time = runinf->budget - runinf->cputime;
+ }
+ }
+ else
+ {
+ waitinf = list_entry(waitq->next, struct cbs_vcpu_info, list);
+
+ ret.task = IDLETASK(cpu);
+ ret.time = PERIOD_BEGIN(waitinf) - now;
+ }
+
+ /*
+ * TODO: Do something USEFUL when this happens and find out, why it
+ * still can happen!!!
+ */
+ if ( ret.time < 0)
+ printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
+ ret.time);
+
+ ret.migrated = 0;
+
+ CBS_VCPU(ret.task)->sched_start_abs = now;
+ CHECK(ret.time > 0);
+ ASSERT(cbs_runnable(ret.task));
+ CBS_PCPU(cpu)->current_budget_expires = now + ret.time;
+ return ret;
+}
+
+static void cbs_sleep(const struct scheduler *ops, struct vcpu *v)
+{
+ if ( is_idle_vcpu(v) )
+ return;
+
+ CBS_VCPU(v)->status |= CBS_ASLEEP;
+
+ if ( per_cpu(schedule_data, v->processor).curr == v )
+ {
+ cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
+ }
+ else
+ {
+ if ( __task_on_queue(v) )
+ __del_from_queue(v);
+ }
+}
+
+/*
+ * Compares two vcpus in the relation of whether the one is allowed to
+ * interrupt the others execution.
+ * It returns true (!=0) if a switch to the other vcpu is good.
+ * Priority scheme is as follows:
+ * EDF: early deadline > late deadline
+ */
+static inline int should_switch(struct vcpu *cur,
+ struct vcpu *other,
+ s_time_t now)
+{
+ struct cbs_vcpu_info *cur_inf, *other_inf;
+ cur_inf = CBS_VCPU(cur);
+ other_inf = CBS_VCPU(other);
+
+ /* Always interrupt idle vcpu. */
+ if ( is_idle_vcpu(cur) )
+ return 1;
+
+ /* Check whether we need to make an earlier scheduling decision */
+ if ( PERIOD_BEGIN(other_inf) <
+ CBS_PCPU(other->processor)->current_budget_expires )
+ return 1;
+
+ return 0;
+}
+
+/*
+ * This function wakes up a vcpu, i.e. moves it into the appropriate queue
+ *
+ * For Hard Real-Time vcpus (soft = 0):
+ *  -When a blocked vcpu unblocks, it is allowed to start execution at
+ *   the beginning of the next complete period
+ *   (D..deadline, R..running, B..blocking/sleeping, U..unblocking/waking up)
+ *
+ *   DRRB_____D__U_____DRRRRR___D________ ...
+ *
+ *  -This causes the vcpu to miss a period (and a deadline)
+ *  -Doesn't disturb the schedule at all
+ *  -Deadlines keep occurring isochronously
+ *
+ * For Soft Real-Time vcpus (soft = 1):
+ *  -Deadlines are set and updated according to the Constant Bandwidth Server
+ *   rule and vcpus are moved immediately to the run queue.
+ *
+ */
+static void cbs_wake(const struct scheduler *ops, struct vcpu *v)
+{
+    s_time_t now = NOW();
+    struct cbs_vcpu_info* inf = CBS_VCPU(v);
+
+    if ( unlikely(is_idle_vcpu(v)) )
+        return;
+
+    if ( unlikely(__task_on_queue(v)) )
+        return;
+
+    ASSERT(!cbs_runnable(v));
+    inf->status &= ~CBS_ASLEEP;
+
+    if ( unlikely(inf->deadl_abs == 0) )
+    {
+        /* Initial setup of the deadline */
+        inf->deadl_abs = now + inf->budget;
+    }
+
+#ifdef CBS_STATS
+    inf->block_tot++;
+#endif
+
+    if ( cbs_soft(v) )
+    {
+        /* Apply CBS rule
+         * Where:
+         *      c == Remaining server budget == (inf->budget - inf->cputime)
+         *      d == Server (vcpu) deadline  == inf->deadl_abs
+         *      r == Wake-up time of vcpu    == now
+         *      U == Server (vcpu) bandwidth == (inf->budget / inf->period)
+         * if c >= (d - r) * U, cross-multiplied by inf->period because the
+         * integer quotient budget / period is 0 whenever budget < period:
+         *      c * inf->period >= (d - r) * inf->budget
+         * (NOTE(review): the products can exceed 64 bits for multi-second
+         * parameters.)  If true, push deadline back by one period and
+         * refresh budget, else use current budget and deadline.
+         */
+        if ( ((inf->budget - inf->cputime) * inf->period) >=
+             ((inf->deadl_abs - now) * inf->budget) )
+        {
+            /* Push back deadline by one period */
+            inf->deadl_abs += inf->period;
+            inf->cputime = 0;
+        }
+
+        /* In CBS we don't care if the period has begun,
+         * the task doesn't have to wait for its period
+         * because it'll never request more than its budget
+         * for any given period.
+         */
+        __add_to_runqueue_sort(v);
+    }
+    else {
+        /* Task is a hard task, treat accordingly */
+#ifdef CBS_STATS
+        if ( now < inf->deadl_abs )
+        {
+            /* Short blocking */
+            inf->short_block_tot++;
+        }
+        else
+        {
+            /* Long unblocking, someone is going to miss their deadline. */
+            inf->long_block_tot++;
+        }
+#endif
+
+        if ( PERIOD_BEGIN(inf) > now )
+            __add_to_waitqueue_sort(v);
+        else
+            __add_to_runqueue_sort(v);
+    }
+
+#ifdef CBS_STATS
+    /* Do some statistics here... */
+    if ( inf->block_abs != 0 )
+    {
+        inf->block_time_tot += now - inf->block_abs;
+    }
+#endif
+
+    ASSERT(__task_on_queue(v));
+    /*
+     * Check whether the awakened task needs to invoke the do_schedule
+     * routine. Try to avoid unnecessary runs but:
+     * Safe approximation: Always switch to scheduler!
+     */
+    ASSERT(v->processor >= 0);
+    ASSERT(v->processor < nr_cpu_ids);
+    ASSERT(per_cpu(schedule_data, v->processor).curr);
+
+    if ( should_switch(per_cpu(schedule_data, v->processor).curr, v, now) )
+        cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
+}
+
+/* Print a lot of useful information about a vcpu in the system */
+static void cbs_dump_vcpu(struct vcpu *v)
+{
+ printk("%i.%i has=%c ", v->domain->domain_id, v->vcpu_id,
+ v->is_running ? 'T':'F');
+ printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64,
+ CBS_VCPU(v)->period, CBS_VCPU(v)->budget, CBS_VCPU(v)->deadl_abs);
+
+#ifdef CBS_STATS
+ printk(" m=%u mt=%"PRIu64"o=%u ot=%"PRIu64,
+ CBS_VCPU(v)->miss_tot, CBS_VCPU(v)->miss_time,
+ CBS_VCPU(v)->over_tot, CBS_VCPU(v)->over_time);
+
+ if ( CBS_VCPU(v)->block_tot != 0 )
+ printk("\n blks=%u sh=%u (%u%%) "\
+ "l=%u (%u%%) avg: b=%"PRIu64,
+ CBS_VCPU(v)->block_tot, CBS_VCPU(v)->short_block_tot,
+ (CBS_VCPU(v)->short_block_tot * 100) / CBS_VCPU(v)->block_tot,
+ CBS_VCPU(v)->long_block_tot,
+ (CBS_VCPU(v)->long_block_tot * 100) / CBS_VCPU(v)->block_tot,
+ (CBS_VCPU(v)->block_time_tot) / CBS_VCPU(v)->block_tot);
+#endif
+ printk("\n");
+}
+
+
+/* Dumps all vcpus on the specified cpu */
+static void cbs_dump_cpu_state(const struct scheduler *ops, int cpu)
+{
+ struct list_head *list, *queue, *tmp;
+ struct cbs_vcpu_info *v_inf;
+ struct domain *d;
+ struct vcpu *v;
+ int loop = 0;
+
+ printk("now=%"PRIu64"\n", NOW());
+ queue = RUNQ(cpu);
+ printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
+ (unsigned long) queue->next, (unsigned long) queue->prev);
+ list_for_each_safe ( list, tmp, queue )
+ {
+ printk("%3d: ", loop++);
+ v_inf = list_entry(list, struct cbs_vcpu_info, list);
+ cbs_dump_vcpu(v_inf->vcpu);
+ }
+
+ queue = WAITQ(cpu);
+ loop = 0;
+ printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
+ (unsigned long) queue->next, (unsigned long) queue->prev);
+ list_for_each_safe ( list, tmp, queue )
+ {
+ printk("%3d: ", loop++);
+ v_inf = list_entry(list, struct cbs_vcpu_info, list);
+ cbs_dump_vcpu(v_inf->vcpu);
+ }
+
+ loop = 0;
+ printk("\nnot on Q\n");
+
+ rcu_read_lock(&domlist_read_lock);
+ for_each_domain ( d )
+ {
+ if ( (d->cpupool ? d->cpupool->sched : &sched_cbs_def) != ops )
+ continue;
+ for_each_vcpu(d, v)
+ {
+ if ( !__task_on_queue(v) && (v->processor == cpu) )
+ {
+ printk("%3d: ", loop++);
+ cbs_dump_vcpu(v);
+ }
+ }
+ }
+ rcu_read_unlock(&domlist_read_lock);
+}
+
+
+/* Set (putinfo) or fetch (getinfo) a domain's period/budget/soft; returns 0 or -EINVAL */
+static int cbs_adjust(const struct scheduler *ops, struct domain *d, struct xen_domctl_scheduler_op *op)
+{
+    struct cbs_priv_info *prv = CBS_PRIV(ops);
+    unsigned long flags;
+    s_time_t now = NOW();
+    struct vcpu *v;
+    int rc = 0;
+
+    /*
+     * Serialize against the pluggable scheduler lock to protect from
+     * concurrent updates. We need to take the runq lock for the VCPUs
+     * as well, since we are touching budget and period.
+     *
+     * As in sched_credit2.c, runq locks nest inside the pluggable scheduler
+     * lock.
+     */
+    spin_lock_irqsave(&prv->lock, flags);
+
+    if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
+    {
+        /* Check for sane parameters */
+        if ( !op->u.cbs.period )
+        {
+            printk("Period Not set\n");
+            rc = -EINVAL;
+            goto out;
+        }
+
+        /*
+         * Sanity checking: period within [PERIOD_MIN, PERIOD_MAX] and
+         * budget within [BUDGET_MIN, period]
+         */
+        if ( (op->u.cbs.period > PERIOD_MAX) ||
+             (op->u.cbs.period < PERIOD_MIN) ||
+             (op->u.cbs.budget > op->u.cbs.period) ||
+             (op->u.cbs.budget < BUDGET_MIN) )
+        {
+            printk("Insane Parameters: period: %"PRIu64"\tbudget: %"PRIu64"\n", op->u.cbs.period, op->u.cbs.budget);
+            rc = -EINVAL;
+            goto out;
+        }
+
+        /* Apply the same parameters to every vcpu of the domain */
+        for_each_vcpu ( d, v )
+        {
+            spinlock_t *lock = vcpu_schedule_lock(v);
+
+            CBS_VCPU(v)->period = op->u.cbs.period;
+            CBS_VCPU(v)->budget = op->u.cbs.budget;
+            if(op->u.cbs.soft)
+            {
+                CBS_VCPU(v)->status |= CBS_SOFT_TASK;
+            }
+            else
+            {
+                /* Correct deadline when switching from a soft to hard vcpu */
+                if( unlikely((CBS_VCPU(v)->deadl_abs - now) >= (CBS_VCPU(v)->period * 3)) )
+                {
+                    CBS_VCPU(v)->deadl_abs = (now - CBS_VCPU(v)->cputime) + (2 * CBS_VCPU(v)->period);
+                }
+
+                CBS_VCPU(v)->status &= (~CBS_SOFT_TASK);
+            }
+            vcpu_schedule_unlock(lock, v);
+        }
+    }
+    else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
+    {
+        if ( d->vcpu[0] == NULL )
+        {
+            rc = -EINVAL;
+            goto out;
+        }
+
+        op->u.cbs.period = CBS_VCPU(d->vcpu[0])->period;
+        op->u.cbs.budget = CBS_VCPU(d->vcpu[0])->budget;
+        op->u.cbs.soft = cbs_soft(d->vcpu[0]);
+    }
+
+out:
+    spin_unlock_irqrestore(&prv->lock, flags);
+
+    return rc;
+}
+
+static struct cbs_priv_info _cbs_priv;
+
+const struct scheduler sched_cbs_def = {
+ .name = "Constant Bandwidth Server Scheduler",
+ .opt_name = "cbs",
+ .sched_id = XEN_SCHEDULER_CBS,
+ .sched_data = &_cbs_priv,
+
+ .init_domain = cbs_init_domain,
+ .destroy_domain = cbs_destroy_domain,
+
+ .insert_vcpu = cbs_insert_vcpu,
+
+ .alloc_vdata = cbs_alloc_vdata,
+ .free_vdata = cbs_free_vdata,
+ .alloc_pdata = cbs_alloc_pdata,
+ .free_pdata = cbs_free_pdata,
+ .alloc_domdata = cbs_alloc_domdata,
+ .free_domdata = cbs_free_domdata,
+
+ .init = cbs_init,
+ .deinit = cbs_deinit,
+
+ .do_schedule = cbs_do_schedule,
+ .pick_cpu = cbs_pick_cpu,
+ .dump_cpu_state = cbs_dump_cpu_state,
+ .sleep = cbs_sleep,
+ .wake = cbs_wake,
+ .adjust = cbs_adjust,
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c
deleted file mode 100644
index 5df4825..0000000
--- a/xen/common/sched_sedf.c
+++ /dev/null
@@ -1,917 +0,0 @@
-/******************************************************************************
- * Constant Bandwidth Server Scheduler for Xen
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * by DornerWorks Ltd. (C) 2014 Grand Rapids, MI
- *
- * Adapted from code by Stephan Diestelhorst (C) 2004 Cambridge University
- * and Mark Williamson (C) 2004 Intel Research Cambridge
- *
- */
-
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <xen/sched-if.h>
-#include <xen/timer.h>
-#include <xen/softirq.h>
-#include <xen/time.h>
-#include <xen/errno.h>
-
-#ifndef NDEBUG
-#define CBS_STATS
-#define CHECK(_p) \
- do { \
- if ( !(_p) ) \
- printk("Check '%s' failed, line %d, file %s\n", \
- #_p , __LINE__, __FILE__); \
- } while ( 0 )
-#else
-#define CHECK(_p) ((void)0)
-#endif
-
-#define CBS_SOFT_TASK (1)
-#define CBS_ASLEEP (16)
-
-#define DEFAULT_PERIOD (MILLISECS(20))
-#define DEFAULT_BUDGET (MILLISECS(10))
-
-#define PERIOD_MAX MILLISECS(10000) /* 10s */
-#define PERIOD_MIN (MICROSECS(10)) /* 10us */
-#define BUDGET_MIN (MICROSECS(5)) /* 5us */
-
-#define EQ(_A, _B) ((!!(_A)) == (!!(_B)))
-
-
-struct cbs_dom_info {
- struct domain *domain;
-};
-
-struct cbs_priv_info {
- /* lock for the whole pluggable scheduler, nests inside cpupool_lock */
- spinlock_t lock;
-};
-
-struct cbs_vcpu_info {
- struct vcpu *vcpu;
- struct list_head list;
-
-
- /* Parameters for EDF-CBS */
- s_time_t period; /* = Server scheduling period */
- s_time_t budget; /* = Guarenteed minimum CPU time per period */
- /* Note: Server bandwidth = (budget / period) */
-
- /* Status of vcpu */
- int status;
- /* Bookkeeping */
- s_time_t deadl_abs;
- s_time_t sched_start_abs;
- s_time_t cputime;
- /* Times the vcpu un-/blocked */
- s_time_t block_abs;
- s_time_t unblock_abs;
-
-#ifdef CBS_STATS
- s_time_t block_time_tot;
- int block_tot;
- int short_block_tot;
- int long_block_tot;
- s_time_t miss_time;
- s_time_t over_time;
- int miss_tot;
- int over_tot;
-#endif
-};
-
-struct cbs_cpu_info {
- struct list_head runnableq;
- struct list_head waitq;
- s_time_t current_budget_expires;
-};
-
-#define CBS_PRIV(_ops) \
- ((struct cbs_priv_info *)((_ops)->sched_data))
-#define CBS_VCPU(_vcpu) ((struct cbs_vcpu_info *)((_vcpu)->sched_priv))
-#define CBS_PCPU(_cpu) \
- ((struct cbs_cpu_info *)per_cpu(schedule_data, _cpu).sched_priv)
-#define LIST(_vcpu) (&CBS_VCPU(_vcpu)->list)
-#define RUNQ(_cpu) (&CBS_PCPU(_cpu)->runnableq)
-#define WAITQ(_cpu) (&CBS_PCPU(_cpu)->waitq)
-#define IDLETASK(_cpu) (idle_vcpu[_cpu])
-
-#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
-
-#define DIV_UP(_X, _Y) (((_X) + (_Y) - 1) / _Y)
-
-#define cbs_runnable(edom) (!(CBS_VCPU(edom)->status & CBS_ASLEEP))
-
-#define cbs_soft(edom) (CBS_VCPU(edom)->status & CBS_SOFT_TASK)
-
-static void cbs_dump_cpu_state(const struct scheduler *ops, int cpu);
-
-static inline int __task_on_queue(struct vcpu *v)
-{
- return (((LIST(v))->next != NULL) && (LIST(v)->next != LIST(v)));
-}
-
-static inline void __del_from_queue(struct vcpu *v)
-{
- struct list_head *list = LIST(v);
- ASSERT(__task_on_queue(v));
- list_del(list);
- list->next = NULL;
- ASSERT(!__task_on_queue(v));
-}
-
-typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2);
-
-static inline void list_insert_sort(
- struct list_head *list, struct list_head *element, list_comparer comp)
-{
- struct list_head *cur;
-
- /* Iterate through all elements to find our "hole" */
- list_for_each( cur, list )
- if ( comp(element, cur) < 0 )
- break;
-
- /* cur now contains the element, before which we'll enqueue */
- list_add(element, cur->prev);
-}
-
-#define VCPU_COMPARER(name, field, comp1, comp2) \
-static int name##_comp(struct list_head* el1, struct list_head* el2) \
-{ \
- struct cbs_vcpu_info *v1, *v2; \
- v1 = list_entry(el1, struct cbs_vcpu_info, field); \
- v2 = list_entry(el2, struct cbs_vcpu_info, field); \
- if ( (comp1) == (comp2) ) \
- return 0; \
- if ( (comp1) < (comp2) ) \
- return -1; \
- else \
- return 1; \
-}
-
-/*
- * Adds a vcpu to the queue of processes which wait for the beginning of the
- * next period; this list is therefore sortet by this time, which is simply
- * absol. deadline - period.
- */
-VCPU_COMPARER(waitq, list, PERIOD_BEGIN(v1), PERIOD_BEGIN(v2));
-static inline void __add_to_waitqueue_sort(struct vcpu *v)
-{
- ASSERT(!__task_on_queue(v));
- list_insert_sort(WAITQ(v->processor), LIST(v), waitq_comp);
- ASSERT(__task_on_queue(v));
-}
-
-/*
- * Adds a vcpu to the queue of processes which have started their current
- * period and are runnable (i.e. not blocked, dieing,...). The first element
- * on this list is running on the processor, if the list is empty the idle
- * task will run. As we are implementing EDF, this list is sorted by deadlines.
- */
-VCPU_COMPARER(runq, list, v1->deadl_abs, v2->deadl_abs);
-static inline void __add_to_runqueue_sort(struct vcpu *v)
-{
- list_insert_sort(RUNQ(v->processor), LIST(v), runq_comp);
-}
-
-
-static void cbs_insert_vcpu(const struct scheduler *ops, struct vcpu *v)
-{
- if ( is_idle_vcpu(v) )
- {
- CBS_VCPU(v)->deadl_abs = 0;
- CBS_VCPU(v)->status &= ~CBS_ASLEEP;
- }
-}
-
-static void *cbs_alloc_vdata(const struct scheduler *ops, struct vcpu *v, void *dd)
-{
- struct cbs_vcpu_info *inf;
-
- inf = xzalloc(struct cbs_vcpu_info);
- if ( inf == NULL )
- return NULL;
-
- inf->vcpu = v;
-
- inf->deadl_abs = 0;
- inf->cputime = 0;
- inf->status = CBS_ASLEEP;
-
- if (v->domain->domain_id == 0)
- {
- /* Domain 0, needs a budget to boot the machine */
- inf->period = DEFAULT_PERIOD;
- inf->budget = DEFAULT_BUDGET;
- }
- else
- {
- inf->period = DEFAULT_PERIOD;
- inf->budget = 0;
- }
-
- INIT_LIST_HEAD(&(inf->list));
-
- SCHED_STAT_CRANK(vcpu_init);
-
- return inf;
-}
-
-static void *
-cbs_alloc_pdata(const struct scheduler *ops, int cpu)
-{
- struct cbs_cpu_info *spc;
-
- spc = xzalloc(struct cbs_cpu_info);
- BUG_ON(spc == NULL);
- INIT_LIST_HEAD(&spc->waitq);
- INIT_LIST_HEAD(&spc->runnableq);
-
- return (void *)spc;
-}
-
-static void
-cbs_free_pdata(const struct scheduler *ops, void *spc, int cpu)
-{
- if ( spc == NULL )
- return;
-
- xfree(spc);
-}
-
-static void cbs_free_vdata(const struct scheduler *ops, void *priv)
-{
- xfree(priv);
-}
-
-static void *
-cbs_alloc_domdata(const struct scheduler *ops, struct domain *d)
-{
- return xzalloc(struct cbs_dom_info);
-}
-
-static int cbs_init_domain(const struct scheduler *ops, struct domain *d)
-{
- d->sched_priv = cbs_alloc_domdata(ops, d);
- if ( d->sched_priv == NULL )
- return -ENOMEM;
-
- return 0;
-}
-
-static void cbs_free_domdata(const struct scheduler *ops, void *data)
-{
- xfree(data);
-}
-
-static void cbs_destroy_domain(const struct scheduler *ops, struct domain *d)
-{
- cbs_free_domdata(ops, d->sched_priv);
-}
-
-static int cbs_pick_cpu(const struct scheduler *ops, struct vcpu *v)
-{
- cpumask_t online_affinity;
- cpumask_t *online;
-
- online = cpupool_scheduler_cpumask(v->domain->cpupool);
- cpumask_and(&online_affinity, v->cpu_affinity, online);
- return cpumask_cycle(v->vcpu_id % cpumask_weight(&online_affinity) - 1,
- &online_affinity);
-}
-
-/*
- * Handles the rescheduling & bookkeeping of vcpus running in their
- * guaranteed time budget.
- */
-static void desched_edf_vcpu(s_time_t now, struct vcpu *v)
-{
- struct cbs_vcpu_info* inf = CBS_VCPU(v);
-
- /* Current vcpu is running in real time mode */
- ASSERT(__task_on_queue(v));
-
- /* Update the vcpu's cputime */
- inf->cputime += now - inf->sched_start_abs;
-
- /* Scheduling decisions which don't remove the running vcpu from
- * the runq */
- if ( (inf->cputime < inf->budget) && cbs_runnable(v) )
- return;
-
- __del_from_queue(v);
-
-#ifdef CBS_STATS
- /* Manage deadline misses */
- if ( unlikely(inf->deadl_abs < now) )
- {
- inf->miss_tot++;
- inf->miss_time += inf->cputime;
- }
-#endif
-
- /* Manage overruns */
- if ( inf->cputime >= inf->budget )
- {
- inf->cputime -= inf->budget;
-
-
- /* Set next deadline */
- inf->deadl_abs += inf->period;
-
- /* Ensure that the cputime is always less than budget */
- if ( unlikely(inf->cputime > inf->budget) )
- {
-#ifdef CBS_STATS
- inf->over_tot++;
- inf->over_time += inf->cputime;
-#endif
-
- /* Make up for the overage by pushing the deadline
- into the future */
- inf->deadl_abs += ((inf->cputime / inf->budget)
- * inf->period) * 2;
- inf->cputime -= (inf->cputime / inf->budget) * inf->budget;
- }
-
- /* Ensure that the start of the next period is in the future */
- if ( unlikely(PERIOD_BEGIN(inf) < now) )
- inf->deadl_abs +=
- (DIV_UP(now - PERIOD_BEGIN(inf),
- inf->period)) * inf->period;
- }
-
- /* Add a runnable vcpu to the appropriate queue */
- if ( cbs_runnable(v) )
- {
- if( cbs_soft(v) )
- {
- __add_to_runqueue_sort(v);
- }
- else
- {
- __add_to_waitqueue_sort(v);
- }
- }
-
- ASSERT(EQ(cbs_runnable(v), __task_on_queue(v)));
-}
-
-
-/* Update all elements on the queues */
-static void update_queues(
- s_time_t now, struct list_head *runq, struct list_head *waitq)
-{
- struct list_head *cur, *tmp;
- struct cbs_vcpu_info *curinf;
-
- /*
- * Check for the first elements of the waitqueue, whether their
- * next period has already started.
- */
- list_for_each_safe ( cur, tmp, waitq )
- {
- curinf = list_entry(cur, struct cbs_vcpu_info, list);
- if ( PERIOD_BEGIN(curinf) > now )
- break;
- __del_from_queue(curinf->vcpu);
- __add_to_runqueue_sort(curinf->vcpu);
- }
-
- /* Process the runq, find vcpus that are on the runq that shouldn't */
- list_for_each_safe ( cur, tmp, runq )
- {
- curinf = list_entry(cur, struct cbs_vcpu_info, list);
-
- if ( unlikely(curinf->budget == 0) )
- {
- /* Ignore vcpus with empty budget */
- __del_from_queue(curinf->vcpu);
-
- /* Move them to their next period */
- curinf->deadl_abs += curinf->period;
-
- /* Ensure that the start of the next period is in the future */
- if ( unlikely(PERIOD_BEGIN(curinf) < now) )
- curinf->deadl_abs +=
- (DIV_UP(now - PERIOD_BEGIN(curinf),
- curinf->period)) * curinf->period;
-
- /* Put them back into the queue */
- __add_to_waitqueue_sort(curinf->vcpu);
- }
-
- else
- break;
- }
-}
-
-
-static int cbs_init(struct scheduler *ops)
-{
- struct cbs_priv_info *prv;
-
- prv = xzalloc(struct cbs_priv_info);
- if ( prv == NULL )
- return -ENOMEM;
-
- ops->sched_data = prv;
- spin_lock_init(&prv->lock);
-
- return 0;
-}
-
-
-static void cbs_deinit(const struct scheduler *ops)
-{
- struct cbs_priv_info *prv;
-
- prv = CBS_PRIV(ops);
- if ( prv != NULL )
- xfree(prv);
-}
-
-
-/*
- * Main scheduling function
- * Reasons for calling this function are:
- * -budget for the current server is used up
- * -vcpu on waitqueue has started it's period
- * -and various others ;) in general: determine which vcpu to run next
- */
-static struct task_slice cbs_do_schedule(
- const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled)
-{
- int cpu = smp_processor_id();
- struct list_head *runq = RUNQ(cpu);
- struct list_head *waitq = WAITQ(cpu);
- struct cbs_vcpu_info *inf = CBS_VCPU(current);
- struct cbs_vcpu_info *runinf, *waitinf;
- struct task_slice ret;
-
- SCHED_STAT_CRANK(schedule);
-
- /* Idle tasks don't need any of the following stuff */
- if ( is_idle_vcpu(current) )
- goto check_waitq;
-
- /*
- * Create local state of the status of the vcpu, in order to avoid
- * inconsistent state during scheduling decisions, because data for
- * vcpu_runnable is not protected by the scheduling lock!
- */
- if ( !vcpu_runnable(current) )
- inf->status |= CBS_ASLEEP;
-
- if ( inf->status & CBS_ASLEEP )
- inf->block_abs = now;
-
- desched_edf_vcpu(now, current);
- check_waitq:
- update_queues(now, runq, waitq);
-
- /*
- * Now simply pick the first vcpu from the runqueue, which has the
- * earliest deadline, because the list is sorted
- *
- * Tasklet work (which runs in idle VCPU context) overrides all else.
- */
- if ( tasklet_work_scheduled ||
- (list_empty(runq) && list_empty(waitq)) ||
- unlikely(!cpumask_test_cpu(cpu,
- cpupool_scheduler_cpumask(per_cpu(cpupool, cpu)))) )
- {
- ret.task = IDLETASK(cpu);
- ret.time = SECONDS(1);
- }
- else if ( !list_empty(runq) )
- {
- runinf = list_entry(runq->next, struct cbs_vcpu_info, list);
- ret.task = runinf->vcpu;
- if ( !list_empty(waitq) )
- {
- waitinf = list_entry(waitq->next,
- struct cbs_vcpu_info, list);
- /*
- * Rerun scheduler, when scheduled vcpu consumes
- * its budget or the first vcpu from the waitqueue
- * gets ready.
- */
- ret.time = MIN(now + runinf->budget - runinf->cputime,
- PERIOD_BEGIN(waitinf)) - now;
- }
- else
- {
- ret.time = runinf->budget - runinf->cputime;
- }
- }
- else
- {
- waitinf = list_entry(waitq->next, struct cbs_vcpu_info, list);
-
- ret.task = IDLETASK(cpu);
- ret.time = PERIOD_BEGIN(waitinf) - now;
- }
-
- /*
- * TODO: Do something USEFUL when this happens and find out, why it
- * still can happen!!!
- */
- if ( ret.time < 0)
- printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
- ret.time);
-
- ret.migrated = 0;
-
- CBS_VCPU(ret.task)->sched_start_abs = now;
- CHECK(ret.time > 0);
- ASSERT(cbs_runnable(ret.task));
- CBS_PCPU(cpu)->current_budget_expires = now + ret.time;
- return ret;
-}
-
-static void cbs_sleep(const struct scheduler *ops, struct vcpu *v)
-{
- if ( is_idle_vcpu(v) )
- return;
-
- CBS_VCPU(v)->status |= CBS_ASLEEP;
-
- if ( per_cpu(schedule_data, v->processor).curr == v )
- {
- cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
- }
- else
- {
- if ( __task_on_queue(v) )
- __del_from_queue(v);
- }
-}
-
-/*
- * Compares two vcpus in the relation of whether the one is allowed to
- * interrupt the others execution.
- * It returns true (!=0) if a switch to the other vcpu is good.
- * Priority scheme is as follows:
- * EDF: early deadline > late deadline
- */
-static inline int should_switch(struct vcpu *cur,
- struct vcpu *other,
- s_time_t now)
-{
- struct cbs_vcpu_info *cur_inf, *other_inf;
- cur_inf = CBS_VCPU(cur);
- other_inf = CBS_VCPU(other);
-
- /* Always interrupt idle vcpu. */
- if ( is_idle_vcpu(cur) )
- return 1;
-
- /* Check whether we need to make an earlier scheduling decision */
- if ( PERIOD_BEGIN(other_inf) <
- CBS_PCPU(other->processor)->current_budget_expires )
- return 1;
-
- return 0;
-}
-
-/*
- * This function wakes up a vcpu, i.e. moves them into the appropriate queue
- *
- * For Hard Real-Time vcpus (soft = 0):
- * -When a blocked vcpu unblocks, it is allowed to start execution at
- * the beginning of the next complete period
- * (D..deadline, R..running, B..blocking/sleeping, U..unblocking/waking up
- *
- * DRRB_____D__U_____DRRRRR___D________ ...
- *
- * -This causes the vcpu to miss a period (and a deadlline)
- * -Doesn't disturb the schedule at all
- * -Deadlines keep occuring isochronous
- *
- * For Soft Real-Time vcpus (soft = 1):
- * -Deadlines are set and updated according to the Constant Bandwidth Server
- * rule and vcpus are moved immediately to the run queue.
- *
- */
-static void cbs_wake(const struct scheduler *ops, struct vcpu *v)
-{
- s_time_t now = NOW();
- struct cbs_vcpu_info* inf = CBS_VCPU(v);
-
- if ( unlikely(is_idle_vcpu(v)) )
- return;
-
- if ( unlikely(__task_on_queue(v)) )
- return;
-
- ASSERT(!cbs_runnable(v));
- inf->status &= ~CBS_ASLEEP;
-
- if ( unlikely(inf->deadl_abs == 0) )
- {
- /* Initial setup of the deadline */
- inf->deadl_abs = now + inf->budget;
- }
-
-#ifdef CBS_STATS
- inf->block_tot++;
-#endif
-
- if ( cbs_soft(v) )
- {
- /* Apply CBS rule
- * Where:
- * c == Remaining server budget == (inf->budget - cpu_time)
- * d == Server (vcpu) deadline == inf->deadl_abs
- * r == Wake-up time of vcpu == now
- * U == Server (vcpu) bandwidth == (inf->budget / inf->period)
- *
- * if c>=(d-r)*U --->
- * (inf->budget - cputime) >= (inf->deadl_abs - now) * inf->period
- *
- * If true, push deadline back by one period and refresh budget, else
- * use current budget and deadline.
- */
- if((inf->budget - inf->cputime) >=
- ((inf->deadl_abs - now) * (inf->budget / inf->period)))
- {
- /* Push back deadline by one period */
- inf->deadl_abs += inf->period;
- inf->cputime = 0;
- }
-
- /* In CBS we don't care if the period has begun,
- * the task doesn't have to wait for its period
- * because it'll never request more than its budget
- * for any given period.
- */
- __add_to_runqueue_sort(v);
- }
- else {
- /* Task is a hard task, treat accordingly */
-#ifdef CBS_STATS
- if ( now < inf->deadl_abs )
- {
- /* Short blocking */
- inf->short_block_tot++;
- }
- else
- {
- /* Long unblocking, someone is going to miss their deadline. */
- inf->long_block_tot++;
- }
-#endif
-
- if ( PERIOD_BEGIN(inf) > now )
- __add_to_waitqueue_sort(v);
- else
- __add_to_runqueue_sort(v);
- }
-
-#ifdef CBS_STATS
- /* Do some statistics here... */
- if ( inf->block_abs != 0 )
- {
- inf->block_time_tot += now - inf->block_abs;
- }
-#endif
-
- ASSERT(__task_on_queue(v));
- /*
- * Check whether the awakened task needs to invoke the do_schedule
- * routine. Try to avoid unnecessary runs but:
- * Safe approximation: Always switch to scheduler!
- */
- ASSERT(v->processor >= 0);
- ASSERT(v->processor < nr_cpu_ids);
- ASSERT(per_cpu(schedule_data, v->processor).curr);
-
- if ( should_switch(per_cpu(schedule_data, v->processor).curr, v, now) )
- cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
-}
-
-/* Print a lot of useful information about a vcpus in the system */
-static void cbs_dump_vcpu(struct vcpu *v)
-{
- printk("%i.%i has=%c ", v->domain->domain_id, v->vcpu_id,
- v->is_running ? 'T':'F');
- printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64,
- CBS_VCPU(v)->period, CBS_VCPU(v)->budget, CBS_VCPU(v)->deadl_abs);
-
-#ifdef CBS_STATS
- printk(" m=%u mt=%"PRIu64"o=%u ot=%"PRIu64,
- CBS_VCPU(v)->miss_tot, CBS_VCPU(v)->miss_time,
- CBS_VCPU(v)->over_tot, CBS_VCPU(v)->over_time);
-
- if ( CBS_VCPU(v)->block_tot != 0 )
- printk("\n blks=%u sh=%u (%u%%) "\
- "l=%u (%u%%) avg: b=%"PRIu64,
- CBS_VCPU(v)->block_tot, CBS_VCPU(v)->short_block_tot,
- (CBS_VCPU(v)->short_block_tot * 100) / CBS_VCPU(v)->block_tot,
- CBS_VCPU(v)->long_block_tot,
- (CBS_VCPU(v)->long_block_tot * 100) / CBS_VCPU(v)->block_tot,
- (CBS_VCPU(v)->block_time_tot) / CBS_VCPU(v)->block_tot);
-#endif
- printk("\n");
-}
-
-
-/* Dumps all vcpus on the specified cpu */
-static void cbs_dump_cpu_state(const struct scheduler *ops, int cpu)
-{
- struct list_head *list, *queue, *tmp;
- struct cbs_vcpu_info *v_inf;
- struct domain *d;
- struct vcpu *v;
- int loop = 0;
-
- printk("now=%"PRIu64"\n", NOW());
- queue = RUNQ(cpu);
- printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
- (unsigned long) queue->next, (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue )
- {
- printk("%3d: ", loop++);
- v_inf = list_entry(list, struct cbs_vcpu_info, list);
- cbs_dump_vcpu(v_inf->vcpu);
- }
-
- queue = WAITQ(cpu);
- loop = 0;
- printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
- (unsigned long) queue->next, (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue )
- {
- printk("%3d: ", loop++);
- v_inf = list_entry(list, struct cbs_vcpu_info, list);
- cbs_dump_vcpu(v_inf->vcpu);
- }
-
- loop = 0;
- printk("\nnot on Q\n");
-
- rcu_read_lock(&domlist_read_lock);
- for_each_domain ( d )
- {
- if ( (d->cpupool ? d->cpupool->sched : &sched_cbs_def) != ops )
- continue;
- for_each_vcpu(d, v)
- {
- if ( !__task_on_queue(v) && (v->processor == cpu) )
- {
- printk("%3d: ", loop++);
- cbs_dump_vcpu(v);
- }
- }
- }
- rcu_read_unlock(&domlist_read_lock);
-}
-
-
-/* Set or fetch domain scheduling parameters */
-static int cbs_adjust(const struct scheduler *ops, struct domain *d, struct xen_domctl_scheduler_op *op)
-{
- struct cbs_priv_info *prv = CBS_PRIV(ops);
- unsigned long flags;
- s_time_t now = NOW();
- struct vcpu *v;
- int rc = 0;
-
- /*
- * Serialize against the pluggable scheduler lock to protect from
- * concurrent updates. We need to take the runq lock for the VCPUs
- * as well, since we are touching budget and period.
- *
- * As in sched_credit2.c, runq locks nest inside the pluggable scheduler
- * lock.
- */
- spin_lock_irqsave(&prv->lock, flags);
-
- if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
- {
- /* Check for sane parameters */
- if ( !op->u.cbs.period )
- {
- printk("Period Not set");
- rc = -EINVAL;
- goto out;
- }
-
- /*
- * Sanity checking
- */
- if ( (op->u.cbs.period > PERIOD_MAX) ||
- (op->u.cbs.period < PERIOD_MIN) ||
- (op->u.cbs.budget > op->u.cbs.period) ||
- (op->u.cbs.budget < BUDGET_MIN) )
- {
- printk("Insane Parameters: period: %lu\tbudget: %lu\n", op->u.cbs.period, op->u.cbs.budget);
- rc = -EINVAL;
- goto out;
- }
-
- /* Time-driven domains */
- for_each_vcpu ( d, v )
- {
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- CBS_VCPU(v)->period = op->u.cbs.period;
- CBS_VCPU(v)->budget = op->u.cbs.budget;
- if(op->u.cbs.soft)
- {
- CBS_VCPU(v)->status |= CBS_SOFT_TASK;
- }
- else
- {
- /* Correct deadline when switching from a soft to hard vcpu */
- if( unlikely((CBS_VCPU(v)->deadl_abs - now) >= (CBS_VCPU(v)->period * 3)) )
- {
- CBS_VCPU(v)->deadl_abs = (now - CBS_VCPU(v)->cputime) + (2 * CBS_VCPU(v)->period);
- }
-
- CBS_VCPU(v)->status &= (~CBS_SOFT_TASK);
- }
- vcpu_schedule_unlock(lock, v);
- }
- }
- else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
- {
- if ( d->vcpu[0] == NULL )
- {
- rc = -EINVAL;
- goto out;
- }
-
- op->u.cbs.period = CBS_VCPU(d->vcpu[0])->period;
- op->u.cbs.budget = CBS_VCPU(d->vcpu[0])->budget;
- op->u.cbs.soft = cbs_soft(d->vcpu[0]);
- }
-
-out:
- spin_unlock_irqrestore(&prv->lock, flags);
-
- return rc;
-}
-
-static struct cbs_priv_info _cbs_priv;
-
-const struct scheduler sched_cbs_def = {
- .name = "Constant Bandwidth Server Scheduler",
- .opt_name = "cbs",
- .sched_id = XEN_SCHEDULER_CBS,
- .sched_data = &_cbs_priv,
-
- .init_domain = cbs_init_domain,
- .destroy_domain = cbs_destroy_domain,
-
- .insert_vcpu = cbs_insert_vcpu,
-
- .alloc_vdata = cbs_alloc_vdata,
- .free_vdata = cbs_free_vdata,
- .alloc_pdata = cbs_alloc_pdata,
- .free_pdata = cbs_free_pdata,
- .alloc_domdata = cbs_alloc_domdata,
- .free_domdata = cbs_free_domdata,
-
- .init = cbs_init,
- .deinit = cbs_deinit,
-
- .do_schedule = cbs_do_schedule,
- .pick_cpu = cbs_pick_cpu,
- .dump_cpu_state = cbs_dump_cpu_state,
- .sleep = cbs_sleep,
- .wake = cbs_wake,
- .adjust = cbs_adjust,
-};
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
--
1.7.9.5
^ permalink raw reply related [flat|nested] 27+ messages in thread* Re: [RFC PATCH 0/4] Repurpose SEDF Scheduler for Real-time use
2014-06-13 19:58 [RFC PATCH 0/4] Repurpose SEDF Scheduler for Real-time use Josh Whitehead
` (3 preceding siblings ...)
2014-06-13 19:58 ` [RFC PATCH 4/4] Changed filenames with sedf to cbs to reflect the actual scheduler Josh Whitehead
@ 2014-06-16 7:25 ` Dario Faggioli
2014-06-17 14:44 ` Dario Faggioli
5 siblings, 0 replies; 27+ messages in thread
From: Dario Faggioli @ 2014-06-16 7:25 UTC (permalink / raw)
To: Josh Whitehead
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer
[-- Attachment #1.1: Type: text/plain, Size: 1601 bytes --]
Hi Josh and Robbie (and everyone),
On ven, 2014-06-13 at 15:58 -0400, Josh Whitehead wrote:
> NEED:
> With the increased interest in embedded Xen, there is a need for a suitable
> real-time scheduler. The arinc653 scheduler currently only supports a
> single core and has limited niche appeal, while the sedf scheduler is
> widely consider deprecated and is currently a mess. This patchset
> repurposes the current sedf scheduler and adds a more capable and robust
> real-time scheduler suitable for embedded use to the Xen repertoire.
>
Indeed! And in fact, thanks a lot for this work. I'll have a look at the
patches ASAP, and will let you know what I think of them.
> PROPOSED SOLUTION:
> Repurposing of the sedf scheduler was accomplished by implementing the
> Constant Bandwidth Server (CBS) algorithm (originally proposed by Dario
> Faggioli) which is capable of properly handling mixed soft real-time and
> hard real-time tasks (domains/vcpus) on the same system.
>
Well, I did a lot of work _on_top_of_ CBS during my Ph.D. on RT systems, but
the algorithm itself is not my invention! :-P
I certainly was the one that originally proposed to use it in Xen,
inside (or in spite of, if you want) SEDF, though. :-)
Anyway, thanks again, I'll let you have my comments on the patches.
Regards,
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [RFC PATCH 0/4] Repurpose SEDF Scheduler for Real-time use
2014-06-13 19:58 [RFC PATCH 0/4] Repurpose SEDF Scheduler for Real-time use Josh Whitehead
` (4 preceding siblings ...)
2014-06-16 7:25 ` [RFC PATCH 0/4] Repurpose SEDF Scheduler for Real-time use Dario Faggioli
@ 2014-06-17 14:44 ` Dario Faggioli
2014-06-26 19:53 ` Joshua Whitehead
5 siblings, 1 reply; 27+ messages in thread
From: Dario Faggioli @ 2014-06-17 14:44 UTC (permalink / raw)
To: Josh Whitehead
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer
[-- Attachment #1.1: Type: text/plain, Size: 6537 bytes --]
Hi everyone again,
I had the chance to take a close look at the patches, so here I am. :-)
First of all, thanks again Josh, Robbie, and Nate for this work!
That being said...
On ven, 2014-06-13 at 15:58 -0400, Josh Whitehead wrote:
> NEED:
> With the increased interest in embedded Xen, there is a need for a suitable
> real-time scheduler. The arinc653 scheduler currently only supports a
> single core and has limited niche appeal, while the sedf scheduler is
> widely consider deprecated and is currently a mess. This patchset
> repurposes the current sedf scheduler and adds a more capable and robust
> real-time scheduler suitable for embedded use to the Xen repertoire.
>
That describes our current situation quite well. :-)
> PROPOSED SOLUTION:
> Repurposing of the sedf scheduler was accomplished by implementing the
> Constant Bandwidth Server (CBS) algorithm (originally proposed by Dario
> Faggioli) which is capable of properly handling mixed soft real-time and
> hard real-time tasks (domains/vcpus) on the same system.
>
So, as agreed via conf-call, and as implied by what you say yourself
below (when mentioning the TBS), the solution is to have a working
global EDF framework, on top of which we can potentially put multiple
and different budgeting algorithms.
For that exact reason, I'm not sure I'd go all the way down to SCHED_CBS
and sched_cbs.c. In fact, even with the above 'big plan' in mind, I
don't think renaming/substituting SEDF is that important at this stage.
What we have now in SEDF is, basically, an outdated implementation of
EDF with a hackish budgeting scheme on top of it.
What this patchset is aimed at (although in RFC status) is implementing
EDF with a well-known and working budgeting scheme on top of it.
Therefore, I don't think we need to go through the renaming of the c
files, the functions, the parameters, etc... just change the
implementation!
Consider that, at least at the libxl level, we committed to maintain a
stable and compile time backward compatible API. Going through something
like this would require a lot of trickery, in order to make the above
true.
The great value I see in this series is that it is the first step in
turning SEDF into something useful, and you don't need to change the
name and kill the parameters for doing that.
In fact, about the parameters:
* you're adding a 'soft' param, but I think the existing 'extra' can
be used to mean just that, can't it? It seems to me they've got a
pretty compatible meaning, at least up to a certain extent (i.e.,
when extra=1 is used on a domain with a !0 budget and slice).
* 'latency', certainly there is no such thing as scaling U in the
original CBS algorithm, but it won't be too difficult to do that.
Perhaps as a subsequent step, i.e., stick a TODO about it around, for
now, but just don't kill it!
* 'weight' is the only one I'm afraid about, as that was meant to be
useful when SEDF was the default scheduler (so used for general
purpose workloads as well). That's why I'm asking other people what
the constraints of API stability are in this situation. However, even
if we end up being stuck with it, I've got a few ideas on how to use
it in a similar enough way to the original meaning. I.e., I'd
recommend the same. Ignore it and put TODOs around, but don't get rid
of it.
I won't comment on the details of the algorithm here. However, something
similar to what you wrote in this cover letter, would be well suited for
a document in docs/misc. I can contribute and help make it as clear and
easy to understand as possible, of course.
Also, something similar to what you put in the "USAGE INSTRUCTIONS"
section of the cover letter would fit very well, still in docs/misc, in
the equivalent of sedf_scheduler_mini-HOWTO.txt (whether by replacing
the content of that file, or killing it and creating a new one, I don't
know yet).
> FUTURE DEVELOPMENT:
> Though useful in its current state, there are a few additional features
> and modifications we'd like to make in the future:
>
> 1) More efficient multiprocessor support that makes better use of
> available cores. This would likely be accomplished through the use of a
> global EDF queue for all available PCPUs. Though the current version
> does recognize and use multiple cores it takes some creative assigning
> and pinning by the user to use them efficiently, the goal of this change
> would be to have this load balancing occur seamlessly and automatically.
>
Yep, we do need this. However, I agree that we want an incremental
approach. What I was hoping was that, for turning the scheduler from
local to global, we could borrow a few ideas from credit2 (e.g., 1
runqueue per socket), and some code from RT-Xen (and those guys are
also close to submitting patches here on xen-devel).
> 2) VCPU level parameter assignments instead of the current domain level
> assignments. This would allow for more accurate and efficient division of
> resources amongst domains and cpu pools. Currently all vcpus within a
> domain must use the same period and budget parameters.
>
That would be good. It would be even better to go fully hierarchical,
but yes, let's leave this to a later development.
> 3) Alternate scheduling algorithms (e.g. Total Bandwidth Server) could now
> be implemented in this scheduler with relative ease given the
> simplification of the underlying EDF scheduler that was done as part of
> this patchset. This would further expand the capabilities of Xen in
> embedded real-time systems and give developers more options to fit their
> individual system requirements.
>
Indeed. And I certainly am all for providing different server algorithms
and implementations (as said above already). At some point, and perhaps
off-list (as it may be too specific here), one of you guys will have
to explain what it is that you like in TBS better than in CBS. I mean,
as far as I remember, CBS is an improved version of TBS, so why stick
with the old one when we can have the new one... or is it that TBS is
part of some standard/certification/whatever?
Thanks again and Regards,
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [RFC PATCH 0/4] Repurpose SEDF Scheduler for Real-time use
2014-06-17 14:44 ` Dario Faggioli
@ 2014-06-26 19:53 ` Joshua Whitehead
0 siblings, 0 replies; 27+ messages in thread
From: Joshua Whitehead @ 2014-06-26 19:53 UTC (permalink / raw)
To: Dario Faggioli
Cc: Ian Campbell, Stefano Stabellini, George Dunlap, Ian Jackson,
Robert VanVossen, Xen-devel, Nate Studer
On 6/17/2014 10:44 AM, Dario Faggioli wrote:
> Hi everyone again,
>
> I had the chance to give a close look to the patches, so here I am. :-)
>
> First of all, thanks again Josh, Robbie, and Nate for this work!
> That being said...
>
Hello everyone, sorry we didn't respond to this conversation sooner; the last
couple of weeks have been a bit busy around here with some other projects!
Thanks for your feedback on this Dario, I will comment on each area
individually, but many of the things you brought up were debates that we were
having internally here at DornerWorks and we finally decided to just put the
patch out and get a feel for what everyone else was thinking, which is exactly
what happened :-) We do want to make every effort to not work in a vacuum and
make sure this patch series is a benefit to the Xen community as well as our own
work. So all comments are most definitely welcome, and we plan to make any
changes necessary to line-up with what is decided is best for Xen.
> On ven, 2014-06-13 at 15:58 -0400, Josh Whitehead wrote:
>> NEED:
>> With the increased interest in embedded Xen, there is a need for a suitable
>> real-time scheduler. The arinc653 scheduler currently only supports a
>> single core and has limited niche appeal, while the sedf scheduler is
>> widely considered deprecated and is currently a mess. This patchset
>> repurposes the current sedf scheduler and adds a more capable and robust
>> real-time scheduler suitable for embedded use to the Xen repertoire.
>>
> That describes our current situation quite well. :-)
>
>> PROPOSED SOLUTION:
>> Repurposing of the sedf scheduler was accomplished by implementing the
>> Constant Bandwidth Server (CBS) algorithm (originally proposed by Dario
>> Faggioli) which is capable of properly handling mixed soft real-time and
>> hard real-time tasks (domains/vcpus) on the same system.
>>
> So, as agreed via conf-call, and as implied by what you say yourself
> below (when mentioning the TBS), the solution is to have a working
> global EDF framework, on top of which we can potentially put multiple
> and different budgeting algorithms.
>
> For that exact reason, I'm not sure I'd go all the way down to SCHED_CBS
> and sched_cbs.c. In fact, even with the above 'big plan' in mind, I
> don't think renaming/substituting SEDF is that important at this stage.
>
> What we have now in SEDF is, basically, an outdated implementation of
> EDF with a hackish budgeting scheme on top of it.
> What this patchset is aimed at is (although in RFC status) implementing
> EDF with a well known and working budgeting scheme on top of it.
>
> Therefore, I don't think we need to go through the renaming of the c
> files, the functions, the parameters, etc... just change the
> implementation!
>
Indeed this was something we debated as well. I think the final argument that
made us decide to submit the patch with the name change was that we had modified
the SEDF to a significant enough level that if there was anyone still using SEDF
they would be confused by the changes. However if we follow some of the other
comments by you, George, and Jan (using if-defs to protect things) this should
not be a problem and leaving the naming scheme as SEDF should not be an issue.
(I will respond to the last e-mail sent by George as well when I'm done with
this one)
> Consider that, at least at the libxl level, we committed to maintain a
> stable and compile time backward compatible API. Going through something
> like this would require a lot of trickery, in order to make the above
> true.
>
> The great value I see in this series is that it is the first step in
> turning SEDF into something useful, and you don't need to change the
> name and kill the parameters for doing that.
>
> In fact, about the parameters:
> * you're adding a 'soft' param, but I think the existing 'extra' can
> be used to mean just that, can't it? It seems to me they've got a
> pretty compatible meaning, at least up to a certain extent (i.e.,
> when extra=1 is used on a domain with a !0 budget and slice).
> * 'latency', certainly there is no such thing as scaling U in the
> original CBS algorithm, but it won't be too difficult to do that.
> Perhaps as a subsequent step, i.e., stick a TODO about it around, for
> now, but just don't kill it!
> * 'weight' is the only one I'm afraid about, as that was meant to be
> useful when SEDF was the default scheduler (so used for general
> purpose workloads as well). That's why I'm asking other people what
> the constraints of API stability are in this situation. However, even
> if we end up being stuck with it, I've got a few ideas on how to use
> it in a similar enough way to the original meaning. I.e., I'd
> recommend the same. Ignore it and put TODOs around, but don't get rid
> of it.
>
Again, we debated these name changes; the primary motivator was to update the
language of the parameters to reflect the current state of the scheduler and get
away from its prior (current) deprecated/broken state. I think some
"if-defery" as suggested by George and perhaps a "TODO" as you suggest may be
the best solution to updating the naming while maintaining backward compatibility.
> I won't comment on the details of the algorithm here. However, something
> similar to what you wrote in this cover letter, would be well suited for
> a document in docs/misc. I can contribute and help make it as clear and
> easy to understand as possible, of course.
>
> Also, something similar to what you put in the "USAGE INSTRUCTIONS",
> still of the cover letter, would fit very well in, still in docs/misc,
> the equivalent of sedf_scheduler_mini-HOWTO.txt (whether by replacing
> the content of such file, or killing it and creating a new one, I still
> don't know).
>
We would be happy to do this; it's always nice to find good documentation when
attempting to use a part of Xen! This would also let people know the intent of
this scheduler so that the people who don't need it, don't end up using it.
>> FUTURE DEVELOPMENT:
>> Though useful in its current state, there are a few additional features
>> and modifications we'd like to make in the future:
>>
>> 1) More efficient multiprocessor support that makes better use of
>> available cores. This would likely be accomplished through the use of a
>> global EDF queue for all available PCPUs. Though the current version
>> does recognize and use multiple cores it takes some creative assigning
>> and pinning by the user to use them efficiently, the goal of this change
>> would be to have this load balancing occur seamlessly and automatically.
>>
> Yep, we do need this. However, I agree that we want an incremental
> approach. What I was hoping was that, for turning the scheduler from
> local to global, we could borrow a few ideas from credit2 (e.g., 1
> runqueue per socket), and some code from RT-Xen (and those guys are
> also close to submitting patches here on xen-devel).
>
We have some changes in the works that implement this, but we had reached the
point where we just wanted to get some discussion going on what the community
wanted before spending too much time building a "complete" solution that might
not fit the desires of all interested parties :-)
>> 2) VCPU level parameter assignments instead of the current domain level
>> assignments. This would allow for more accurate and efficient division of
>> resources amongst domains and cpu pools. Currently all vcpus within a
>> domain must use the same period and budget parameters.
>>
> That would be good. It would be even better to go fully hierarchical,
> but yes, let's leave this to a later development.
>
Again, we have plans for this that should be easy to implement and shouldn't
take much time, but we wanted more input before continuing blindly. I
believe I've heard you mention hierarchical assignments before; could you give a
little more detail on exactly what you mean by that and maybe what you would
expect that to look like in an actual implementation? What do you see as the
advantages to that over simple individual VCPU level assignments?
>> 3) Alternate scheduling algorithms (e.g. Total Bandwidth Server) could now
>> be implemented in this scheduler with relative ease given the
>> simplification of the underlying EDF scheduler that was done as part of
>> this patchset. This would further expand the capabilities of Xen in
>> embedded real-time systems and give developers more options to fit their
>> individual system requirements.
>>
> Indeed. And I certainly am all for providing different server algorithms
> and implementations (as said above already). At some point, and perhaps
> off-list (as it may be too specific here), someone of you guys will have
> to explain what it is that you like in TBS better than in CBS. I mean,
> as far as I remember, CBS is an improved version of TBS, so why stick
> with the old one when we can have the new one... or is it that TBS is
> part of some standard/certification/whatever?
>
As discussed above our biggest argument FOR the name change was to avoid
confusion, probably our biggest argument AGAINST was that we are intending this
to be available in such a way that it would be easy to implement other
algorithms on top of it. So leaving it as "sedf.c" instead would make that
point more plain.
In the current implementation, any vcpu/domain that is "hard real-time" (the
default for all new vcpus) is treated as a pure EDF: it gets its budget for the
period, no more, no less, and if it runs out of budget it has to wait until the
next period. Only when the "soft" flag is set does the CBS algorithm come into
play. Because the underlying bare-bones EDF functionality is there it would
make it a simple matter to implement other budgeting algorithms and have them
toggled via command line parameters.
We have no special attachment to Total Bandwidth Server, that was just the first
example algorithm I happened to think of when I was writing the cover letter :-)
The systems that we have in mind on which we would like to use Xen will in all
likelihood use CBS (or ARINC653 depending on the customer) as it is superior in
many ways. We just wanted to make sure it was clear the patch left it open for
us or anyone else that might have a need for a different algorithm in the future.
> Thanks again and Regards,
> Dario
>
Thanks again for the feedback, this is exactly what we were looking for. I will
respond to your other comments in the other e-mails as well.
- Josh Whitehead
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 27+ messages in thread