[RFC][PATCH 4/4] sched: introduce boost credit for latency-sensitive domain

All of lore.kernel.org
 help / color / mirror / Atom feed

* [RFC][PATCH 4/4] sched: introduce boost credit for latency-sensitive domain
  2008-12-05 10:01 [RFC][PATCH 0/4] Modification of credit scheduler NISHIGUCHI Naoki
@ 2008-12-08  8:53 ` NISHIGUCHI Naoki
  0 siblings, 0 replies; 17+ messages in thread
From: NISHIGUCHI Naoki @ 2008-12-08  8:53 UTC (permalink / raw)
  To: George Dunlap, xen-devel; +Cc: Ian.Pratt, disheng.su, keir.fraser

[-- Attachment #1: Type: text/plain, Size: 1068 bytes --]

The two attached patches require the previous patches (1-3).
  credit_boost_xen.patch  : modification to xen hypervisor
  credit_boost_tools.patch: modification to tools

Applying these two patches introduces boost credit to the credit
scheduler, which allows the credit scheduler to give priority to
latency-sensitive domains.

To mark a domain as latency-sensitive, enable boost credit for that
domain. There are two methods.

1. Using the xm command, set the upper bound of the domain's boost
credit. It is specified in milliseconds rather than as a credit value,
and is called the max boost period.
  e.g. domain:0, max boost period:100ms
    xm sched-bcredit -d 0 -m 100

2. Using the xm command, set the upper bound of the domain's boost
credit and also set the boost ratio. The boost ratio is the percentage
of one CPU that is used when distributing boost credit. Boost credit
corresponding to the boost ratio is distributed in place of ordinary
credit.
  e.g. domain:0, max boost period:500ms, boost ratio:80(80% to one CPU)
    xm sched-bcredit -d 0 -m 500 -r 80


Best regards,
Naoki

[-- Attachment #2: credit_boost_xen.patch --]
[-- Type: text/x-patch, Size: 19239 bytes --]

diff -r e0e26c5c0218 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c	Mon Dec 08 09:46:24 2008 +0900
+++ b/xen/common/sched_credit.c	Mon Dec 08 16:36:46 2008 +0900
@@ -47,6 +47,7 @@
     (CSCHED_CREDITS_PER_TICK * CSCHED_TICKS_PER_TSLICE)
 #define CSCHED_CREDITS_PER_ACCT     \
     (CSCHED_CREDITS_PER_TICK * CSCHED_TICKS_PER_ACCT)
+#define CSCHED_MSECS_PER_BOOST_TSLICE 2
 
 
 /*
@@ -200,6 +201,7 @@ struct csched_vcpu {
     struct csched_dom *sdom;
     struct vcpu *vcpu;
     atomic_t credit;
+    atomic_t boost_credit;
     uint16_t flags;
     int16_t pri;
     s_time_t start_time;
@@ -225,6 +227,8 @@ struct csched_dom {
     uint16_t active_vcpu_count;
     uint16_t weight;
     uint16_t cap;
+    uint16_t boost_ratio;
+    uint16_t max_boost_period;
 };
 
 /*
@@ -239,6 +243,8 @@ struct csched_private {
     cpumask_t idlers;
     uint32_t weight;
     uint32_t credit;
+    uint32_t boost_credit;
+    uint16_t total_boost_ratio;
     int credit_balance;
     uint32_t runq_sort;
     CSCHED_STATS_DEFINE()
@@ -249,6 +255,10 @@ struct csched_private {
  * Global variables
  */
 static struct csched_private csched_priv;
+
+/* opt_credit_tslice: time slice for BOOST priority */
+static unsigned int opt_credit_tslice = CSCHED_MSECS_PER_BOOST_TSLICE;
+integer_param("credit_tslice", opt_credit_tslice);
 
 static void csched_tick(void *_cpu);
 
@@ -340,6 +350,14 @@ __runq_tickle(unsigned int cpu, struct c
             cpus_or(mask, mask, csched_priv.idlers);
             cpus_and(mask, mask, new->vcpu->cpu_affinity);
         }
+    }
+
+    /* If new VCPU has boost credit, signal the CPU. */
+    if ( new->pri == CSCHED_PRI_TS_BOOST &&
+         new->sdom->max_boost_period && cpus_empty(mask) )
+    {
+        CSCHED_STAT_CRANK(tickle_local_other);
+        cpu_set(cpu, mask);
     }
 
     /* Send scheduler interrupts to designated CPUs */
@@ -503,6 +521,8 @@ __csched_vcpu_acct_start_locked(struct c
         {
             list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
             csched_priv.weight += sdom->weight;
+            csched_priv.boost_credit += (sdom->boost_ratio *
+                                         CSCHED_CREDITS_PER_TSLICE) / 100;
         }
     }
 }
@@ -535,6 +555,8 @@ __csched_vcpu_acct_stop_locked(struct cs
         BUG_ON( csched_priv.weight < sdom->weight );
         list_del_init(&sdom->active_sdom_elem);
         csched_priv.weight -= sdom->weight;
+        csched_priv.boost_credit -= (sdom->boost_ratio *
+                                     CSCHED_CREDITS_PER_TSLICE) / 100;
     }
 }
 
@@ -545,14 +567,6 @@ csched_vcpu_acct(unsigned int cpu)
 
     ASSERT( current->processor == cpu );
     ASSERT( svc->sdom != NULL );
-
-    /*
-     * If this VCPU's priority was boosted when it last awoke, reset it.
-     * If the VCPU is found here, then it's consuming a non-negligeable
-     * amount of CPU resources and should no longer be boosted.
-     */
-    if ( svc->pri == CSCHED_PRI_TS_BOOST )
-        svc->pri = CSCHED_PRI_TS_UNDER;
 
     /*
      * Put this VCPU and domain back on the active list if it was
@@ -595,6 +609,7 @@ csched_vcpu_init(struct vcpu *vc)
     svc->sdom = sdom;
     svc->vcpu = vc;
     atomic_set(&svc->credit, 0);
+    atomic_set(&svc->boost_credit, 0);
     svc->flags = 0U;
     svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
     CSCHED_VCPU_STATS_RESET(svc);
@@ -718,25 +733,73 @@ csched_dom_cntl(
     {
         op->u.credit.weight = sdom->weight;
         op->u.credit.cap = sdom->cap;
+        op->u.credit.max_boost_period = sdom->max_boost_period;
+        op->u.credit.boost_ratio = sdom->boost_ratio;
     }
     else
     {
+        uint16_t weight = (uint16_t)~0U;
+
         ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
 
         spin_lock_irqsave(&csched_priv.lock, flags);
 
-        if ( op->u.credit.weight != 0 )
+        if ( (op->u.credit.weight != 0) &&
+             (sdom->boost_ratio == 0 || op->u.credit.boost_ratio == 0) )
+        {
+            weight = op->u.credit.weight;
+        }
+
+        if ( op->u.credit.cap != (uint16_t)~0U )
+            sdom->cap = op->u.credit.cap;
+
+        if ( (op->u.credit.max_boost_period != (uint16_t)~0U) &&
+             (op->u.credit.max_boost_period >= CSCHED_MSECS_PER_TICK ||
+              op->u.credit.max_boost_period == 0) )
+        {
+            sdom->max_boost_period = op->u.credit.max_boost_period;
+        }
+
+        if ( (op->u.credit.boost_ratio != (uint16_t)~0U) &&
+             ((csched_priv.total_boost_ratio - sdom->boost_ratio +
+               op->u.credit.boost_ratio) <= 100 * csched_priv.ncpus) &&
+             (sdom->max_boost_period || op->u.credit.boost_ratio == 0) )
+        {
+            uint16_t new_bc, old_bc;
+
+            new_bc = (op->u.credit.boost_ratio *
+                      CSCHED_CREDITS_PER_TSLICE) / 100;
+            old_bc = (sdom->boost_ratio *
+                      CSCHED_CREDITS_PER_TSLICE) / 100;
+
+            csched_priv.total_boost_ratio -= sdom->boost_ratio;
+            csched_priv.total_boost_ratio += op->u.credit.boost_ratio;
+
+            sdom->boost_ratio = op->u.credit.boost_ratio;
+
+            if ( !list_empty(&sdom->active_sdom_elem) )
+            {
+                csched_priv.boost_credit -= old_bc;
+                csched_priv.boost_credit += new_bc;
+            }
+            if ( new_bc == 0 )
+            {
+                if ( sdom->weight == 0 )
+                    weight = CSCHED_DEFAULT_WEIGHT;
+            }
+            else
+                weight = 0;
+        }
+
+        if ( weight != (uint16_t)~0U )
         {
             if ( !list_empty(&sdom->active_sdom_elem) )
             {
                 csched_priv.weight -= sdom->weight;
-                csched_priv.weight += op->u.credit.weight;
+                csched_priv.weight += weight;
             }
-            sdom->weight = op->u.credit.weight;
-        }
-
-        if ( op->u.credit.cap != (uint16_t)~0U )
-            sdom->cap = op->u.credit.cap;
+            sdom->weight = weight;
+        }
 
         spin_unlock_irqrestore(&csched_priv.lock, flags);
     }
@@ -765,6 +828,8 @@ csched_dom_init(struct domain *dom)
     sdom->dom = dom;
     sdom->weight = CSCHED_DEFAULT_WEIGHT;
     sdom->cap = 0U;
+    sdom->boost_ratio = 0U;
+    sdom->max_boost_period = 0U;
     dom->sched_priv = sdom;
 
     return 0;
@@ -780,15 +845,16 @@ csched_dom_destroy(struct domain *dom)
 /*
  * This is a O(n) optimized sort of the runq.
  *
- * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
- * through the runq and move up any UNDERs that are preceded by OVERS. We
- * remember the last UNDER to make the move up operation O(1).
+ * Time-share VCPUs can only be one of three priorities, BOOST, UNDER or OVER.
+ * We walk through the runq and move up any BOOSTs that are preceded by UNDERs
+ * or OVERs, and any UNDERs that are preceded by OVERS. We remember the last
+ * BOOST and UNDER to make the move up operation O(1).
  */
 static void
 csched_runq_sort(unsigned int cpu)
 {
     struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
-    struct list_head *runq, *elem, *next, *last_under;
+    struct list_head *runq, *elem, *next, *last_boost, *last_under;
     struct csched_vcpu *svc_elem;
     unsigned long flags;
     int sort_epoch;
@@ -803,14 +869,26 @@ csched_runq_sort(unsigned int cpu)
 
     runq = &spc->runq;
     elem = runq->next;
-    last_under = runq;
+    last_boost = last_under = runq;
 
     while ( elem != runq )
     {
         next = elem->next;
         svc_elem = __runq_elem(elem);
 
-        if ( svc_elem->pri >= CSCHED_PRI_TS_UNDER )
+        if ( svc_elem->pri == CSCHED_PRI_TS_BOOST )
+        {
+            /* does elem need to move up the runq? */
+            if ( elem->prev != last_boost )
+            {
+                list_del(elem);
+                list_add(elem, last_boost);
+            }
+            if ( last_boost == last_under )
+                last_under = elem;
+            last_boost = elem;
+        }
+        else if ( svc_elem->pri == CSCHED_PRI_TS_UNDER )
         {
             /* does elem need to move up the runq? */
             if ( elem->prev != last_under )
@@ -846,6 +924,14 @@ csched_acct(void)
     int credit;
     int64_t credit_sum;
     int credit_average;
+    /* for boost credit */
+    uint32_t bc_total;
+    uint32_t bc_fair;
+    int boost_credit;
+    int max_boost_credit;
+    int64_t bc_sum;
+    int bc_average;
+
 
     spin_lock_irqsave(&csched_priv.lock, flags);
 
@@ -854,8 +940,12 @@ csched_acct(void)
     {
         svc = list_entry(iter_vcpu, struct csched_vcpu, inactive_vcpu_elem);
 
-        if ( atomic_read(&svc->credit)
-             <= CSCHED_CREDITS_PER_TICK * (CSCHED_TICKS_PER_ACCT - 1) )
+        max_boost_credit = svc->sdom->max_boost_period *
+                           (CSCHED_CREDITS_PER_TSLICE/CSCHED_MSECS_PER_TSLICE);
+        if ( (atomic_read(&svc->credit)
+              <= CSCHED_CREDITS_PER_TICK * (CSCHED_TICKS_PER_ACCT - 1)) ||
+             (atomic_read(&svc->boost_credit)
+              <= (max_boost_credit - CSCHED_CREDITS_PER_TICK)) )
         {
             __csched_vcpu_acct_start_locked(svc);
         }
@@ -863,6 +953,7 @@ csched_acct(void)
 
     weight_total = csched_priv.weight;
     credit_total = csched_priv.credit;
+    bc_total = csched_priv.boost_credit;
 
     /* Converge balance towards 0 when it drops negative */
     if ( csched_priv.credit_balance < 0 )
@@ -871,7 +962,7 @@ csched_acct(void)
         CSCHED_STAT_CRANK(acct_balance);
     }
 
-    if ( unlikely(weight_total == 0) )
+    if ( unlikely(weight_total == 0 && bc_total == 0) )
     {
         csched_priv.credit_balance = 0;
         spin_unlock_irqrestore(&csched_priv.lock, flags);
@@ -886,26 +977,43 @@ csched_acct(void)
     credit_xtra = 0;
     credit_cap = 0U;
 
+    /* Firstly, subtract boost credits from credit_total. */
+    if ( bc_total != 0 )
+    {
+        credit_total -= bc_total;
+        credit_balance += bc_total;
+    }
+
+    /* Avoid 0 divide error */
+    if ( weight_total == 0 )
+        weight_total = 1;
+
     list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
     {
         sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
 
         BUG_ON( is_idle_domain(sdom->dom) );
         BUG_ON( sdom->active_vcpu_count == 0 );
-        BUG_ON( sdom->weight == 0 );
         BUG_ON( sdom->weight > weight_left );
+
+        max_boost_credit = sdom->max_boost_period *
+                           (CSCHED_CREDITS_PER_TSLICE / CSCHED_MSECS_PER_TSLICE);
 
         /* Compute the average of active VCPUs. */
         credit_sum = 0;
+        bc_sum = 0;
         list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
         {
             svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
             BUG_ON( sdom != svc->sdom );
 
             credit_sum += atomic_read(&svc->credit);
+            bc_sum += atomic_read(&svc->boost_credit);
         }
         credit_average = ( credit_sum + (sdom->active_vcpu_count - 1)
                          ) / sdom->active_vcpu_count;
+        bc_average = ( bc_sum + (sdom->active_vcpu_count - 1)
+                     ) / sdom->active_vcpu_count;
 
         weight_left -= sdom->weight;
 
@@ -940,7 +1048,9 @@ csched_acct(void)
 
         if ( credit_fair < credit_peak )
         {
-            credit_xtra = 1;
+            /* credit_fair is 0 if weight is 0. */
+            if ( sdom->weight != 0 )
+                credit_xtra = 1;
         }
         else
         {
@@ -972,6 +1082,10 @@ csched_acct(void)
         credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
                       ) / sdom->active_vcpu_count;
 
+        /* Compute fair share of boost credit per VCPU */
+        bc_fair = ( ((sdom->boost_ratio * CSCHED_CREDITS_PER_ACCT)/100) +
+                    (sdom->active_vcpu_count - 1)
+                  ) / sdom->active_vcpu_count;
 
         list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
         {
@@ -982,6 +1096,42 @@ csched_acct(void)
             credit = atomic_read(&svc->credit);
             atomic_add(credit_average - credit + credit_fair, &svc->credit);
             credit = atomic_read(&svc->credit);
+
+            /* Balance and increment boost credit */
+            boost_credit = atomic_read(&svc->boost_credit);
+            atomic_add(bc_average - boost_credit + bc_fair, &svc->boost_credit);
+            boost_credit = atomic_read(&svc->boost_credit);
+
+            /*
+             * Upper bound on boost credits.
+             * Add excess to credit.
+             */
+            if ( boost_credit > max_boost_credit )
+            {
+                atomic_add(boost_credit - max_boost_credit, &svc->credit);
+                credit = atomic_read(&svc->credit);
+                atomic_set(&svc->boost_credit, max_boost_credit);
+                boost_credit = atomic_read(&svc->boost_credit);
+            }
+            /*
+             * If credit is negative,
+             * boost credits compensate credit.
+             */
+            if ( credit < 0 && boost_credit > 0 )
+            {
+                if ( boost_credit > -credit )
+                {
+                    atomic_sub(-credit, &svc->boost_credit);
+                    atomic_add(-credit, &svc->credit);
+                }
+                else
+                {
+                    atomic_sub(boost_credit, &svc->boost_credit);
+                    atomic_add(boost_credit, &svc->credit);
+                }
+                credit = atomic_read(&svc->credit);
+                boost_credit = atomic_read(&svc->boost_credit);
+            }
 
             /*
              * Recompute priority or, if VCPU is idling, remove it from
@@ -1011,7 +1161,10 @@ csched_acct(void)
             }
             else
             {
-                svc->pri = CSCHED_PRI_TS_UNDER;
+                if ( boost_credit > 0 )
+                    svc->pri = CSCHED_PRI_TS_BOOST;
+                else
+                    svc->pri = CSCHED_PRI_TS_UNDER;
 
                 /* Unpark any capped domains whose credits go positive */
                 if ( svc->flags & CSCHED_FLAG_VCPU_PARKED)
@@ -1026,18 +1179,37 @@ csched_acct(void)
                     svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
                 }
 
-                /* Upper bound on credits means VCPU stops earning */
+                /*
+                 * Upper bound on credits and boost credits means VCPU stops
+                 * earning
+                 */
                 if ( credit > CSCHED_CREDITS_PER_TSLICE )
                 {
-                    __csched_vcpu_acct_stop_locked(svc);
+                    atomic_add(credit - CSCHED_CREDITS_PER_TSLICE,
+                               &svc->boost_credit);
+                    boost_credit = atomic_read(&svc->boost_credit);
                     credit = CSCHED_CREDITS_PER_TSLICE;
                     atomic_set(&svc->credit, credit);
+
+                    if ( boost_credit > max_boost_credit )
+                    {
+                        atomic_set(&svc->boost_credit, max_boost_credit);
+                        __csched_vcpu_acct_stop_locked(svc);
+                    }
                 }
             }
 
-            CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
-            CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
-            credit_balance += credit;
+            if ( sdom->boost_ratio == 0 )
+            {
+                CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
+                CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
+                credit_balance += credit;
+            }
+            else
+            {
+                CSCHED_VCPU_STAT_SET(svc, credit_last, boost_credit);
+                CSCHED_VCPU_STAT_SET(svc, credit_incr, bc_fair);
+            }
         }
     }
 
@@ -1221,6 +1393,22 @@ csched_schedule(s_time_t now)
                ) /
                ( MILLISECS(CSCHED_MSECS_PER_TSLICE) /
                  CSCHED_CREDITS_PER_TSLICE );
+    if ( scurr->pri == CSCHED_PRI_TS_BOOST )
+    {
+        int boost_credit = atomic_read(&scurr->boost_credit);
+
+        if ( boost_credit > consumed )
+        {
+            atomic_sub(consumed, &scurr->boost_credit);
+            consumed = 0;
+        }
+        else
+        {
+            atomic_sub(boost_credit, &scurr->boost_credit);
+            consumed -= boost_credit;
+            scurr->pri = CSCHED_PRI_TS_UNDER;
+        }
+    }
     if ( consumed > 0 && !is_idle_vcpu(current) )
         atomic_sub(consumed, &scurr->credit);
 
@@ -1264,7 +1452,17 @@ csched_schedule(s_time_t now)
     /*
      * Return task to run next...
      */
-    ret.time = MILLISECS(CSCHED_MSECS_PER_TSLICE);
+    if ( snext->pri == CSCHED_PRI_TS_BOOST )
+    {
+        struct csched_vcpu * const svc = __runq_elem(runq->next);
+
+        if ( svc->pri == CSCHED_PRI_TS_BOOST )
+            ret.time = MILLISECS(opt_credit_tslice);
+        else
+            ret.time = MILLISECS(CSCHED_MSECS_PER_TICK);
+    }
+    else
+        ret.time = MILLISECS(CSCHED_MSECS_PER_TSLICE);
     ret.task = snext->vcpu;
 
     snext->start_time = now;
@@ -1287,7 +1485,11 @@ csched_dump_vcpu(struct csched_vcpu *svc
 
     if ( sdom )
     {
-        printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
+        printk(" credit=%i bc=%i [w=%u,bc=%i]",
+               atomic_read(&svc->credit),
+               atomic_read(&svc->boost_credit),
+               sdom->weight,
+               (sdom->boost_ratio * CSCHED_CREDITS_PER_TSLICE)/100);
 #ifdef CSCHED_STATS
         printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
                 svc->stats.credit_last,
@@ -1353,6 +1555,8 @@ csched_dump(void)
            "\tcredit balance     = %d\n"
            "\tweight             = %u\n"
            "\trunq_sort          = %u\n"
+           "\tboost_credit       = %u\n"
+           "\ttotal_boost_ratio  = %u\n"
            "\tdefault-weight     = %d\n"
            "\tmsecs per tick     = %dms\n"
            "\tcredits per tick   = %d\n"
@@ -1364,6 +1568,8 @@ csched_dump(void)
            csched_priv.credit_balance,
            csched_priv.weight,
            csched_priv.runq_sort,
+           csched_priv.boost_credit,
+           csched_priv.total_boost_ratio,
            CSCHED_DEFAULT_WEIGHT,
            CSCHED_MSECS_PER_TICK,
            CSCHED_CREDITS_PER_TICK,
@@ -1417,6 +1623,8 @@ csched_init(void)
     csched_priv.credit = 0U;
     csched_priv.credit_balance = 0;
     csched_priv.runq_sort = 0U;
+    csched_priv.boost_credit = 0;
+    csched_priv.total_boost_ratio = 0;
     CSCHED_STATS_RESET();
 }
 
diff -r e0e26c5c0218 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Mon Dec 08 09:46:24 2008 +0900
+++ b/xen/include/public/domctl.h	Mon Dec 08 16:36:46 2008 +0900
@@ -311,6 +311,8 @@ struct xen_domctl_scheduler_op {
         struct xen_domctl_sched_credit {
             uint16_t weight;
             uint16_t cap;
+            uint16_t max_boost_period;
+            uint16_t boost_ratio;
         } credit;
     } u;
 };

[-- Attachment #3: credit_boost_tools.patch --]
[-- Type: text/x-patch, Size: 15698 bytes --]

diff -r e0e26c5c0218 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c	Mon Dec 08 09:46:24 2008 +0900
+++ b/tools/python/xen/lowlevel/xc/xc.c	Mon Dec 08 16:36:45 2008 +0900
@@ -1281,18 +1281,26 @@ static PyObject *pyxc_sched_credit_domai
     uint32_t domid;
     uint16_t weight;
     uint16_t cap;
-    static char *kwd_list[] = { "domid", "weight", "cap", NULL };
-    static char kwd_type[] = "I|HH";
+    uint16_t max_boost_period;
+    uint16_t boost_ratio;
+    static char *kwd_list[] = { "domid", "weight", "cap",
+                                "max_boost_period", "boost_ratio", NULL };
+    static char kwd_type[] = "I|HHhh";
     struct xen_domctl_sched_credit sdom;
     
     weight = 0;
     cap = (uint16_t)~0U;
+    max_boost_period = (uint16_t)~0U;
+    boost_ratio = (uint16_t)~0U;
     if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list, 
-                                     &domid, &weight, &cap) )
+                                     &domid, &weight, &cap,
+                                     &max_boost_period, &boost_ratio) )
         return NULL;
 
     sdom.weight = weight;
     sdom.cap = cap;
+    sdom.max_boost_period = max_boost_period;
+    sdom.boost_ratio = boost_ratio;
 
     if ( xc_sched_credit_domain_set(self->xc_handle, domid, &sdom) != 0 )
         return pyxc_error_to_exception();
@@ -1312,9 +1320,11 @@ static PyObject *pyxc_sched_credit_domai
     if ( xc_sched_credit_domain_get(self->xc_handle, domid, &sdom) != 0 )
         return pyxc_error_to_exception();
 
-    return Py_BuildValue("{s:H,s:H}",
-                         "weight",  sdom.weight,
-                         "cap",     sdom.cap);
+    return Py_BuildValue("{s:H,s:H,s:i,s:i}",
+                         "weight",           sdom.weight,
+                         "cap",              sdom.cap,
+                         "max_boost_period", sdom.max_boost_period,
+                         "boost_ratio",      sdom.boost_ratio);
 }
 
 static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args)
@@ -1720,8 +1730,11 @@ static PyMethodDef pyxc_methods[] = {
       METH_KEYWORDS, "\n"
       "Set the scheduling parameters for a domain when running with the\n"
       "SMP credit scheduler.\n"
-      " domid     [int]:   domain id to set\n"
-      " weight    [short]: domain's scheduling weight\n"
+      " domid            [int]:   domain id to set\n"
+      " weight           [short]: domain's scheduling weight\n"
+      " cap              [short]: cap\n"
+      " max_boost_period [short]: upper limit in BOOST priority\n"
+      " boost_ratio      [short]; domain's boost ratio per a cpu\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
     { "sched_credit_domain_get",
@@ -1729,9 +1742,12 @@ static PyMethodDef pyxc_methods[] = {
       METH_VARARGS, "\n"
       "Get the scheduling parameters for a domain when running with the\n"
       "SMP credit scheduler.\n"
-      " domid     [int]:   domain id to get\n"
+      " domid            [int]:   domain id to get\n"
       "Returns:   [dict]\n"
-      " weight    [short]: domain's scheduling weight\n"},
+      " weight           [short]: domain's scheduling weight\n"
+      " cap              [short]: cap\n"
+      " max_boost_period [short]: upper limit in BOOST priority\n"
+      " boost_ratio      [short]: domain's boost ratio per a cpu\n"},
 
     { "evtchn_alloc_unbound", 
       (PyCFunction)pyxc_evtchn_alloc_unbound,
diff -r e0e26c5c0218 tools/python/xen/xend/XendAPI.py
--- a/tools/python/xen/xend/XendAPI.py	Mon Dec 08 09:46:24 2008 +0900
+++ b/tools/python/xen/xend/XendAPI.py	Mon Dec 08 16:36:45 2008 +0900
@@ -1505,10 +1505,14 @@ class XendAPI(object):
 
         #need to update sched params aswell
         if 'weight' in xeninfo.info['vcpus_params'] \
-           and 'cap' in xeninfo.info['vcpus_params']:
+           and 'cap' in xeninfo.info['vcpus_params'] \
+           and 'max_boost_period' in xeninfo.info['vcpus_params'] \
+           and 'boost_ratio' in xeninfo.info['vcpus_params']:
             weight = xeninfo.info['vcpus_params']['weight']
             cap = xeninfo.info['vcpus_params']['cap']
-            xendom.domain_sched_credit_set(xeninfo.getDomid(), weight, cap)
+            max_boost_period = xeninfo.info['vcpus_params']['max_boost_period']
+            boost_ratio = xeninfo.info['vcpus_params']['boost_ratio']
+            xendom.domain_sched_credit_set(xeninfo.getDomid(), weight, cap, max_boost_period, boost_ratio)
 
     def VM_set_VCPUs_number_live(self, _, vm_ref, num):
         dom = XendDomain.instance().get_vm_by_uuid(vm_ref)
diff -r e0e26c5c0218 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py	Mon Dec 08 09:46:24 2008 +0900
+++ b/tools/python/xen/xend/XendConfig.py	Mon Dec 08 16:36:45 2008 +0900
@@ -585,6 +585,10 @@ class XendConfig(dict):
             int(sxp.child_value(sxp_cfg, "cpu_weight", 256))
         cfg["vcpus_params"]["cap"] = \
             int(sxp.child_value(sxp_cfg, "cpu_cap", 0))
+        cfg["vcpus_params"]["max_boost_period"] = \
+            int(sxp.child_value(sxp_cfg, "cpu_max_boost_period", 0))
+        cfg["vcpus_params"]["boost_ratio"] = \
+            int(sxp.child_value(sxp_cfg, "cpu_boost_ratio", 0))
 
         # Only extract options we know about.
         extract_keys = LEGACY_UNSUPPORTED_BY_XENAPI_CFG + \
diff -r e0e26c5c0218 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py	Mon Dec 08 09:46:24 2008 +0900
+++ b/tools/python/xen/xend/XendDomain.py	Mon Dec 08 16:36:45 2008 +0900
@@ -1536,7 +1536,7 @@ class XendDomain:
 
         @param domid: Domain ID or Name
         @type domid: int or string.
-        @rtype: dict with keys 'weight' and 'cap'
+        @rtype: dict with keys 'weight' and 'cap' and 'max_boost_period' and 'boost_ratio'
         @return: credit scheduler parameters
         """
         dominfo = self.domain_lookup_nr(domid)
@@ -1549,20 +1549,26 @@ class XendDomain:
             except Exception, ex:
                 raise XendError(str(ex))
         else:
-            return {'weight' : dominfo.getWeight(),
-                    'cap'    : dominfo.getCap()} 
+            return {'weight'          : dominfo.getWeight(),
+                    'cap'             : dominfo.getCap(),
+                    'max_boost_period': dominfo.getMaxBoostPeriod(),
+                    'boost_ratio'    : dominfo.getBoostRatio()} 
     
-    def domain_sched_credit_set(self, domid, weight = None, cap = None):
+    def domain_sched_credit_set(self, domid, weight = None, cap = None, max_boost_period = None, boost_ratio = None):
         """Set credit scheduler parameters for a domain.
 
         @param domid: Domain ID or Name
         @type domid: int or string.
         @type weight: int
         @type cap: int
+        @type max_boost_period: int
+        @type boost_ratio: int
         @rtype: 0
         """
         set_weight = False
         set_cap = False
+        set_max_boost_period = False
+        set_boost_ratio = False
         dominfo = self.domain_lookup_nr(domid)
         if not dominfo:
             raise XendInvalidDomain(str(domid))
@@ -1581,17 +1587,37 @@ class XendDomain:
             else:
                 set_cap = True
 
+            if max_boost_period is None:
+                max_boost_period = int(~0)
+            elif max_boost_period < 0:
+                raise XendError("max_boost_period is out of range")
+            else:
+                set_max_boost_period = True
+
+            if boost_ratio is None:
+                boost_ratio = int(~0)
+            elif boost_ratio < 0:
+                raise XendError("boost_ratio is out of range")
+            else:
+                set_boost_ratio = True
+
             assert type(weight) == int
             assert type(cap) == int
+            assert type(max_boost_period) == int
+            assert type(boost_ratio) == int
 
             rc = 0
             if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
-                rc = xc.sched_credit_domain_set(dominfo.getDomid(), weight, cap)
+                rc = xc.sched_credit_domain_set(dominfo.getDomid(), weight, cap, max_boost_period, boost_ratio)
             if rc == 0:
                 if set_weight:
                     dominfo.setWeight(weight)
                 if set_cap:
                     dominfo.setCap(cap)
+                if set_max_boost_period:
+                    dominfo.setMaxBoostPeriod(max_boost_period)
+                if set_boost_ratio:
+                    dominfo.setBoostRatio(boost_ratio)
                 self.managed_config_save(dominfo)
             return rc
         except Exception, ex:
diff -r e0e26c5c0218 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py	Mon Dec 08 09:46:24 2008 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py	Mon Dec 08 16:36:45 2008 +0900
@@ -465,7 +465,9 @@ class XendDomainInfo:
                 if xennode.xenschedinfo() == 'credit':
                     xendomains.domain_sched_credit_set(self.getDomid(),
                                                        self.getWeight(),
-                                                       self.getCap())
+                                                       self.getCap(),
+                                                       self.getMaxBoostPeriod(),
+                                                       self.getBoostRatio())
             except:
                 log.exception('VM start failed')
                 self.destroy()
@@ -1606,6 +1608,18 @@ class XendDomainInfo:
     def setWeight(self, cpu_weight):
         self.info['vcpus_params']['weight'] = cpu_weight
 
+    def getMaxBoostPeriod(self):
+        return self.info['vcpus_params']['max_boost_period']
+
+    def setMaxBoostPeriod(self, cpu_max_boost_period):
+        self.info['vcpus_params']['max_boost_period'] = cpu_max_boost_period
+
+    def getBoostRatio(self):
+        return self.info['vcpus_params']['boost_ratio']
+
+    def setBoostRatio(self, cpu_boost_ratio):
+        self.info['vcpus_params']['boost_ratio'] = cpu_boost_ratio
+
     def getRestartCount(self):
         return self._readVm('xend/restart_count')
 
diff -r e0e26c5c0218 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py	Mon Dec 08 09:46:24 2008 +0900
+++ b/tools/python/xen/xm/main.py	Mon Dec 08 16:36:45 2008 +0900
@@ -150,7 +150,7 @@ SUBCOMMAND_HELP = {
     'log'         : ('', 'Print Xend log'),
     'rename'      : ('<Domain> <NewDomainName>', 'Rename a domain.'),
     'sched-sedf'  : ('<Domain> [options]', 'Get/set EDF parameters.'),
-    'sched-credit': ('[-d <Domain> [-w[=WEIGHT]|-c[=CAP]]]',
+    'sched-credit': ('[-d <Domain> [-w[=WEIGHT]|-c[=CAP]|-m[=MAXBOOSTPERIOD]|-r[=BOOSTRATIO]]]',
                      'Get/set credit scheduler parameters.'),
     'sysrq'       : ('<Domain> <letter>', 'Send a sysrq to a domain.'),
     'debug-keys'  : ('<Keys>', 'Send debug keys to Xen.'),
@@ -240,6 +240,8 @@ SUBCOMMAND_OPTIONS = {
        ('-d DOMAIN', '--domain=DOMAIN', 'Domain to modify'),
        ('-w WEIGHT', '--weight=WEIGHT', 'Weight (int)'),
        ('-c CAP',    '--cap=CAP',       'Cap (int)'),
+       ('-m MAXBOOSTPERIOD', '--maxboostperiod=MAXBOOSTPERIOD', 'Upper limit of boost period (ms)'),
+       ('-r BOOSTRATIO', '--ratio=BOOSTRATIO', 'Boost ratio per a cpu (int)'),
     ),
     'list': (
        ('-l', '--long',         'Output all VM details in SXP'),
@@ -1578,8 +1580,8 @@ def xm_sched_credit(args):
     check_sched_type('credit')
 
     try:
-        opts, params = getopt.getopt(args, "d:w:c:",
-            ["domain=", "weight=", "cap="])
+        opts, params = getopt.getopt(args, "d:w:c:m:r:",
+            ["domain=", "weight=", "cap=", "maxboostperiod=", "ratio="])
     except getopt.GetoptError, opterr:
         err(opterr)
         usage('sched-credit')
@@ -1587,6 +1589,8 @@ def xm_sched_credit(args):
     domid = None
     weight = None
     cap = None
+    max_boost_period = None
+    boost_ratio = None
 
     for o, a in opts:
         if o in ["-d", "--domain"]:
@@ -1594,18 +1598,22 @@ def xm_sched_credit(args):
         elif o in ["-w", "--weight"]:
             weight = int(a)
         elif o in ["-c", "--cap"]:
-            cap = int(a);
+            cap = int(a)
+        elif o in ["-m", "--maxboostperiod"]:
+            max_boost_period = int(a)
+        elif o in ["-r", "--ratio"]:
+            boost_ratio = int(a);
 
     doms = filter(lambda x : domid_match(domid, x),
                   [parse_doms_info(dom)
                   for dom in getDomains(None, 'all')])
 
-    if weight is None and cap is None:
+    if weight is None and cap is None and max_boost_period is None and boost_ratio is None:
         if domid is not None and doms == []: 
             err("Domain '%s' does not exist." % domid)
             usage('sched-credit')
         # print header if we aren't setting any parameters
-        print '%-33s %4s %6s %4s' % ('Name','ID','Weight','Cap')
+        print '%-33s %4s %6s %4s %8s %5s' % ('Name','ID','Weight','Cap','Max(ms)','Ratio')
         
         for d in doms:
             try:
@@ -1618,16 +1626,18 @@ def xm_sched_credit(args):
             except xmlrpclib.Fault:
                 pass
 
-            if 'weight' not in info or 'cap' not in info:
+            if 'weight' not in info or 'cap' not in info or 'max_boost_period' not in info or 'boost_ratio' not in info:
                 # domain does not support sched-credit?
-                info = {'weight': -1, 'cap': -1}
+                info = {'weight': -1, 'cap': -1, 'max_boost_period':-1, 'boost_ratio':-1}
 
             info['weight'] = int(info['weight'])
             info['cap']    = int(info['cap'])
+            info['max_boost_period'] = int(info['max_boost_period'])
+            info['boost_ratio'] = int(info['boost_ratio'])
             
             info['name']  = d['name']
             info['domid'] = str(d['domid'])
-            print( ("%(name)-32s %(domid)5s %(weight)6d %(cap)4d") % info)
+            print( ("%(name)-32s %(domid)5s %(weight)6d %(cap)4d %(max_boost_period)8d %(boost_ratio)5d") % info)
     else:
         if domid is None:
             # place holder for system-wide scheduler parameters
@@ -1644,6 +1654,14 @@ def xm_sched_credit(args):
                     get_single_vm(domid),
                     "cap",
                     cap)
+                server.xenapi.VM.add_to_VCPUs_params_live(
+                    get_single_vm(domid),
+                    "max_boost_period",
+                    max_boost_period)
+                server.xenapi.VM.add_to_VCPUs_params_live(
+                    get_single_vm(domid),
+                    "boost_ratio",
+                    boost_ratio)
             else:
                 server.xenapi.VM.add_to_VCPUs_params(
                     get_single_vm(domid),
@@ -1653,8 +1671,16 @@ def xm_sched_credit(args):
                     get_single_vm(domid),
                     "cap",
                     cap)
+                server.xenapi.VM.add_to_VCPUs_params(
+                    get_single_vm(domid),
+                    "max_boost_period",
+                    max_boost_period)
+                server.xenapi.VM.add_to_VCPUs_params(
+                    get_single_vm(domid),
+                    "boost_ratio",
+                    boost_ratio)
         else:
-            result = server.xend.domain.sched_credit_set(domid, weight, cap)
+            result = server.xend.domain.sched_credit_set(domid, weight, cap, max_boost_period, boost_ratio)
             if result != 0:
                 err(str(result))
 

[-- Attachment #4: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [RFC][PATCH 0/4] Modification of credit scheduler rev2
@ 2008-12-18  2:57 NISHIGUCHI Naoki
  2008-12-18  3:00 ` [RFC][PATCH 1/4] sched: more accurate credit scheduling NISHIGUCHI Naoki
                   ` (4 more replies)
  0 siblings, 5 replies; 17+ messages in thread
From: NISHIGUCHI Naoki @ 2008-12-18  2:57 UTC (permalink / raw)
  To: George Dunlap, xen-devel; +Cc: Ian.Pratt, disheng.su, aviv, keir.fraser, sakaia

Hi all,

This patchset is a revised version of the patches that I posted 10 days
ago. It consists of the following 4 patches.

1. Subtract credit consumed accurately and shorten cpu time per one credit
2. Change the handling of credits over upper bound.
3. Balance credits of each vcpu of a domain
4. Introduce boost credit for latency-sensitive domain

It was not possible to separate these cleanly.
Please apply these patches in numerical order.

Please review these patches.
Any comments are appreciated.

Best regards,
Naoki Nishiguchi

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [RFC][PATCH 1/4] sched: more accurate credit scheduling
  2008-12-18  2:57 [RFC][PATCH 0/4] Modification of credit scheduler rev2 NISHIGUCHI Naoki
@ 2008-12-18  3:00 ` NISHIGUCHI Naoki
  2008-12-18  3:02 ` [RFC][PATCH 2/4] sched: change the handling of credits over upper bound NISHIGUCHI Naoki
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 17+ messages in thread
From: NISHIGUCHI Naoki @ 2008-12-18  3:00 UTC (permalink / raw)
  To: George Dunlap, xen-devel; +Cc: Ian.Pratt, disheng.su, aviv, keir.fraser, sakaia

[-- Attachment #1: Type: text/plain, Size: 399 bytes --]

With this patch applied, the credit scheduler accurately subtracts the
credit consumed and sets the priority correctly.
CSCHED_CREDITS_PER_TICK is changed from 100 to 10000, because a vcpu's
credit is subtracted in csched_schedule().

The difference between this patch and the previous version is that the
start_time variable was moved from the csched_vcpu structure to the
csched_pcpu structure.

Best regards,
Naoki Nishiguchi

[-- Attachment #2: credit_rev2_1_accurate.patch --]
[-- Type: text/x-patch, Size: 2147 bytes --]

diff -r 6595393a3d28 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c	Tue Dec 09 16:28:02 2008 +0000
+++ b/xen/common/sched_credit.c	Tue Dec 16 19:15:18 2008 +0900
@@ -42,7 +42,7 @@
 #define CSCHED_MSECS_PER_TICK       10
 #define CSCHED_MSECS_PER_TSLICE     \
     (CSCHED_MSECS_PER_TICK * CSCHED_TICKS_PER_TSLICE)
-#define CSCHED_CREDITS_PER_TICK     100
+#define CSCHED_CREDITS_PER_TICK     10000
 #define CSCHED_CREDITS_PER_TSLICE   \
     (CSCHED_CREDITS_PER_TICK * CSCHED_TICKS_PER_TSLICE)
 #define CSCHED_CREDITS_PER_ACCT     \
@@ -188,6 +188,7 @@ struct csched_pcpu {
     uint32_t runq_sort_last;
     struct timer ticker;
     unsigned int tick;
+    s_time_t start_time;
 };
 
 /*
@@ -545,11 +546,6 @@ csched_vcpu_acct(unsigned int cpu)
         svc->pri = CSCHED_PRI_TS_UNDER;
 
     /*
-     * Update credits
-     */
-    atomic_sub(CSCHED_CREDITS_PER_TICK, &svc->credit);
-
-    /*
      * Put this VCPU and domain back on the active list if it was
      * idling.
      *
@@ -1168,12 +1164,27 @@ csched_schedule(s_time_t now)
 {
     const int cpu = smp_processor_id();
     struct list_head * const runq = RUNQ(cpu);
+    struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
     struct csched_vcpu * const scurr = CSCHED_VCPU(current);
     struct csched_vcpu *snext;
     struct task_slice ret;
+    s_time_t passed = now - spc->start_time;
+    int consumed;
 
     CSCHED_STAT_CRANK(schedule);
     CSCHED_VCPU_CHECK(current);
+
+    /*
+     * Update credit
+     */
+    consumed = ( passed +
+                 (MILLISECS(CSCHED_MSECS_PER_TSLICE) /
+                  CSCHED_CREDITS_PER_TSLICE - 1)
+               ) /
+               ( MILLISECS(CSCHED_MSECS_PER_TSLICE) /
+                 CSCHED_CREDITS_PER_TSLICE );
+    if ( consumed > 0 && !is_idle_vcpu(current) )
+        atomic_sub(consumed, &scurr->credit);
 
     /*
      * Select next runnable local VCPU (ie top of local runq)
@@ -1217,6 +1228,8 @@ csched_schedule(s_time_t now)
      */
     ret.time = MILLISECS(CSCHED_MSECS_PER_TSLICE);
     ret.task = snext->vcpu;
+
+    spc->start_time = now;
 
     CSCHED_VCPU_CHECK(ret.task);
     return ret;

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [RFC][PATCH 2/4] sched: change the handling of credits over upper bound
  2008-12-18  2:57 [RFC][PATCH 0/4] Modification of credit scheduler rev2 NISHIGUCHI Naoki
  2008-12-18  3:00 ` [RFC][PATCH 1/4] sched: more accurate credit scheduling NISHIGUCHI Naoki
@ 2008-12-18  3:02 ` NISHIGUCHI Naoki
  2008-12-18  3:04 ` [RFC][PATCH 3/4] sched: balance credits of each vcpu of a domain NISHIGUCHI Naoki
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 17+ messages in thread
From: NISHIGUCHI Naoki @ 2008-12-18  3:02 UTC (permalink / raw)
  To: George Dunlap, xen-devel; +Cc: Ian.Pratt, disheng.su, aviv, keir.fraser, sakaia

[-- Attachment #1: Type: text/plain, Size: 369 bytes --]

With this patch applied, the credit scheduler no longer resets a vcpu's
credit (to 0) when the credit would exceed the upper bound. It also
prevents a vcpu from failing to become active.

The difference between this patch and the previous version is when a vcpu
is put back on the active list: this patch puts a vcpu back on the active
list only in csched_acct().

Best regards,
Naoki Nishiguchi

[-- Attachment #2: credit_rev2_2_preserve.patch --]
[-- Type: text/x-patch, Size: 5273 bytes --]

diff -r b431367fc717 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c	Wed Dec 17 16:00:48 2008 +0900
+++ b/xen/common/sched_credit.c	Wed Dec 17 16:01:30 2008 +0900
@@ -197,6 +197,7 @@ struct csched_vcpu {
 struct csched_vcpu {
     struct list_head runq_elem;
     struct list_head active_vcpu_elem;
+    struct list_head inactive_vcpu_elem;
     struct csched_dom *sdom;
     struct vcpu *vcpu;
     atomic_t credit;
@@ -232,6 +233,7 @@ struct csched_private {
 struct csched_private {
     spinlock_t lock;
     struct list_head active_sdom;
+    struct list_head inactive_vcpu;
     uint32_t ncpus;
     unsigned int master;
     cpumask_t idlers;
@@ -485,12 +487,9 @@ csched_cpu_pick(struct vcpu *vc)
 }
 
 static inline void
-__csched_vcpu_acct_start(struct csched_vcpu *svc)
+__csched_vcpu_acct_start_locked(struct csched_vcpu *svc)
 {
     struct csched_dom * const sdom = svc->sdom;
-    unsigned long flags;
-
-    spin_lock_irqsave(&csched_priv.lock, flags);
 
     if ( list_empty(&svc->active_vcpu_elem) )
     {
@@ -499,14 +498,13 @@ __csched_vcpu_acct_start(struct csched_v
 
         sdom->active_vcpu_count++;
         list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
+        list_del_init(&svc->inactive_vcpu_elem);
         if ( list_empty(&sdom->active_sdom_elem) )
         {
             list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
             csched_priv.weight += sdom->weight;
         }
     }
-
-    spin_unlock_irqrestore(&csched_priv.lock, flags);
 }
 
 static inline void
@@ -521,6 +519,7 @@ __csched_vcpu_acct_stop_locked(struct cs
 
     sdom->active_vcpu_count--;
     list_del_init(&svc->active_vcpu_elem);
+    list_add(&svc->inactive_vcpu_elem, &csched_priv.inactive_vcpu);
     if ( list_empty(&sdom->active_vcpu) )
     {
         BUG_ON( csched_priv.weight < sdom->weight );
@@ -546,18 +545,12 @@ csched_vcpu_acct(unsigned int cpu)
         svc->pri = CSCHED_PRI_TS_UNDER;
 
     /*
-     * Put this VCPU and domain back on the active list if it was
-     * idling.
-     *
      * If it's been active a while, check if we'd be better off
      * migrating it to run elsewhere (see multi-core and multi-thread
      * support in csched_cpu_pick()).
      */
-    if ( list_empty(&svc->active_vcpu_elem) )
-    {
-        __csched_vcpu_acct_start(svc);
-    }
-    else if ( csched_cpu_pick(current) != cpu )
+    if ( !list_empty(&svc->active_vcpu_elem) &&
+         csched_cpu_pick(current) != cpu )
     {
         CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
         CSCHED_STAT_CRANK(migrate_running);
@@ -582,6 +575,7 @@ csched_vcpu_init(struct vcpu *vc)
 
     INIT_LIST_HEAD(&svc->runq_elem);
     INIT_LIST_HEAD(&svc->active_vcpu_elem);
+    INIT_LIST_HEAD(&svc->inactive_vcpu_elem);
     svc->sdom = sdom;
     svc->vcpu = vc;
     atomic_set(&svc->credit, 0);
@@ -597,6 +591,16 @@ csched_vcpu_init(struct vcpu *vc)
             return -1;
     }
 
+    /* Add vcpu to inactive queue in order to start acct */
+    if ( !is_idle_vcpu(vc) )
+    {
+        unsigned long flags;
+
+        spin_lock_irqsave(&csched_priv.lock, flags);
+        list_add(&svc->inactive_vcpu_elem, &csched_priv.inactive_vcpu);
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+    }
+
     CSCHED_VCPU_CHECK(vc);
     return 0;
 }
@@ -617,6 +621,9 @@ csched_vcpu_destroy(struct vcpu *vc)
 
     if ( !list_empty(&svc->active_vcpu_elem) )
         __csched_vcpu_acct_stop_locked(svc);
+
+    if ( !list_empty(&svc->inactive_vcpu_elem) )
+        list_del_init(&svc->inactive_vcpu_elem);
 
     spin_unlock_irqrestore(&csched_priv.lock, flags);
 
@@ -835,6 +842,18 @@ csched_acct(void)
 
     spin_lock_irqsave(&csched_priv.lock, flags);
 
+    /* Add vcpu to active list when its credit were consumed by one tick. */
+    list_for_each_safe( iter_vcpu, next_vcpu, &csched_priv.inactive_vcpu )
+    {
+        svc = list_entry(iter_vcpu, struct csched_vcpu, inactive_vcpu_elem);
+
+        if ( atomic_read(&svc->credit)
+             <= CSCHED_CREDITS_PER_TICK * (CSCHED_TICKS_PER_ACCT - 1) )
+        {
+            __csched_vcpu_acct_start_locked(svc);
+        }
+    }
+
     weight_total = csched_priv.weight;
     credit_total = csched_priv.credit;
 
@@ -991,7 +1010,7 @@ csched_acct(void)
                 if ( credit > CSCHED_CREDITS_PER_TSLICE )
                 {
                     __csched_vcpu_acct_stop_locked(svc);
-                    credit = 0;
+                    credit = CSCHED_CREDITS_PER_TSLICE;
                     atomic_set(&svc->credit, credit);
                 }
             }
@@ -1353,6 +1372,17 @@ csched_dump(void)
             csched_dump_vcpu(svc);
         }
     }
+
+    printk("inactive vcpus:\n");
+    loop = 0;
+    list_for_each( iter_svc, &csched_priv.inactive_vcpu )
+    {
+        struct csched_vcpu *svc;
+        svc = list_entry(iter_svc, struct csched_vcpu, inactive_vcpu_elem);
+
+        printk("\t%3d: ", ++loop);
+        csched_dump_vcpu(svc);
+    }
 }
 
 static void
@@ -1360,6 +1390,7 @@ csched_init(void)
 {
     spin_lock_init(&csched_priv.lock);
     INIT_LIST_HEAD(&csched_priv.active_sdom);
+    INIT_LIST_HEAD(&csched_priv.inactive_vcpu);
     csched_priv.ncpus = 0;
     csched_priv.master = UINT_MAX;
     cpus_clear(csched_priv.idlers);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [RFC][PATCH 3/4] sched: balance credits of each vcpu of a domain
  2008-12-18  2:57 [RFC][PATCH 0/4] Modification of credit scheduler rev2 NISHIGUCHI Naoki
  2008-12-18  3:00 ` [RFC][PATCH 1/4] sched: more accurate credit scheduling NISHIGUCHI Naoki
  2008-12-18  3:02 ` [RFC][PATCH 2/4] sched: change the handling of credits over upper bound NISHIGUCHI Naoki
@ 2008-12-18  3:04 ` NISHIGUCHI Naoki
  2008-12-18  3:06 ` [RFC][PATCH 4/4] sched: introduce boost credit for latency-sensitive domain NISHIGUCHI Naoki
  2009-01-13  8:10 ` [RFC][PATCH 0/4] Modification of credit scheduler rev2 Su, Disheng
  4 siblings, 0 replies; 17+ messages in thread
From: NISHIGUCHI Naoki @ 2008-12-18  3:04 UTC (permalink / raw)
  To: George Dunlap, xen-devel; +Cc: Ian.Pratt, disheng.su, aviv, keir.fraser, sakaia

[-- Attachment #1: Type: text/plain, Size: 163 bytes --]

With this patch applied, the credit scheduler balances the credits of the
active vcpus of a domain.

This patch is unchanged from the previous version.

Best regards,
Naoki Nishiguchi

[-- Attachment #2: credit_rev2_3_balance.patch --]
[-- Type: text/x-patch, Size: 1565 bytes --]

diff -r 1aec7bbd0ffc xen/common/sched_credit.c
--- a/xen/common/sched_credit.c	Wed Dec 17 16:01:52 2008 +0900
+++ b/xen/common/sched_credit.c	Wed Dec 17 16:02:29 2008 +0900
@@ -838,7 +838,8 @@ csched_acct(void)
     int credit_balance;
     int credit_xtra;
     int credit;
-
+    int64_t credit_sum;
+    int credit_average;
 
     spin_lock_irqsave(&csched_priv.lock, flags);
 
@@ -887,6 +888,18 @@ csched_acct(void)
         BUG_ON( sdom->active_vcpu_count == 0 );
         BUG_ON( sdom->weight == 0 );
         BUG_ON( sdom->weight > weight_left );
+
+        /* Compute the average of active VCPUs. */
+        credit_sum = 0;
+        list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
+        {
+            svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
+            BUG_ON( sdom != svc->sdom );
+
+            credit_sum += atomic_read(&svc->credit);
+        }
+        credit_average = ( credit_sum + (sdom->active_vcpu_count - 1)
+                         ) / sdom->active_vcpu_count;
 
         weight_left -= sdom->weight;
 
@@ -959,8 +972,9 @@ csched_acct(void)
             svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
             BUG_ON( sdom != svc->sdom );
 
-            /* Increment credit */
-            atomic_add(credit_fair, &svc->credit);
+            /* Balance and increment credit */
+            credit = atomic_read(&svc->credit);
+            atomic_add(credit_average - credit + credit_fair, &svc->credit);
             credit = atomic_read(&svc->credit);
 
             /*

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [RFC][PATCH 4/4] sched: introduce boost credit for latency-sensitive domain
  2008-12-18  2:57 [RFC][PATCH 0/4] Modification of credit scheduler rev2 NISHIGUCHI Naoki
                   ` (2 preceding siblings ...)
  2008-12-18  3:04 ` [RFC][PATCH 3/4] sched: balance credits of each vcpu of a domain NISHIGUCHI Naoki
@ 2008-12-18  3:06 ` NISHIGUCHI Naoki
  2009-01-13  8:10 ` [RFC][PATCH 0/4] Modification of credit scheduler rev2 Su, Disheng
  4 siblings, 0 replies; 17+ messages in thread
From: NISHIGUCHI Naoki @ 2008-12-18  3:06 UTC (permalink / raw)
  To: George Dunlap, xen-devel; +Cc: Ian.Pratt, disheng.su, aviv, keir.fraser, sakaia

[-- Attachment #1: Type: text/plain, Size: 877 bytes --]

I attached the following two patches.
   credit_rev2_4_boost_xen.patch  : modification to xen hypervisor
   credit_rev2_4_boost_tools.patch: modification to tools

Applying these two patches introduces boost credit to the credit
scheduler, which allows the scheduler to give priority to
latency-sensitive domains.

The differences between these patches and the previous versions are as follows.
- When a vcpu is woken up and set to the BOOST state,
CSCHED_CREDITS_PER_TICK is added to boost_credit and subtracted from
credit. This prevents the vcpu from returning to the UNDER state
immediately. dom0 in particular is strongly affected by this.
- Even if the vcpu has boost credit, no scheduler interrupt is sent if the
current time slice is 2ms.
- If more than CSCHED_CREDITS_PER_TSLICE has been subtracted from a vcpu's
credit, the credit is adjusted.

Best regards,
Naoki Nishiguchi

[-- Attachment #2: credit_rev2_4_boost_xen.patch --]
[-- Type: text/x-patch, Size: 21187 bytes --]

diff -r 8bc795246b5b xen/common/sched_credit.c
--- a/xen/common/sched_credit.c	Wed Dec 17 16:02:47 2008 +0900
+++ b/xen/common/sched_credit.c	Wed Dec 17 16:04:43 2008 +0900
@@ -47,6 +47,7 @@
     (CSCHED_CREDITS_PER_TICK * CSCHED_TICKS_PER_TSLICE)
 #define CSCHED_CREDITS_PER_ACCT     \
     (CSCHED_CREDITS_PER_TICK * CSCHED_TICKS_PER_ACCT)
+#define CSCHED_MSECS_PER_BOOST_TSLICE 2
 
 
 /*
@@ -189,6 +190,7 @@ struct csched_pcpu {
     struct timer ticker;
     unsigned int tick;
     s_time_t start_time;
+    s_time_t time_slice;
 };
 
 /*
@@ -201,6 +203,8 @@ struct csched_vcpu {
     struct csched_dom *sdom;
     struct vcpu *vcpu;
     atomic_t credit;
+    atomic_t boost_credit;
+    int prev_credit;
     uint16_t flags;
     int16_t pri;
 #ifdef CSCHED_STATS
@@ -225,6 +229,8 @@ struct csched_dom {
     uint16_t active_vcpu_count;
     uint16_t weight;
     uint16_t cap;
+    uint16_t boost_ratio;
+    uint16_t max_boost_period;
 };
 
 /*
@@ -239,8 +245,11 @@ struct csched_private {
     cpumask_t idlers;
     uint32_t weight;
     uint32_t credit;
+    uint32_t boost_credit;
+    uint16_t total_boost_ratio;
     int credit_balance;
     uint32_t runq_sort;
+    s_time_t boost_tslice;
     CSCHED_STATS_DEFINE()
 };
 
@@ -250,6 +259,10 @@ struct csched_private {
  */
 static struct csched_private csched_priv;
 
+/* opt_credit_tslice: time slice for BOOST priority */
+static unsigned int opt_credit_tslice = CSCHED_MSECS_PER_BOOST_TSLICE;
+integer_param("credit_tslice", opt_credit_tslice);
+
 static void csched_tick(void *_cpu);
 
 static inline int
@@ -304,6 +317,7 @@ __runq_tickle(unsigned int cpu, struct c
 {
     struct csched_vcpu * const cur =
         CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
+    struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
     cpumask_t mask;
 
     ASSERT(cur);
@@ -340,6 +354,16 @@ __runq_tickle(unsigned int cpu, struct c
             cpus_or(mask, mask, csched_priv.idlers);
             cpus_and(mask, mask, new->vcpu->cpu_affinity);
         }
+    }
+
+    /* If new VCPU has boost credit, signal the CPU. */
+    if ( cpus_empty(mask) &&
+         new->pri == CSCHED_PRI_TS_BOOST &&
+         spc->time_slice != csched_priv.boost_tslice &&
+         new->sdom->max_boost_period )
+    {
+        CSCHED_STAT_CRANK(tickle_local_other);
+        cpu_set(cpu, mask);
     }
 
     /* Send scheduler interrupts to designated CPUs */
@@ -503,6 +527,8 @@ __csched_vcpu_acct_start_locked(struct c
         {
             list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
             csched_priv.weight += sdom->weight;
+            csched_priv.boost_credit += (sdom->boost_ratio *
+                                         CSCHED_CREDITS_PER_TSLICE) / 100;
         }
     }
 }
@@ -525,6 +551,8 @@ __csched_vcpu_acct_stop_locked(struct cs
         BUG_ON( csched_priv.weight < sdom->weight );
         list_del_init(&sdom->active_sdom_elem);
         csched_priv.weight -= sdom->weight;
+        csched_priv.boost_credit -= (sdom->boost_ratio *
+                                     CSCHED_CREDITS_PER_TSLICE) / 100;
     }
 }
 
@@ -535,14 +563,6 @@ csched_vcpu_acct(unsigned int cpu)
 
     ASSERT( current->processor == cpu );
     ASSERT( svc->sdom != NULL );
-
-    /*
-     * If this VCPU's priority was boosted when it last awoke, reset it.
-     * If the VCPU is found here, then it's consuming a non-negligeable
-     * amount of CPU resources and should no longer be boosted.
-     */
-    if ( svc->pri == CSCHED_PRI_TS_BOOST )
-        svc->pri = CSCHED_PRI_TS_UNDER;
 
     /*
      * If it's been active a while, check if we'd be better off
@@ -579,6 +599,8 @@ csched_vcpu_init(struct vcpu *vc)
     svc->sdom = sdom;
     svc->vcpu = vc;
     atomic_set(&svc->credit, 0);
+    atomic_set(&svc->boost_credit, 0);
+    svc->prev_credit = 0;
     svc->flags = 0U;
     svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
     CSCHED_VCPU_STATS_RESET(svc);
@@ -693,6 +715,8 @@ csched_vcpu_wake(struct vcpu *vc)
          !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
     {
         svc->pri = CSCHED_PRI_TS_BOOST;
+        atomic_add(CSCHED_CREDITS_PER_TICK, &svc->boost_credit);
+        atomic_sub(CSCHED_CREDITS_PER_TICK, &svc->credit);
     }
 
     /* Put the VCPU on the runq and tickle CPUs */
@@ -712,25 +736,73 @@ csched_dom_cntl(
     {
         op->u.credit.weight = sdom->weight;
         op->u.credit.cap = sdom->cap;
+        op->u.credit.max_boost_period = sdom->max_boost_period;
+        op->u.credit.boost_ratio = sdom->boost_ratio;
     }
     else
     {
+        uint16_t weight = (uint16_t)~0U;
+
         ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
 
         spin_lock_irqsave(&csched_priv.lock, flags);
 
-        if ( op->u.credit.weight != 0 )
+        if ( (op->u.credit.weight != 0) &&
+             (sdom->boost_ratio == 0 || op->u.credit.boost_ratio == 0) )
+        {
+            weight = op->u.credit.weight;
+        }
+
+        if ( op->u.credit.cap != (uint16_t)~0U )
+            sdom->cap = op->u.credit.cap;
+
+        if ( (op->u.credit.max_boost_period != (uint16_t)~0U) &&
+             (op->u.credit.max_boost_period >= CSCHED_MSECS_PER_TICK ||
+              op->u.credit.max_boost_period == 0) )
+        {
+            sdom->max_boost_period = op->u.credit.max_boost_period;
+        }
+
+        if ( (op->u.credit.boost_ratio != (uint16_t)~0U) &&
+             ((csched_priv.total_boost_ratio - sdom->boost_ratio +
+               op->u.credit.boost_ratio) <= 100 * csched_priv.ncpus) &&
+             (sdom->max_boost_period || op->u.credit.boost_ratio == 0) )
+        {
+            uint16_t new_bc, old_bc;
+
+            new_bc = (op->u.credit.boost_ratio *
+                      CSCHED_CREDITS_PER_TSLICE) / 100;
+            old_bc = (sdom->boost_ratio *
+                      CSCHED_CREDITS_PER_TSLICE) / 100;
+
+            csched_priv.total_boost_ratio -= sdom->boost_ratio;
+            csched_priv.total_boost_ratio += op->u.credit.boost_ratio;
+
+            sdom->boost_ratio = op->u.credit.boost_ratio;
+
+            if ( !list_empty(&sdom->active_sdom_elem) )
+            {
+                csched_priv.boost_credit -= old_bc;
+                csched_priv.boost_credit += new_bc;
+            }
+            if ( new_bc == 0 )
+            {
+                if ( sdom->weight == 0 )
+                    weight = CSCHED_DEFAULT_WEIGHT;
+            }
+            else
+                weight = 0;
+        }
+
+        if ( weight != (uint16_t)~0U )
         {
             if ( !list_empty(&sdom->active_sdom_elem) )
             {
                 csched_priv.weight -= sdom->weight;
-                csched_priv.weight += op->u.credit.weight;
+                csched_priv.weight += weight;
             }
-            sdom->weight = op->u.credit.weight;
-        }
-
-        if ( op->u.credit.cap != (uint16_t)~0U )
-            sdom->cap = op->u.credit.cap;
+            sdom->weight = weight;
+        }
 
         spin_unlock_irqrestore(&csched_priv.lock, flags);
     }
@@ -759,6 +831,8 @@ csched_dom_init(struct domain *dom)
     sdom->dom = dom;
     sdom->weight = CSCHED_DEFAULT_WEIGHT;
     sdom->cap = 0U;
+    sdom->boost_ratio = 0U;
+    sdom->max_boost_period = 0U;
     dom->sched_priv = sdom;
 
     return 0;
@@ -774,15 +848,16 @@ csched_dom_destroy(struct domain *dom)
 /*
  * This is a O(n) optimized sort of the runq.
  *
- * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
- * through the runq and move up any UNDERs that are preceded by OVERS. We
- * remember the last UNDER to make the move up operation O(1).
+ * Time-share VCPUs can only be one of three priorities, BOOST, UNDER or OVER.
+ * We walk through the runq and move up any BOOSTs that are preceded by UNDERs
+ * or OVERs, and any UNDERs that are preceded by OVERS. We remember the last
+ * BOOST and UNDER to make the move up operation O(1).
  */
 static void
 csched_runq_sort(unsigned int cpu)
 {
     struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
-    struct list_head *runq, *elem, *next, *last_under;
+    struct list_head *runq, *elem, *next, *last_boost, *last_under;
     struct csched_vcpu *svc_elem;
     unsigned long flags;
     int sort_epoch;
@@ -797,14 +872,26 @@ csched_runq_sort(unsigned int cpu)
 
     runq = &spc->runq;
     elem = runq->next;
-    last_under = runq;
+    last_boost = last_under = runq;
 
     while ( elem != runq )
     {
         next = elem->next;
         svc_elem = __runq_elem(elem);
 
-        if ( svc_elem->pri >= CSCHED_PRI_TS_UNDER )
+        if ( svc_elem->pri == CSCHED_PRI_TS_BOOST )
+        {
+            /* does elem need to move up the runq? */
+            if ( elem->prev != last_boost )
+            {
+                list_del(elem);
+                list_add(elem, last_boost);
+            }
+            if ( last_boost == last_under )
+                last_under = elem;
+            last_boost = elem;
+        }
+        else if ( svc_elem->pri == CSCHED_PRI_TS_UNDER )
         {
             /* does elem need to move up the runq? */
             if ( elem->prev != last_under )
@@ -840,6 +927,14 @@ csched_acct(void)
     int credit;
     int64_t credit_sum;
     int credit_average;
+    /* for boost credit */
+    uint32_t bc_total;
+    uint32_t bc_fair;
+    int boost_credit;
+    int max_boost_credit;
+    int64_t bc_sum;
+    int bc_average;
+
 
     spin_lock_irqsave(&csched_priv.lock, flags);
 
@@ -848,8 +943,12 @@ csched_acct(void)
     {
         svc = list_entry(iter_vcpu, struct csched_vcpu, inactive_vcpu_elem);
 
-        if ( atomic_read(&svc->credit)
-             <= CSCHED_CREDITS_PER_TICK * (CSCHED_TICKS_PER_ACCT - 1) )
+        max_boost_credit = svc->sdom->max_boost_period *
+                           (CSCHED_CREDITS_PER_TSLICE/CSCHED_MSECS_PER_TSLICE);
+        if ( (atomic_read(&svc->credit)
+              <= CSCHED_CREDITS_PER_TICK * (CSCHED_TICKS_PER_ACCT - 1)) ||
+             (atomic_read(&svc->boost_credit)
+              <= (max_boost_credit - CSCHED_CREDITS_PER_TICK)) )
         {
             __csched_vcpu_acct_start_locked(svc);
         }
@@ -857,6 +956,7 @@ csched_acct(void)
 
     weight_total = csched_priv.weight;
     credit_total = csched_priv.credit;
+    bc_total = csched_priv.boost_credit;
 
     /* Converge balance towards 0 when it drops negative */
     if ( csched_priv.credit_balance < 0 )
@@ -865,7 +965,7 @@ csched_acct(void)
         CSCHED_STAT_CRANK(acct_balance);
     }
 
-    if ( unlikely(weight_total == 0) )
+    if ( unlikely(weight_total == 0 && bc_total == 0) )
     {
         csched_priv.credit_balance = 0;
         spin_unlock_irqrestore(&csched_priv.lock, flags);
@@ -880,26 +980,59 @@ csched_acct(void)
     credit_xtra = 0;
     credit_cap = 0U;
 
+    /* Firstly, subtract boost credits from credit_total. */
+    if ( bc_total != 0 )
+    {
+        credit_total -= bc_total;
+        credit_balance += bc_total;
+    }
+
+    /* Avoid 0 divide error */
+    if ( weight_total == 0 )
+        weight_total = 1;
+
     list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
     {
         sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
 
         BUG_ON( is_idle_domain(sdom->dom) );
         BUG_ON( sdom->active_vcpu_count == 0 );
-        BUG_ON( sdom->weight == 0 );
         BUG_ON( sdom->weight > weight_left );
 
-        /* Compute the average of active VCPUs. */
+        max_boost_credit = sdom->max_boost_period *
+                           (CSCHED_CREDITS_PER_TSLICE / CSCHED_MSECS_PER_TSLICE);
+
+        /*
+         *  Compute the average of active VCPUs
+         *  and adjust credit for excessive consumption.
+         */
         credit_sum = 0;
+        bc_sum = 0;
         list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
         {
+            int adjust;
+
             svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
             BUG_ON( sdom != svc->sdom );
 
+            credit = atomic_read(&svc->credit);
+            boost_credit = atomic_read(&svc->boost_credit);
+            adjust = svc->prev_credit - (credit + boost_credit)
+                   - CSCHED_CREDITS_PER_TSLICE;
+            if ( adjust > 0 )
+            {
+                if ( max_boost_credit != 0 )
+                    atomic_add(adjust, &svc->boost_credit);
+                else
+                    atomic_add(adjust, &svc->credit);
+            }
             credit_sum += atomic_read(&svc->credit);
+            bc_sum += atomic_read(&svc->boost_credit);
         }
         credit_average = ( credit_sum + (sdom->active_vcpu_count - 1)
                          ) / sdom->active_vcpu_count;
+        bc_average = ( bc_sum + (sdom->active_vcpu_count - 1)
+                     ) / sdom->active_vcpu_count;
 
         weight_left -= sdom->weight;
 
@@ -934,7 +1067,9 @@ csched_acct(void)
 
         if ( credit_fair < credit_peak )
         {
-            credit_xtra = 1;
+            /* credit_fair is 0 if weight is 0. */
+            if ( sdom->weight != 0 )
+                credit_xtra = 1;
         }
         else
         {
@@ -966,6 +1101,10 @@ csched_acct(void)
         credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
                       ) / sdom->active_vcpu_count;
 
+        /* Compute fair share of boost credit per VCPU */
+        bc_fair = ( ((sdom->boost_ratio * CSCHED_CREDITS_PER_ACCT)/100) +
+                    (sdom->active_vcpu_count - 1)
+                  ) / sdom->active_vcpu_count;
 
         list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
         {
@@ -976,6 +1115,54 @@ csched_acct(void)
             credit = atomic_read(&svc->credit);
             atomic_add(credit_average - credit + credit_fair, &svc->credit);
             credit = atomic_read(&svc->credit);
+
+            /* Balance and increment boost credit */
+            boost_credit = atomic_read(&svc->boost_credit);
+            atomic_add(bc_average - boost_credit + bc_fair, &svc->boost_credit);
+            boost_credit = atomic_read(&svc->boost_credit);
+
+            /*
+             * Upper bound on credits.
+             * Add excess to boost credit.
+             */
+            if ( credit > CSCHED_CREDITS_PER_TSLICE )
+            {
+                atomic_add(credit - CSCHED_CREDITS_PER_TSLICE,
+                           &svc->boost_credit);
+                boost_credit = atomic_read(&svc->boost_credit);
+                credit = CSCHED_CREDITS_PER_TSLICE;
+                atomic_set(&svc->credit, credit);
+            }
+            /*
+             * Upper bound on boost credits.
+             * Add excess to credit.
+             */
+            if ( boost_credit > max_boost_credit )
+            {
+                atomic_add(boost_credit - max_boost_credit, &svc->credit);
+                credit = atomic_read(&svc->credit);
+                boost_credit = max_boost_credit;
+                atomic_set(&svc->boost_credit, boost_credit);
+            }
+            /*
+             * If credit is negative,
+             * boost credits compensate credit.
+             */
+            if ( credit < 0 && boost_credit > 0 )
+            {
+                if ( boost_credit > -credit )
+                {
+                    atomic_sub(-credit, &svc->boost_credit);
+                    atomic_add(-credit, &svc->credit);
+                }
+                else
+                {
+                    atomic_sub(boost_credit, &svc->boost_credit);
+                    atomic_add(boost_credit, &svc->credit);
+                }
+                credit = atomic_read(&svc->credit);
+                boost_credit = atomic_read(&svc->boost_credit);
+            }
 
             /*
              * Recompute priority or, if VCPU is idling, remove it from
@@ -1005,7 +1192,10 @@ csched_acct(void)
             }
             else
             {
-                svc->pri = CSCHED_PRI_TS_UNDER;
+                if ( boost_credit > 0 )
+                    svc->pri = CSCHED_PRI_TS_BOOST;
+                else
+                    svc->pri = CSCHED_PRI_TS_UNDER;
 
                 /* Unpark any capped domains whose credits go positive */
                 if ( svc->flags & CSCHED_FLAG_VCPU_PARKED)
@@ -1020,18 +1210,36 @@ csched_acct(void)
                     svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
                 }
 
-                /* Upper bound on credits means VCPU stops earning */
+                /*
+                 * Upper bound on credits and boost credits means VCPU stops
+                 * earning
+                 */
                 if ( credit > CSCHED_CREDITS_PER_TSLICE )
                 {
-                    __csched_vcpu_acct_stop_locked(svc);
                     credit = CSCHED_CREDITS_PER_TSLICE;
                     atomic_set(&svc->credit, credit);
+
+                    if ( boost_credit >= max_boost_credit )
+                    {
+                        __csched_vcpu_acct_stop_locked(svc);
+                    }
                 }
             }
 
-            CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
-            CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
-            credit_balance += credit;
+            /* save credit for adjustment */
+            svc->prev_credit = credit + boost_credit;
+
+            if ( sdom->boost_ratio == 0 )
+            {
+                CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
+                CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
+                credit_balance += credit;
+            }
+            else
+            {
+                CSCHED_VCPU_STAT_SET(svc, credit_last, boost_credit);
+                CSCHED_VCPU_STAT_SET(svc, credit_incr, bc_fair);
+            }
         }
     }
 
@@ -1216,6 +1424,22 @@ csched_schedule(s_time_t now)
                ) /
                ( MILLISECS(CSCHED_MSECS_PER_TSLICE) /
                  CSCHED_CREDITS_PER_TSLICE );
+    if ( scurr->pri == CSCHED_PRI_TS_BOOST )
+    {
+        int boost_credit = atomic_read(&scurr->boost_credit);
+
+        if ( boost_credit > consumed )
+        {
+            atomic_sub(consumed, &scurr->boost_credit);
+            consumed = 0;
+        }
+        else
+        {
+            atomic_sub(boost_credit, &scurr->boost_credit);
+            consumed -= boost_credit;
+            scurr->pri = CSCHED_PRI_TS_UNDER;
+        }
+    }
     if ( consumed > 0 && !is_idle_vcpu(current) )
         atomic_sub(consumed, &scurr->credit);
 
@@ -1259,9 +1483,20 @@ csched_schedule(s_time_t now)
     /*
      * Return task to run next...
      */
-    ret.time = MILLISECS(CSCHED_MSECS_PER_TSLICE);
+    if ( snext->pri == CSCHED_PRI_TS_BOOST )
+    {
+        struct csched_vcpu * const svc = __runq_elem(runq->next);
+
+        if ( svc->pri == CSCHED_PRI_TS_BOOST )
+            ret.time = csched_priv.boost_tslice;
+        else
+            ret.time = MILLISECS(CSCHED_MSECS_PER_TICK);
+    }
+    else
+        ret.time = MILLISECS(CSCHED_MSECS_PER_TSLICE);
     ret.task = snext->vcpu;
 
+    spc->time_slice  = ret.time;
     spc->start_time = now;
 
     CSCHED_VCPU_CHECK(ret.task);
@@ -1282,7 +1517,11 @@ csched_dump_vcpu(struct csched_vcpu *svc
 
     if ( sdom )
     {
-        printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
+        printk(" credit=%i bc=%i [w=%u,bc=%i]",
+               atomic_read(&svc->credit),
+               atomic_read(&svc->boost_credit),
+               sdom->weight,
+               (sdom->boost_ratio * CSCHED_CREDITS_PER_TSLICE)/100);
 #ifdef CSCHED_STATS
         printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
                 svc->stats.credit_last,
@@ -1348,6 +1587,8 @@ csched_dump(void)
            "\tcredit balance     = %d\n"
            "\tweight             = %u\n"
            "\trunq_sort          = %u\n"
+           "\tboost_credit       = %u\n"
+           "\ttotal_boost_ratio  = %u\n"
            "\tdefault-weight     = %d\n"
            "\tmsecs per tick     = %dms\n"
            "\tcredits per tick   = %d\n"
@@ -1359,6 +1600,8 @@ csched_dump(void)
            csched_priv.credit_balance,
            csched_priv.weight,
            csched_priv.runq_sort,
+           csched_priv.boost_credit,
+           csched_priv.total_boost_ratio,
            CSCHED_DEFAULT_WEIGHT,
            CSCHED_MSECS_PER_TICK,
            CSCHED_CREDITS_PER_TICK,
@@ -1412,6 +1655,9 @@ csched_init(void)
     csched_priv.credit = 0U;
     csched_priv.credit_balance = 0;
     csched_priv.runq_sort = 0U;
+    csched_priv.boost_credit = 0;
+    csched_priv.total_boost_ratio = 0;
+    csched_priv.boost_tslice = MILLISECS(opt_credit_tslice);
     CSCHED_STATS_RESET();
 }
 
diff -r 8bc795246b5b xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Wed Dec 17 16:02:47 2008 +0900
+++ b/xen/include/public/domctl.h	Wed Dec 17 16:04:43 2008 +0900
@@ -311,6 +311,8 @@ struct xen_domctl_scheduler_op {
         struct xen_domctl_sched_credit {
             uint16_t weight;
             uint16_t cap;
+            uint16_t max_boost_period;
+            uint16_t boost_ratio;
         } credit;
     } u;
 };

[-- Attachment #3: credit_rev2_4_boost_tools.patch --]
[-- Type: text/x-patch, Size: 15698 bytes --]

diff -r 9dfd98cac0cc tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c	Wed Dec 17 16:05:11 2008 +0900
+++ b/tools/python/xen/lowlevel/xc/xc.c	Wed Dec 17 16:05:42 2008 +0900
@@ -1284,18 +1284,26 @@ static PyObject *pyxc_sched_credit_domai
     uint32_t domid;
     uint16_t weight;
     uint16_t cap;
-    static char *kwd_list[] = { "domid", "weight", "cap", NULL };
-    static char kwd_type[] = "I|HH";
+    uint16_t max_boost_period;
+    uint16_t boost_ratio;
+    static char *kwd_list[] = { "domid", "weight", "cap",
+                                "max_boost_period", "boost_ratio", NULL };
+    static char kwd_type[] = "I|HHhh";
     struct xen_domctl_sched_credit sdom;
     
     weight = 0;
     cap = (uint16_t)~0U;
+    max_boost_period = (uint16_t)~0U;
+    boost_ratio = (uint16_t)~0U;
     if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list, 
-                                     &domid, &weight, &cap) )
+                                     &domid, &weight, &cap,
+                                     &max_boost_period, &boost_ratio) )
         return NULL;
 
     sdom.weight = weight;
     sdom.cap = cap;
+    sdom.max_boost_period = max_boost_period;
+    sdom.boost_ratio = boost_ratio;
 
     if ( xc_sched_credit_domain_set(self->xc_handle, domid, &sdom) != 0 )
         return pyxc_error_to_exception();
@@ -1315,9 +1323,11 @@ static PyObject *pyxc_sched_credit_domai
     if ( xc_sched_credit_domain_get(self->xc_handle, domid, &sdom) != 0 )
         return pyxc_error_to_exception();
 
-    return Py_BuildValue("{s:H,s:H}",
-                         "weight",  sdom.weight,
-                         "cap",     sdom.cap);
+    return Py_BuildValue("{s:H,s:H,s:i,s:i}",
+                         "weight",           sdom.weight,
+                         "cap",              sdom.cap,
+                         "max_boost_period", sdom.max_boost_period,
+                         "boost_ratio",      sdom.boost_ratio);
 }
 
 static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args)
@@ -1723,8 +1733,11 @@ static PyMethodDef pyxc_methods[] = {
       METH_KEYWORDS, "\n"
       "Set the scheduling parameters for a domain when running with the\n"
       "SMP credit scheduler.\n"
-      " domid     [int]:   domain id to set\n"
-      " weight    [short]: domain's scheduling weight\n"
+      " domid            [int]:   domain id to set\n"
+      " weight           [short]: domain's scheduling weight\n"
+      " cap              [short]: cap\n"
+      " max_boost_period [short]: upper limit in BOOST priority\n"
+      " boost_ratio      [short]; domain's boost ratio per a cpu\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
     { "sched_credit_domain_get",
@@ -1732,9 +1745,12 @@ static PyMethodDef pyxc_methods[] = {
       METH_VARARGS, "\n"
       "Get the scheduling parameters for a domain when running with the\n"
       "SMP credit scheduler.\n"
-      " domid     [int]:   domain id to get\n"
+      " domid            [int]:   domain id to get\n"
       "Returns:   [dict]\n"
-      " weight    [short]: domain's scheduling weight\n"},
+      " weight           [short]: domain's scheduling weight\n"
+      " cap              [short]: cap\n"
+      " max_boost_period [short]: upper limit in BOOST priority\n"
+      " boost_ratio      [short]: domain's boost ratio per a cpu\n"},
 
     { "evtchn_alloc_unbound", 
       (PyCFunction)pyxc_evtchn_alloc_unbound,
diff -r 9dfd98cac0cc tools/python/xen/xend/XendAPI.py
--- a/tools/python/xen/xend/XendAPI.py	Wed Dec 17 16:05:11 2008 +0900
+++ b/tools/python/xen/xend/XendAPI.py	Wed Dec 17 16:05:42 2008 +0900
@@ -1505,10 +1505,14 @@ class XendAPI(object):
 
         #need to update sched params aswell
         if 'weight' in xeninfo.info['vcpus_params'] \
-           and 'cap' in xeninfo.info['vcpus_params']:
+           and 'cap' in xeninfo.info['vcpus_params'] \
+           and 'max_boost_period' in xeninfo.info['vcpus_params'] \
+           and 'boost_ratio' in xeninfo.info['vcpus_params']:
             weight = xeninfo.info['vcpus_params']['weight']
             cap = xeninfo.info['vcpus_params']['cap']
-            xendom.domain_sched_credit_set(xeninfo.getDomid(), weight, cap)
+            max_boost_period = xeninfo.info['vcpus_params']['max_boost_period']
+            boost_ratio = xeninfo.info['vcpus_params']['boost_ratio']
+            xendom.domain_sched_credit_set(xeninfo.getDomid(), weight, cap, max_boost_period, boost_ratio)
 
     def VM_set_VCPUs_number_live(self, _, vm_ref, num):
         dom = XendDomain.instance().get_vm_by_uuid(vm_ref)
diff -r 9dfd98cac0cc tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py	Wed Dec 17 16:05:11 2008 +0900
+++ b/tools/python/xen/xend/XendConfig.py	Wed Dec 17 16:05:42 2008 +0900
@@ -589,6 +589,10 @@ class XendConfig(dict):
             int(sxp.child_value(sxp_cfg, "cpu_weight", 256))
         cfg["vcpus_params"]["cap"] = \
             int(sxp.child_value(sxp_cfg, "cpu_cap", 0))
+        cfg["vcpus_params"]["max_boost_period"] = \
+            int(sxp.child_value(sxp_cfg, "cpu_max_boost_period", 0))
+        cfg["vcpus_params"]["boost_ratio"] = \
+            int(sxp.child_value(sxp_cfg, "cpu_boost_ratio", 0))
 
         # Only extract options we know about.
         extract_keys = LEGACY_UNSUPPORTED_BY_XENAPI_CFG + \
diff -r 9dfd98cac0cc tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py	Wed Dec 17 16:05:11 2008 +0900
+++ b/tools/python/xen/xend/XendDomain.py	Wed Dec 17 16:05:42 2008 +0900
@@ -1536,7 +1536,7 @@ class XendDomain:
 
         @param domid: Domain ID or Name
         @type domid: int or string.
-        @rtype: dict with keys 'weight' and 'cap'
+        @rtype: dict with keys 'weight' and 'cap' and 'max_boost_period' and 'boost_ratio'
         @return: credit scheduler parameters
         """
         dominfo = self.domain_lookup_nr(domid)
@@ -1549,20 +1549,26 @@ class XendDomain:
             except Exception, ex:
                 raise XendError(str(ex))
         else:
-            return {'weight' : dominfo.getWeight(),
-                    'cap'    : dominfo.getCap()} 
+            return {'weight'          : dominfo.getWeight(),
+                    'cap'             : dominfo.getCap(),
+                    'max_boost_period': dominfo.getMaxBoostPeriod(),
+                    'boost_ratio'    : dominfo.getBoostRatio()} 
     
-    def domain_sched_credit_set(self, domid, weight = None, cap = None):
+    def domain_sched_credit_set(self, domid, weight = None, cap = None, max_boost_period = None, boost_ratio = None):
         """Set credit scheduler parameters for a domain.
 
         @param domid: Domain ID or Name
         @type domid: int or string.
         @type weight: int
         @type cap: int
+        @type max_boost_period: int
+        @type boost_ratio: int
         @rtype: 0
         """
         set_weight = False
         set_cap = False
+        set_max_boost_period = False
+        set_boost_ratio = False
         dominfo = self.domain_lookup_nr(domid)
         if not dominfo:
             raise XendInvalidDomain(str(domid))
@@ -1581,17 +1587,37 @@ class XendDomain:
             else:
                 set_cap = True
 
+            if max_boost_period is None:
+                max_boost_period = int(~0)
+            elif max_boost_period < 0:
+                raise XendError("max_boost_period is out of range")
+            else:
+                set_max_boost_period = True
+
+            if boost_ratio is None:
+                boost_ratio = int(~0)
+            elif boost_ratio < 0:
+                raise XendError("boost_ratio is out of range")
+            else:
+                set_boost_ratio = True
+
             assert type(weight) == int
             assert type(cap) == int
+            assert type(max_boost_period) == int
+            assert type(boost_ratio) == int
 
             rc = 0
             if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
-                rc = xc.sched_credit_domain_set(dominfo.getDomid(), weight, cap)
+                rc = xc.sched_credit_domain_set(dominfo.getDomid(), weight, cap, max_boost_period, boost_ratio)
             if rc == 0:
                 if set_weight:
                     dominfo.setWeight(weight)
                 if set_cap:
                     dominfo.setCap(cap)
+                if set_max_boost_period:
+                    dominfo.setMaxBoostPeriod(max_boost_period)
+                if set_boost_ratio:
+                    dominfo.setBoostRatio(boost_ratio)
                 self.managed_config_save(dominfo)
             return rc
         except Exception, ex:
diff -r 9dfd98cac0cc tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py	Wed Dec 17 16:05:11 2008 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py	Wed Dec 17 16:05:42 2008 +0900
@@ -465,7 +465,9 @@ class XendDomainInfo:
                 if xennode.xenschedinfo() == 'credit':
                     xendomains.domain_sched_credit_set(self.getDomid(),
                                                        self.getWeight(),
-                                                       self.getCap())
+                                                       self.getCap(),
+                                                       self.getMaxBoostPeriod(),
+                                                       self.getBoostRatio())
             except:
                 log.exception('VM start failed')
                 self.destroy()
@@ -1618,6 +1620,18 @@ class XendDomainInfo:
     def setWeight(self, cpu_weight):
         self.info['vcpus_params']['weight'] = cpu_weight
 
+    def getMaxBoostPeriod(self):
+        return self.info['vcpus_params']['max_boost_period']
+
+    def setMaxBoostPeriod(self, cpu_max_boost_period):
+        self.info['vcpus_params']['max_boost_period'] = cpu_max_boost_period
+
+    def getBoostRatio(self):
+        return self.info['vcpus_params']['boost_ratio']
+
+    def setBoostRatio(self, cpu_boost_ratio):
+        self.info['vcpus_params']['boost_ratio'] = cpu_boost_ratio
+
     def getRestartCount(self):
         return self._readVm('xend/restart_count')
 
diff -r 9dfd98cac0cc tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py	Wed Dec 17 16:05:11 2008 +0900
+++ b/tools/python/xen/xm/main.py	Wed Dec 17 16:05:42 2008 +0900
@@ -150,7 +150,7 @@ SUBCOMMAND_HELP = {
     'log'         : ('', 'Print Xend log'),
     'rename'      : ('<Domain> <NewDomainName>', 'Rename a domain.'),
     'sched-sedf'  : ('<Domain> [options]', 'Get/set EDF parameters.'),
-    'sched-credit': ('[-d <Domain> [-w[=WEIGHT]|-c[=CAP]]]',
+    'sched-credit': ('[-d <Domain> [-w[=WEIGHT]|-c[=CAP]|-m[=MAXBOOSTPERIOD]|-r[=BOOSTRATIO]]]',
                      'Get/set credit scheduler parameters.'),
     'sysrq'       : ('<Domain> <letter>', 'Send a sysrq to a domain.'),
     'debug-keys'  : ('<Keys>', 'Send debug keys to Xen.'),
@@ -240,6 +240,8 @@ SUBCOMMAND_OPTIONS = {
        ('-d DOMAIN', '--domain=DOMAIN', 'Domain to modify'),
        ('-w WEIGHT', '--weight=WEIGHT', 'Weight (int)'),
        ('-c CAP',    '--cap=CAP',       'Cap (int)'),
+       ('-m MAXBOOSTPERIOD', '--maxboostperiod=MAXBOOSTPERIOD', 'Upper limit of boost period (ms)'),
+       ('-r BOOSTRATIO', '--ratio=BOOSTRATIO', 'Boost ratio per a cpu (int)'),
     ),
     'list': (
        ('-l', '--long',         'Output all VM details in SXP'),
@@ -1578,8 +1580,8 @@ def xm_sched_credit(args):
     check_sched_type('credit')
 
     try:
-        opts, params = getopt.getopt(args, "d:w:c:",
-            ["domain=", "weight=", "cap="])
+        opts, params = getopt.getopt(args, "d:w:c:m:r:",
+            ["domain=", "weight=", "cap=", "maxboostperiod=", "ratio="])
     except getopt.GetoptError, opterr:
         err(opterr)
         usage('sched-credit')
@@ -1587,6 +1589,8 @@ def xm_sched_credit(args):
     domid = None
     weight = None
     cap = None
+    max_boost_period = None
+    boost_ratio = None
 
     for o, a in opts:
         if o in ["-d", "--domain"]:
@@ -1594,18 +1598,22 @@ def xm_sched_credit(args):
         elif o in ["-w", "--weight"]:
             weight = int(a)
         elif o in ["-c", "--cap"]:
-            cap = int(a);
+            cap = int(a)
+        elif o in ["-m", "--maxboostperiod"]:
+            max_boost_period = int(a)
+        elif o in ["-r", "--ratio"]:
+            boost_ratio = int(a);
 
     doms = filter(lambda x : domid_match(domid, x),
                   [parse_doms_info(dom)
                   for dom in getDomains(None, 'all')])
 
-    if weight is None and cap is None:
+    if weight is None and cap is None and max_boost_period is None and boost_ratio is None:
         if domid is not None and doms == []: 
             err("Domain '%s' does not exist." % domid)
             usage('sched-credit')
         # print header if we aren't setting any parameters
-        print '%-33s %4s %6s %4s' % ('Name','ID','Weight','Cap')
+        print '%-33s %4s %6s %4s %8s %5s' % ('Name','ID','Weight','Cap','Max(ms)','Ratio')
         
         for d in doms:
             try:
@@ -1618,16 +1626,18 @@ def xm_sched_credit(args):
             except xmlrpclib.Fault:
                 pass
 
-            if 'weight' not in info or 'cap' not in info:
+            if 'weight' not in info or 'cap' not in info or 'max_boost_period' not in info or 'boost_ratio' not in info:
                 # domain does not support sched-credit?
-                info = {'weight': -1, 'cap': -1}
+                info = {'weight': -1, 'cap': -1, 'max_boost_period':-1, 'boost_ratio':-1}
 
             info['weight'] = int(info['weight'])
             info['cap']    = int(info['cap'])
+            info['max_boost_period'] = int(info['max_boost_period'])
+            info['boost_ratio'] = int(info['boost_ratio'])
             
             info['name']  = d['name']
             info['domid'] = str(d['domid'])
-            print( ("%(name)-32s %(domid)5s %(weight)6d %(cap)4d") % info)
+            print( ("%(name)-32s %(domid)5s %(weight)6d %(cap)4d %(max_boost_period)8d %(boost_ratio)5d") % info)
     else:
         if domid is None:
             # place holder for system-wide scheduler parameters
@@ -1644,6 +1654,14 @@ def xm_sched_credit(args):
                     get_single_vm(domid),
                     "cap",
                     cap)
+                server.xenapi.VM.add_to_VCPUs_params_live(
+                    get_single_vm(domid),
+                    "max_boost_period",
+                    max_boost_period)
+                server.xenapi.VM.add_to_VCPUs_params_live(
+                    get_single_vm(domid),
+                    "boost_ratio",
+                    boost_ratio)
             else:
                 server.xenapi.VM.add_to_VCPUs_params(
                     get_single_vm(domid),
@@ -1653,8 +1671,16 @@ def xm_sched_credit(args):
                     get_single_vm(domid),
                     "cap",
                     cap)
+                server.xenapi.VM.add_to_VCPUs_params(
+                    get_single_vm(domid),
+                    "max_boost_period",
+                    max_boost_period)
+                server.xenapi.VM.add_to_VCPUs_params(
+                    get_single_vm(domid),
+                    "boost_ratio",
+                    boost_ratio)
         else:
-            result = server.xend.domain.sched_credit_set(domid, weight, cap)
+            result = server.xend.domain.sched_credit_set(domid, weight, cap, max_boost_period, boost_ratio)
             if result != 0:
                 err(str(result))
 

[-- Attachment #4: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [RFC][PATCH 0/4] Modification of credit scheduler rev2
  2008-12-18  2:57 [RFC][PATCH 0/4] Modification of credit scheduler rev2 NISHIGUCHI Naoki
                   ` (3 preceding siblings ...)
  2008-12-18  3:06 ` [RFC][PATCH 4/4] sched: introduce boost credit for latency-sensitive domain NISHIGUCHI Naoki
@ 2009-01-13  8:10 ` Su, Disheng
  2009-01-15  2:04   ` NISHIGUCHI Naoki
  4 siblings, 1 reply; 17+ messages in thread
From: Su, Disheng @ 2009-01-13  8:10 UTC (permalink / raw)
  To: NISHIGUCHI Naoki, George Dunlap, xen-devel@lists.xensource.com
  Cc: Ian.Pratt@eu.citrix.com, Su, Disheng, aviv@neocleus.com,
	keir.fraser@eu.citrix.com, sakaia@jp.fujitsu.com

Hi Naoki,
	Thanks for your excellent work.
	These days I have been testing audio/video playback with your patches. With the default credit scheduler, the audio quality is really bad (a lot of audio glitches), but I got a better result with your patches. I list my findings here, FYI.

	1. What's the latency requirement for audio? I am not an expert on this :) I found some links about it ( http://www.soundonsound.com/sos/jan05/articles/pcmusician.htm and http://www.podcomplex.com/blog/setting-buffers-and-latency-for-your-audio-interface/ ). In a native environment, setting the buffer size of the audio hardware to produce a latency of 23ms is acceptable even for many musicians. So it is safe to say that, when playing audio in a VM in a virtual environment, the VM has to be scheduled in at least once every 23ms. It is even worse for Vista, which has a 10ms requirement ( http://blogs.technet.com/markrussinovich/archive/2007/08/27/1833290.aspx ). Apparently, the default credit scheduler can't handle this case well.
	
	2. Test env:
		hardware: 
			Cpu: INTEL Core 2 Duo E6850
			Chipset: 82G33
			Memory: 2G
		software:
			Xen upstream(cs: 18881)
		doms configuration:
			guest A: primary HVM guest (integrated graphics card, sound, USB controller directly assigned), playing mp3 with WMP in foreground + copying large files (e.g. 2G) in background. 2 vcpus, 1G memory. Guest OS is Windows XP or Vista.
			guest B: secondary HVM guest(also copying large files in guest, no devices assigned). 2 vcpus, 128M memory. Guest OS is Windows XP.
	
	3. Configure the scheduler and Xen:
		a. the weight of guest B must be as low as possible (e.g. 10 for it, but 256 for guest A and dom0). Guest B is competing with guest A for dom0. The lower the weight, the less chance it has of being scheduled in.
		b. the boost credit needs to be as large as possible (e.g. 1000 for both the primary guest and dom0), to make sure guest A stays at boost priority longer when doing heavy I/O.
		c. the vcpus of guest A need to be pinned to physical cpus. If they are not pinned and the guest is SMP, the scheduler dynamically migrates vcpus between physical cpus, and the audio glitches are also obvious. One possible reason is the high frequency of migration combined with the short runtime each time a vcpu is scheduled in. The migration rate is about 60~110 per second, and each migration has a cost (cache misses, TLB misses, etc.). The runtime is also short: 90% of the runs are less than 30us. It does not seem reasonable to migrate a vcpu when it only runs for a few tens of microseconds.
	With this configuration, both XP and Vista guests work well, usually with no glitches.
	
	4. issues left:
		a. Abrupt glitches are still generated when the QEMU emulated mouse is used and the mouse is moved quickly in guest A. When a USB mouse/keyboard is passed through to guest A, there are no glitches.
		b. vcpu migration. As said before, without vcpu pinning, glitches are obvious.
		c. the limitation on the weight of guest B. I have to set the weight of guest B to 10, which may not be reasonable in real usage.

	Do you have experience with audio? I don't know whether I have configured your scheduler properly. I hope your scheduler can solve the audio issues as well.

NISHIGUCHI Naoki wrote:
> Hi all,
> 
> The patchset is a revised version of the patches that I posted 10 days
> ago. This patchset consists of the following 4 patches.
> 
> 1. Subtract credit consumed accurately and shorten cpu time per one
> credit 
> 2. Change the handling of credits over upper bound.
> 3. Balance credits of each vcpu of a domain
> 4. Introduce boost credit for latency-sensitive domain
> 
> It was not possible to separate these cleanly.
> Please apply these patches in numerical order.
> 
> Please review these patches.
> Any comments are appreciated.
> 
> Best regards,
> Naoki Nishiguchi



Best Regards,
Disheng, Su

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: RE: [RFC][PATCH 0/4] Modification of credit scheduler rev2
  2009-01-13  8:10 ` [RFC][PATCH 0/4] Modification of credit scheduler rev2 Su, Disheng
@ 2009-01-15  2:04   ` NISHIGUCHI Naoki
  2009-01-15  2:56     ` Tian, Kevin
  2009-01-15  4:55     ` Su, Disheng
  0 siblings, 2 replies; 17+ messages in thread
From: NISHIGUCHI Naoki @ 2009-01-15  2:04 UTC (permalink / raw)
  To: Su, Disheng, xen-devel@lists.xensource.com
  Cc: George Dunlap, Ian.Pratt@eu.citrix.com, aviv@neocleus.com,
	keir.fraser@eu.citrix.com, sakaia@jp.fujitsu.com

Hi Disheng,

Thank you for evaluating patches and reporting results.

Su, Disheng wrote:
> Hi Naoki,
> 	Thanks for your excellent work.
> 	This days, I tested the playing audio/video with your patches. With the default credit scheduler, the audio effect is really bad(a lot of audio glitches). But I got a better result with your patches. I list my findings here, FYI.
> 
> 	1. What's the latency requirement for audio? I am not good at this one:) I find some links regarding to it( http://www.soundonsound.com/sos/jan05/articles/pcmusician.htm and http://www.podcomplex.com/blog/setting-buffers-and-latency-for-your-audio-interface/).  In native env, setting the buffer size of audio hardware to produce a latency of 23ms is acceptable even for many musicians. It's safe to say we have to schedule in the VM for each 23ms for such case in virtual env when playing audio in VM. Even worse for Vista, which has 10ms requirement ( http://blogs.technet.com/markrussinovich/archive/2007/08/27/1833290.aspx ). Apparently, the default credit scheduler can't handle well for this case.

Thanks for your information.
I'll see these links.

> 	2. Test env:
> 		hardware: 
> 			Cpu: INTEL Core 2 Duo E6850
> 			Chipset: 82G33
> 			Memory: 2G
> 		software:
> 			Xen upstream(cs: 18881)
> 		doms configuration:
> 			guest A: primary HVM guest(integreted graphic card, sound, USB controller directly assigned), playing mp3 with WMP in foreground + copying large files(e.g. 2G) in background. 2 vcpus, 1G memory. Guest OS is Windows XP or Vista.
> 			guest B: secondary HVM guest(also copying large files in guest, no devices assigned). 2 vcpus, 128M memory. Guest OS is Windows XP.
> 	
> 	3. Configure the scheduler and Xen:
> 		a. the weight of guest B must be lower as much as possible(e.g. 10 for it, but 256 for guest A and dom0). Guest B is competing with Guest A for dom0. The lower the weight, the lesser chance to be scheduled in. 
> 		b. the boost credit needs to be larger as much as possible.(e.g 1000 for both primary guest and dom0). To make sure the guest A stays in boost priority longer when doing heavy I/O.
> 		c. vcpus of guest A need to be pinned to physical cpu. Without pinned and guest is smp, the scheduler will dynamically migrate vcpus between physcial cpus, and the audio glitches is also obvious. One of possible reason is high freq of migration and the small runtime when the vcpu be scheduled in. The migration rate is about 60~110 per second, and each migration has the migration cost(such as cache, TLB miss, etc..). And the runtime is small, 90% of runtime is less than 30us. It sounds not reasonable to migrate a vcpu, but it just runs for a tens of microseconds.
> 	With this configuration, both xp/vista guest works well, no glitches usually.
> 	
> 	4. issues left:
> 		a. Abrupt glitches are still generated when the QEMU emulated mouse being used and moving mouse quickly in guest A. Passing-through USB mouse/keyboard to guest A, then no glitches.

I also noticed that. Though I don't know the precise cause, I found that 
dom0 and guest A would consume a large amount of CPU time (hundreds of 
milliseconds) in such a situation. In this case, the priority of dom0 and 
guest A falls rapidly, then guest B runs until the priority of dom0 and 
guest A becomes BOOST. In the worst case, it will take about 120ms.

I tried to solve this issue as follows, but then the scheduler no longer 
scheduled correctly according to the weight of a domain.
- In csched_schedule(), if a vcpu runs over the current time slice then the 
time slice is subtracted from the vcpu's credit.

I think I need to investigate this more deeply.

> 		b. vcpu migration. As said before, without vcpu pinned, glitches are obvious.

I think that this issue would be solved by adding the condition for 
migrating the vcpu.
e.g. If the vcpu has boost credit, don't migrate the vcpu.

I'll try to test.

> 		c. the limitation of weight for guest B. I have to set the weight of guest B to 10. It may not be reasonable in real usage case.

Is copying large files in the background on guest A indispensable?
In my test, guest A only runs video playback.
I think that my approach can't solve this issue.

> 	Do you have the experience with audio? I don't know I have properly configured your scheduler or not. Hope the your scheduler can solve the audio issues also.

Sorry, I don't have experience with audio.
But I'll try to reproduce your configuration and investigate.

Regards,
Naoki Nishiguchi

> 
> NISHIGUCHI Naoki wrote:
>> Hi all,
>>
>> The patchset is revised version of patches that I was posted 10 days
>> ago. This patchset is consist of the following 4 patches.
>>
>> 1. Subtract credit consumed accurately and shorten cpu time per one
>> credit 
>> 2. Change the handling of credits over upper bound.
>> 3. Balance credits of each vcpu of a domain
>> 4. Introduce boost credit for latency-sensitive domain
>>
>> It was not possible to separate these cleanly.
>> Please apply these patches in numerical order.
>>
>> Please review these patches.
>> Any comments are appreciated.
>>
>> Best regards,
>> Naoki Nishiguchi
> 
> 
> 
> Best Regards,
> Disheng, Su
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: RE: [RFC][PATCH 0/4] Modification of credit scheduler rev2
  2009-01-15  2:04   ` NISHIGUCHI Naoki
@ 2009-01-15  2:56     ` Tian, Kevin
  2009-01-15  4:42       ` NISHIGUCHI Naoki
  2009-01-15  4:55     ` Su, Disheng
  1 sibling, 1 reply; 17+ messages in thread
From: Tian, Kevin @ 2009-01-15  2:56 UTC (permalink / raw)
  To: 'NISHIGUCHI Naoki', Su, Disheng,
	xen-devel@lists.xensource.com
  Cc: George Dunlap, Ian.Pratt@eu.citrix.com, aviv@neocleus.com,
	keir.fraser@eu.citrix.com, sakaia@jp.fujitsu.com

>From:NISHIGUCHI Naoki
>Sent: Thursday, January 15, 2009 10:05 AM
>> 	4. issues left:
>> 		a. Abrupt glitches are still generated when the 
>QEMU emulated mouse being used and moving mouse quickly in 
>guest A. Passing-through USB mouse/keyboard to guest A, then 
>no glitches.
>
>I also noticed that. Though I don't know the precise cause, I 
>found that 
>dom0 and guest A would consume largely CPU time (hundreds of 
>milliseconds) in such situation. In this case, the priority of 
>dom0 and 
>guest A falls rapidly, then guest B runs until the priority of 
>dom0 and 
>guest A becomes BOOST. In worst case, it will take about 120ms.

I remember that Disheng once told me that BOOST only happens
when a vcpu is woken up and its current priority is UNDER. In your
case guest A should be in OVER after running for hundreds of ms, 
and then it waits a long enough time to become UNDER and then 
BOOST. If this is the case, your enhancement of the BOOST level
seems to solve only part of the latency issue. Here either assigning
a static priority, or adding more BOOST sources (like events, interrupts,
etc.) seems a more complete solution.

>
>> 		b. vcpu migration. As said before, without vcpu 
>pinned, glitches are obvious.
>
>I think that this issue would be solved by adding the condition for 
>migrating the vcpu.
>e.g. If the vcpu has boost credit, don't migrate the vcpu.

Isn't that overkill? What if you already have 3 BOOST vcpus in the 
runqueue of the current cpu, while the other cpus are all running
OVER vcpus? Boost by itself does not look like the only determining 
factor for migration; instead, what you are concerned with is the 
relative priority system-wide.

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: RE: [RFC][PATCH 0/4] Modification of credit scheduler rev2
  2009-01-15  2:56     ` Tian, Kevin
@ 2009-01-15  4:42       ` NISHIGUCHI Naoki
  2009-01-15  5:04         ` Tian, Kevin
  0 siblings, 1 reply; 17+ messages in thread
From: NISHIGUCHI Naoki @ 2009-01-15  4:42 UTC (permalink / raw)
  To: Tian, Kevin, Su, Disheng, xen-devel@lists.xensource.com
  Cc: George Dunlap, Ian.Pratt@eu.citrix.com, aviv@neocleus.com,
	keir.fraser@eu.citrix.com, sakaia@jp.fujitsu.com

Hi, Kevin

Tian, Kevin wrote:
>> From:NISHIGUCHI Naoki
>> Sent: Thursday, January 15, 2009 10:05 AM
>>> 	4. issues left:
>>> 		a. Abrupt glitches are still generated when the 
>> QEMU emulated mouse being used and moving mouse quickly in 
>> guest A. Passing-through USB mouse/keyboard to guest A, then 
>> no glitches.
>>
>> I also noticed that. Though I don't know the precise cause, I 
>> found that 
>> dom0 and guest A would consume largely CPU time (hundreds of 
>> milliseconds) in such situation. In this case, the priority of 
>> dom0 and 
>> guest A falls rapidly, then guest B runs until the priority of 
>> dom0 and 
>> guest A becomes BOOST. In worst case, it will take about 120ms.
> 
> I remember that Disheng once told me that BOOST only happens
> when vcpu is waken up and its current priority is UNDER. In your
> case guest A should be in OVER after running hundreds of ms, 
> and then it waits enough long time to become UNDER and then 
> BOOST. If this is the case, your enhancement on BOOST level
> seems only solving part of the latency issue. Here either assigning
> a static priority, or adding more BOOST source (like event, intr,
> etc) seems more complete solution.

In my case, though the vcpu should be switched to another vcpu within the 
time slice, the cpu running the vcpu doesn't schedule for hundreds of ms. 
I don't know why this happens.
In the credit scheduler, credit consumed by the vcpu must be subtracted. 
Therefore I think it is correct that dom0 and guest A are OVER, because 
my approach is to boost the vcpu within the range of its weight.

I think assigning a static priority is one solution. However, I think 
that it affects credit accounting because we don't know how long the 
domain with the static priority (probably the highest priority) will run.

About adding more BOOST source, could you explain more to me?

>>> 		b. vcpu migration. As said before, without vcpu 
>> pinned, glitches are obvious.
>>
>> I think that this issue would be solved by adding the condition for 
>> migrating the vcpu.
>> e.g. If the vcpu has boost credit, don't migrate the vcpu.
> 
> Is it over-kill? how about you already get 3 BOOST vcpu in 
> runqueue of current cpu, when other cpus are all running
> OVER vcpus? Boost itself looks not the only determinative 
> factor for migration, and instead what you concern is the 
> relative priority in system wide.

Yes, you are right.
I'll consider about runqueue of each cpu and so on.

Thanks for your advice.

Regards,
Naoki

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: RE: [RFC][PATCH 0/4] Modification of credit scheduler rev2
  2009-01-15  2:04   ` NISHIGUCHI Naoki
  2009-01-15  2:56     ` Tian, Kevin
@ 2009-01-15  4:55     ` Su, Disheng
  2009-01-15  5:19       ` NISHIGUCHI Naoki
  1 sibling, 1 reply; 17+ messages in thread
From: Su, Disheng @ 2009-01-15  4:55 UTC (permalink / raw)
  To: NISHIGUCHI Naoki, xen-devel@lists.xensource.com
  Cc: Su, Disheng, George Dunlap, sakaia@jp.fujitsu.com,
	Ian.Pratt@eu.citrix.com, aviv@neocleus.com,
	keir.fraser@eu.citrix.com

NISHIGUCHI Naoki wrote:
> 
>> 		c. the limitation of weight for guest B. I have to set the weight
>> of guest B to 10. It may not be reasonable in real usage case. 
> 
> Is copying large files in background on guest A indispensable?
> In my test, guest A runs only video playing.
> I think that my approach couldn't solve this issue.

You know, guest A is the primary guest for the end user, so we can't make any assumptions about the user's operations in guest A, which is the big challenge for client virtualization IMO.
Weight, cap, and boost credit can all be used together, or a new mechanism, such as static priority as Kevin said, can be added to solve the problem.

> 
>> 	Do you have the experience with audio? I don't know I have properly
>> configured your scheduler or not. Hope the your scheduler can solve
>> the audio issues also.  
> 
> Sorry, I don't have the experience with audio.
> But I'll try to reproduce your configuration and investigate.
> 

Glad to see you are interested in audio also. If you have any problems reproducing the audio issues, please let me know.

> Regards,
> Naoki Nishiguchi
> 
>> 


Best Regards,
Disheng, Su

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: RE: [RFC][PATCH 0/4] Modification of credit scheduler rev2
  2009-01-15  4:42       ` NISHIGUCHI Naoki
@ 2009-01-15  5:04         ` Tian, Kevin
  2009-01-15  6:05           ` NISHIGUCHI Naoki
  0 siblings, 1 reply; 17+ messages in thread
From: Tian, Kevin @ 2009-01-15  5:04 UTC (permalink / raw)
  To: 'NISHIGUCHI Naoki', Su, Disheng,
	xen-devel@lists.xensource.com
  Cc: George Dunlap, Ian.Pratt@eu.citrix.com, aviv@neocleus.com,
	keir.fraser@eu.citrix.com, sakaia@jp.fujitsu.com

>From: NISHIGUCHI Naoki [mailto:nisiguti@jp.fujitsu.com] 
>Sent: Thursday, January 15, 2009 12:43 PM
>
>Hi, Kevin
>
>Tian, Kevin wrote:
>>> From:NISHIGUCHI Naoki
>>> Sent: Thursday, January 15, 2009 10:05 AM
>>>> 	4. issues left:
>>>> 		a. Abrupt glitches are still generated when the 
>>> QEMU emulated mouse being used and moving mouse quickly in 
>>> guest A. Passing-through USB mouse/keyboard to guest A, then 
>>> no glitches.
>>>
>>> I also noticed that. Though I don't know the precise cause, I 
>>> found that 
>>> dom0 and guest A would consume largely CPU time (hundreds of 
>>> milliseconds) in such situation. In this case, the priority of 
>>> dom0 and 
>>> guest A falls rapidly, then guest B runs until the priority of 
>>> dom0 and 
>>> guest A becomes BOOST. In worst case, it will take about 120ms.
>> 
>> I remember that Disheng once told me that BOOST only happens
>> when vcpu is waken up and its current priority is UNDER. In your
>> case guest A should be in OVER after running hundreds of ms, 
>> and then it waits enough long time to become UNDER and then 
>> BOOST. If this is the case, your enhancement on BOOST level
>> seems only solving part of the latency issue. Here either assigning
>> a static priority, or adding more BOOST source (like event, intr,
>> etc) seems more complete solution.
>
>In my case, though the vcpu should be switched to other vcpu in time 
>slice, the cpu running the vcpu doesn't schedule during 
>hundreds of ms. 
>I don't know why this happens.

What's running within your guest B? Unless a fully cpu-intensive workload
is running within guest B, there's a chance for guest B to issue a block 
hypercall once it enters the idle loop, and then once it's blocked, the Xen 
credit scheduler can pick dom0 or guest A anyway. So the first thing you
could do is figure out the activity within guest B.

If guest B is indeed always busy, then you may need to check the 30ms
credit allocation algorithm in the credit scheduler. It looks like there is
some sequence in which guest A may always be given OVER priority due to its
earlier overrun, until guest B also overruns by a similar length. Then in this
punishment period, guest A has no chance to be boosted, with all cycles
granted to guest B instead. If that is intended from a fairness p.o.v., it
may not suit rt usage.

>In credit scheduler, credit consumed by the vcpu must be subtracted. 
>Therefore I think it is correct that dom0 and guest A are OVER because 
>my approach is to boost the vcpu within the range of weight.
>
>I think assigning a static priority is one solution. However, I think 
>that it affects credit accounting because we don't know how long the 
>domain with the static priority (probably highest priority) is run.

It could be one configurable option for some client usages, where 
a coarse-level static priority could better ensure the determinism
needed to satisfy specific rt requirements.

>
>About adding more BOOST source, could you explain more to me?

Currently the only source of boost is the wakeup event on a vcpu
with UNDER priority, to let it catch up, which is purely from a fairness p.o.v.
But for a vcpu with RT requirements, more boost sources can be added.
E.g. when an audio interrupt (either emulated or passthrough) arrives, boost 
the target vcpu and trigger a reschedule softirq immediately to reduce 
the uncertainty of schedule latency. We need such a manual boost 
interface, which is then inserted into some critical event paths where
we believe an immediate schedule is necessary. Disheng is working on
this area now, I think. :-)

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: RE: [RFC][PATCH 0/4] Modification of credit scheduler rev2
  2009-01-15  4:55     ` Su, Disheng
@ 2009-01-15  5:19       ` NISHIGUCHI Naoki
  0 siblings, 0 replies; 17+ messages in thread
From: NISHIGUCHI Naoki @ 2009-01-15  5:19 UTC (permalink / raw)
  To: Su, Disheng, xen-devel@lists.xensource.com
  Cc: George Dunlap, sakaia@jp.fujitsu.com, aviv@neocleus.com,
	Ian.Pratt@eu.citrix.com, keir.fraser@eu.citrix.com

Su, Disheng wrote:
> NISHIGUCHI Naoki wrote:
>>> 		c. the limitation of weight for guest B. I have to set the weight
>>> of guest B to 10. It may not be reasonable in real usage case. 
>> Is copying large files in background on guest A indispensable?
>> In my test, guest A runs only video playing.
>> I think that my approach couldn't solve this issue.
> 
> You know, guest A is the primary guest to end user, so we can't make any assumption about the user's operation in guest A, which is the big challenge for client virtualization IMO.
> Weight, Cap, Boost credit, are all can be used together, or adding new mechanism, such as static priority as Kevin said, to solve the problem.

I see.
That is really the big challenge.
I think we should experiment with various configurations and clarify the 
problems.

>>> 	Do you have the experience with audio? I don't know I have properly
>>> configured your scheduler or not. Hope the your scheduler can solve
>>> the audio issues also.  
>> Sorry, I don't have the experience with audio.
>> But I'll try to reproduce your configuration and investigate.
>>
> 
> Glad to see you have interest with audio also. Any problem when you reproduce the audio issues, pls let me know.

Thanks.

Regards,
Naoki

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: RE: [RFC][PATCH 0/4] Modification of credit scheduler rev2
  2009-01-15  5:04         ` Tian, Kevin
@ 2009-01-15  6:05           ` NISHIGUCHI Naoki
  2009-01-15  6:41             ` Tian, Kevin
  0 siblings, 1 reply; 17+ messages in thread
From: NISHIGUCHI Naoki @ 2009-01-15  6:05 UTC (permalink / raw)
  To: Tian, Kevin, xen-devel@lists.xensource.com
  Cc: Su, Disheng, George Dunlap, sakaia@jp.fujitsu.com,
	Ian.Pratt@eu.citrix.com, aviv@neocleus.com,
	keir.fraser@eu.citrix.com

Tian, Kevin wrote:
>>>>> 	4. issues left:
>>>>> 		a. Abrupt glitches are still generated when the 
>>>> QEMU emulated mouse being used and moving mouse quickly in 
>>>> guest A. Passing-through USB mouse/keyboard to guest A, then 
>>>> no glitches.
>>>>
>>>> I also noticed that. Though I don't know the precise cause, I 
>>>> found that 
>>>> dom0 and guest A would consume largely CPU time (hundreds of 
>>>> milliseconds) in such situation. In this case, the priority of 
>>>> dom0 and 
>>>> guest A falls rapidly, then guest B runs until the priority of 
>>>> dom0 and 
>>>> guest A becomes BOOST. In worst case, it will take about 120ms.
>>> I remember that Disheng once told me that BOOST only happens
>>> when vcpu is waken up and its current priority is UNDER. In your
>>> case guest A should be in OVER after running hundreds of ms, 
>>> and then it waits enough long time to become UNDER and then 
>>> BOOST. If this is the case, your enhancement on BOOST level
>>> seems only solving part of the latency issue. Here either assigning
>>> a static priority, or adding more BOOST source (like event, intr,
>>> etc) seems more complete solution.
>> In my case, though the vcpu should be switched to other vcpu in time 
>> slice, the cpu running the vcpu doesn't schedule during 
>> hundreds of ms. 
>> I don't know why this happens.
> 
> What's running within your guest B? Unless full cpu intensive workload
> happens within guest B, there's chance for guest B to issue block 
> hypercall once it enters idle loop, and then once it's blocked, Xen 
> credit scheduler can pick dom0 or guest A anyway. So 1st thing you
> could figure out the activity within guest B.
> 
> If guest B does be always busy, then you may need to check the 30ms
> credit allocation algorithm in credit scheduler. It looks like some sequence
> that guest A may be always granted as OVER priority due to its earlier
> overrun, until guestB also overruns a similar length. Then in this punish
> period, guest A has no chance to be boosted with all cycles granted to
> guest B instead. if it's intended for fairness p.o.v, it may not suit for rt
> usage.

Sorry, I didn't explain well.
I mean that the softirq for scheduling (SCHEDULE_SOFTIRQ) might not occur 
for hundreds of ms. I found a similar issue when connecting vncviewer 
to guest B. Guest B runs nothing. But I don't use Disheng's configuration.
I assumed that the issue Disheng described is the same issue as mine.

>> In credit scheduler, credit consumed by the vcpu must be subtracted. 
>> Therefore I think it is correct that dom0 and guest A are OVER because 
>> my approach is to boost the vcpu within the range of weight.
>>
>> I think assigning a static priority is one solution. However, I think 
>> that it affects credit accounting because we don't know how long the 
>> domain with the static priority (probably highest priority) is run.
> 
> It could be one configurable option for some client usages, where 
> a coarse-level static priority could better ensure the deterministic
> to satisfy specific rt requirement.

I see.

>> About adding more BOOST source, could you explain more to me?
> 
> Current the only source for boost is the wakeup event on a vcpu
> with UNDER priority to catch up which is simply from fairness p.o.v
> But for vcpu with RT requirement, more boost sources can be added.
> E.g. when audio interrupt (either emulated, or passthrough), boost 
> target vcpu and trigger a reschedule softirq immediately to reduce 
> uncertainty of schedule latency. We need such a manual boost 
> interface which is then inserted into some critical event paths where
> we believe immediate schedule is necessary. Disheng is working on
> this area now, I think. :-)

Thanks.

Regards,
Naoki

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: RE: [RFC][PATCH 0/4] Modification of credit scheduler rev2
  2009-01-15  6:05           ` NISHIGUCHI Naoki
@ 2009-01-15  6:41             ` Tian, Kevin
  2009-01-15  7:01               ` NISHIGUCHI Naoki
  0 siblings, 1 reply; 17+ messages in thread
From: Tian, Kevin @ 2009-01-15  6:41 UTC (permalink / raw)
  To: 'NISHIGUCHI Naoki', xen-devel@lists.xensource.com
  Cc: Su, Disheng, George Dunlap, sakaia@jp.fujitsu.com,
	Ian.Pratt@eu.citrix.com, aviv@neocleus.com,
	keir.fraser@eu.citrix.com

>From: NISHIGUCHI Naoki [mailto:nisiguti@jp.fujitsu.com] 
>Sent: Thursday, January 15, 2009 2:06 PM
>> If guest B does be always busy, then you may need to check the 30ms
>> credit allocation algorithm in credit scheduler. It looks 
>like some sequence
>> that guest A may be always granted as OVER priority due to 
>its earlier
>> overrun, until guestB also overruns a similar length. Then 
>in this punish
>> period, guest A has no chance to be boosted with all cycles 
>granted to
>> guest B instead. if it's intended for fairness p.o.v, it may 
>not suit for rt
>> usage.
>
>Sorry, I didn't explain well.
>I mean that softirq for scheduling (SCHEDULE_SOFTIRQ) might not occur 
>during hundreds of ms. I found similar issue when connecting vncviewer 
>to guest B. Guest B runs nothing. But I don't use Disheng's 
>configuration.
>I assumed that this issue (Disheng said) is the same issue as mine.

Could you double-check your statistics? Every schedule will have a
30ms timer set, regardless of whether the current vcpu is repicked or a
new vcpu is chosen. s_timer_fn then issues SCHEDULE_SOFTIRQ
at 30ms intervals.

My writing above is more about the point that the time-sharing purpose of 
boost is not enough for rt purposes.

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: RE: [RFC][PATCH 0/4] Modification of credit scheduler rev2
  2009-01-15  6:41             ` Tian, Kevin
@ 2009-01-15  7:01               ` NISHIGUCHI Naoki
  2009-01-15  7:04                 ` Tian, Kevin
  0 siblings, 1 reply; 17+ messages in thread
From: NISHIGUCHI Naoki @ 2009-01-15  7:01 UTC (permalink / raw)
  To: Tian, Kevin, xen-devel@lists.xensource.com
  Cc: Su, Disheng, George Dunlap, Ian.Pratt@eu.citrix.com,
	sakaia@jp.fujitsu.com, aviv@neocleus.com,
	keir.fraser@eu.citrix.com

Tian, Kevin wrote:
>> From: NISHIGUCHI Naoki [mailto:nisiguti@jp.fujitsu.com] 
>> Sent: Thursday, January 15, 2009 2:06 PM
>>> If guest B does be always busy, then you may need to check the 30ms
>>> credit allocation algorithm in credit scheduler. It looks 
>> like some sequence
>>> that guest A may be always granted as OVER priority due to 
>> its earlier
>>> overrun, until guestB also overruns a similar length. Then 
>> in this punish
>>> period, guest A has no chance to be boosted with all cycles 
>> granted to
>>> guest B instead. if it's intended for fairness p.o.v, it may 
>> not suit for rt
>>> usage.
>> Sorry, I didn't explain well.
>> I mean that softirq for scheduling (SCHEDULE_SOFTIRQ) might not occur 
>> during hundreds of ms. I found similar issue when connecting vncviewer 
>> to guest B. Guest B runs nothing. But I don't use Disheng's 
>> configuration.
>> I assumed that this issue (Disheng said) is the same issue as mine.
> 
> Could you make sure of your statistics? Every schedule will have a
> 30ms timer set, regardless of whether current vcpu is repicked or a
> new vcpu is chosen. s_timer_fn then issues SCHEDULE_SOFTIRQ
> in 30ms interval.

When connecting vncviewer to guest B, s_timer_fn wasn't called at 30ms 
intervals.

> My above writing is more about that time-sharing purpose for boost 
> is not enough toward rt purpose.

I agree that my approach is not enough for rt usage.

Regards,
Naoki

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: RE: [RFC][PATCH 0/4] Modification of credit scheduler rev2
  2009-01-15  7:01               ` NISHIGUCHI Naoki
@ 2009-01-15  7:04                 ` Tian, Kevin
  0 siblings, 0 replies; 17+ messages in thread
From: Tian, Kevin @ 2009-01-15  7:04 UTC (permalink / raw)
  To: 'NISHIGUCHI Naoki', xen-devel@lists.xensource.com
  Cc: Su, Disheng, George Dunlap, Ian.Pratt@eu.citrix.com,
	sakaia@jp.fujitsu.com, aviv@neocleus.com,
	keir.fraser@eu.citrix.com

>From: NISHIGUCHI Naoki [mailto:nisiguti@jp.fujitsu.com] 
>Sent: Thursday, January 15, 2009 3:02 PM
>
>When connecting vncviewer to guest B, s_timer_fn wasn't called in 30ms 
>interval.
>

Then I would think of it as a bug, possibly from a screwed-up system time? :-)

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2009-01-15  7:04 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-12-18  2:57 [RFC][PATCH 0/4] Modification of credit scheduler rev2 NISHIGUCHI Naoki
2008-12-18  3:00 ` [RFC][PATCH 1/4] sched: more accurate credit scheduling NISHIGUCHI Naoki
2008-12-18  3:02 ` [RFC][PATCH 2/4] sched: change the handling of credits over upper bound NISHIGUCHI Naoki
2008-12-18  3:04 ` [RFC][PATCH 3/4] sched: balance credits of each vcpu of a domain NISHIGUCHI Naoki
2008-12-18  3:06 ` [RFC][PATCH 4/4] sched: introduce boost credit for latency-sensitive domain NISHIGUCHI Naoki
2009-01-13  8:10 ` [RFC][PATCH 0/4] Modification of credit scheduler rev2 Su, Disheng
2009-01-15  2:04   ` NISHIGUCHI Naoki
2009-01-15  2:56     ` Tian, Kevin
2009-01-15  4:42       ` NISHIGUCHI Naoki
2009-01-15  5:04         ` Tian, Kevin
2009-01-15  6:05           ` NISHIGUCHI Naoki
2009-01-15  6:41             ` Tian, Kevin
2009-01-15  7:01               ` NISHIGUCHI Naoki
2009-01-15  7:04                 ` Tian, Kevin
2009-01-15  4:55     ` Su, Disheng
2009-01-15  5:19       ` NISHIGUCHI Naoki
  -- strict thread matches above, loose matches on Subject: below --
2008-12-05 10:01 [RFC][PATCH 0/4] Modification of credit scheduler NISHIGUCHI Naoki
2008-12-08  8:53 ` [RFC][PATCH 4/4] sched: introduce boost credit for latency-sensitive domain NISHIGUCHI Naoki

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.