xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Hui Lv <hui.lv@intel.com>
To: xen-devel@lists.xensource.com, George.Dunlap@eu.citrix.com,
	raistlin@linux.it, JBeulich@suse.com, Ian.Campbell@citrix.com
Cc: hui.lv@intel.com
Subject: [PATCH] xen/sched_credit: Use delay to control scheduling frequency
Date: Mon, 09 Jan 2012 05:22:36 -0500	[thread overview]
Message-ID: <fe8d0ca867aa280cbfb3.1326104556@wsm-ep-n0> (raw)

Updated the warning sentence for ratelimit_us.

This patch can improve Xen performance:
1. Basically, the "delay method" can achieve 11% overall performance boost for SPECvirt than original credit scheduler.
2. We have tried 1ms delay and 10ms delay, there is no big difference between these two configurations. (1ms is enough to achieve a good performance)
3. We have compared different load level response time/latency (low, high, peak), "delay method" didn't bring very much response time increase.
4. 1ms delay can reduce 30% context switch at peak performance, where produces the benefits. (int sched_ratelimit_us = 1000 is the recommended setting)

Signed-off-by: Hui Lv <hui.lv@intel.com>
Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

Acked-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r a4bff36780a3 -r fe8d0ca867aa xen/common/sched_credit.c
--- a/xen/common/sched_credit.c	Fri Dec 16 18:46:27 2011 +0000
+++ b/xen/common/sched_credit.c	Mon Jan 09 05:21:35 2012 -0500
@@ -172,6 +172,7 @@ struct csched_private {
     uint32_t credit;
     int credit_balance;
     uint32_t runq_sort;
+    unsigned ratelimit_us;
     /* Period of master and tick in milliseconds */
     unsigned tslice_ms, tick_period_us, ticks_per_tslice;
     unsigned credits_per_tslice;
@@ -1297,10 +1298,15 @@ csched_schedule(
     struct csched_private *prv = CSCHED_PRIV(ops);
     struct csched_vcpu *snext;
     struct task_slice ret;
+    s_time_t runtime, tslice;
 
     CSCHED_STAT_CRANK(schedule);
     CSCHED_VCPU_CHECK(current);
 
+    runtime = now - current->runstate.state_entry_time;
+    if ( runtime < 0 ) /* Does this ever happen? */
+        runtime = 0;
+
     if ( !is_idle_vcpu(scurr->vcpu) )
     {
         /* Update credits of a non-idle VCPU. */
@@ -1313,6 +1319,35 @@ csched_schedule(
         scurr->pri = CSCHED_PRI_IDLE;
     }
 
+    /* Choices, choices:
+     * - If we have a tasklet, we need to run the idle vcpu no matter what.
+     * - If sched rate limiting is in effect, and the current vcpu has
+     *   run for less than that amount of time, continue the current one,
+     *   but with a shorter timeslice and return it immediately
+     * - Otherwise, chose the one with the highest priority (which may
+     *   be the one currently running)
+     * - If the currently running one is TS_OVER, see if there
+     *   is a higher priority one waiting on the runqueue of another
+     *   cpu and steal it.
+     */
+
+    /* If we have schedule rate limiting enabled, check to see
+     * how long we've run for. */
+    if ( !tasklet_work_scheduled
+         && prv->ratelimit_us
+         && vcpu_runnable(current)
+         && !is_idle_vcpu(current)
+         && runtime < MICROSECS(prv->ratelimit_us) )
+    {
+        snext = scurr;
+        snext->start_time += now;
+        perfc_incr(delay_ms);
+        tslice = MICROSECS(prv->ratelimit_us);
+        ret.migrated = 0;
+        goto out;
+    }
+    tslice = MILLISECS(prv->tslice_ms);
+
     /*
      * Select next runnable local VCPU (ie top of local runq)
      */
@@ -1367,11 +1402,12 @@ csched_schedule(
     if ( !is_idle_vcpu(snext->vcpu) )
         snext->start_time += now;
 
+out:
     /*
      * Return task to run next...
      */
     ret.time = (is_idle_vcpu(snext->vcpu) ?
-                -1 : MILLISECS(prv->tslice_ms));
+                -1 : tslice);
     ret.task = snext->vcpu;
 
     CSCHED_VCPU_CHECK(ret.task);
@@ -1533,6 +1569,15 @@ csched_init(struct scheduler *ops)
     prv->tick_period_us = prv->tslice_ms * 1000 / prv->ticks_per_tslice;
     prv->credits_per_tslice = CSCHED_CREDITS_PER_MSEC * prv->tslice_ms;
 
+    if ( MICROSECS(sched_ratelimit_us) > MILLISECS(sched_credit_tslice_ms) )
+    {
+        printk("WARNING: sched_ratelimit_us >" 
+               "sched_credit_tslice_ms is undefined\n"
+               "Setting ratelimit_us to 1000 * tslice_ms\n");
+        prv->ratelimit_us = 1000 * prv->tslice_ms;
+    }
+    else
+        prv->ratelimit_us = sched_ratelimit_us;
     return 0;
 }
 
diff -r a4bff36780a3 -r fe8d0ca867aa xen/common/schedule.c
--- a/xen/common/schedule.c	Fri Dec 16 18:46:27 2011 +0000
+++ b/xen/common/schedule.c	Mon Jan 09 05:21:35 2012 -0500
@@ -47,6 +47,11 @@ string_param("sched", opt_sched);
 bool_t sched_smt_power_savings = 0;
 boolean_param("sched_smt_power_savings", sched_smt_power_savings);
 
+/* Default scheduling rate limit: 1ms 
+ * The behavior when sched_ratelimit_us is greater than sched_credit_tslice_ms is undefined
+ * */
+int sched_ratelimit_us = 1000;
+integer_param("sched_ratelimit_us", sched_ratelimit_us);
 /* Various timer handlers. */
 static void s_timer_fn(void *unused);
 static void vcpu_periodic_timer_fn(void *data);
diff -r a4bff36780a3 -r fe8d0ca867aa xen/include/xen/perfc_defn.h
--- a/xen/include/xen/perfc_defn.h	Fri Dec 16 18:46:27 2011 +0000
+++ b/xen/include/xen/perfc_defn.h	Mon Jan 09 05:21:35 2012 -0500
@@ -16,6 +16,7 @@ PERFCOUNTER(sched_irq,              "sch
 PERFCOUNTER(sched_run,              "sched: runs through scheduler")
 PERFCOUNTER(sched_ctx,              "sched: context switches")
 
+PERFCOUNTER(delay_ms,               "csched: delay")
 PERFCOUNTER(vcpu_check,             "csched: vcpu_check")
 PERFCOUNTER(schedule,               "csched: schedule")
 PERFCOUNTER(acct_run,               "csched: acct_run")
diff -r a4bff36780a3 -r fe8d0ca867aa xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h	Fri Dec 16 18:46:27 2011 +0000
+++ b/xen/include/xen/sched-if.h	Mon Jan 09 05:21:35 2012 -0500
@@ -16,6 +16,11 @@ extern struct cpupool *cpupool0;
 /* cpus currently in no cpupool */
 extern cpumask_t cpupool_free_cpus;
 
+/* Scheduler generic parameters
+ * */
+extern int sched_ratelimit_us;
+
+
 /*
  * In order to allow a scheduler to remap the lock->cpu mapping,
  * we have a per-cpu pointer, along with a pre-allocated set of

             reply	other threads:[~2012-01-09 10:22 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-01-09 10:22 Hui Lv [this message]
2012-01-10 10:05 ` [PATCH] xen/sched_credit: Use delay to control scheduling frequency George Dunlap
2012-01-24 12:01   ` George Dunlap
2012-01-24 14:17     ` Keir Fraser
2012-01-24 14:31       ` George Dunlap
  -- strict thread matches above, loose matches on Subject: below --
2011-12-26  8:46 Hui Lv
2012-01-02  8:19 ` Jan Beulich
2012-01-05  4:08   ` Lv, Hui
2012-01-06 19:50   ` George Dunlap
2012-01-06 19:56 ` George Dunlap
2012-01-08 12:03   ` Lv, Hui
2012-01-09  9:49     ` George Dunlap
2011-12-19 22:13 Hui Lv
2011-12-20  9:07 ` Ian Campbell
2011-12-20  9:44   ` Lv, Hui
2011-12-20 10:09     ` Dario Faggioli
2011-12-20 10:26       ` George Dunlap
2011-12-20 10:51         ` Dario Faggioli
2011-12-20 12:14         ` Lv, Hui
2011-12-20 10:21   ` George Dunlap

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fe8d0ca867aa280cbfb3.1326104556@wsm-ep-n0 \
    --to=hui.lv@intel.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=JBeulich@suse.com \
    --cc=raistlin@linux.it \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).