From mboxrd@z Thu Jan  1 00:00:00 1970
From: Keir Fraser <keir@xen.org>
Subject: Re: [PATCH] xen/sched_credit: Use delay to control
 scheduling frequency
Date: Tue, 24 Jan 2012 14:17:40 +0000
Message-ID: <CB446E04.38059%keir@xen.org>
References: <CAFLBxZYa=yWp2aBrbN09MLyqhqEM86=zHAeQDuEEHeQY-6vJOg@mail.gmail.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="iso-8859-1"
Content-Transfer-Encoding: quoted-printable
Return-path: <xen-devel-bounces@lists.xensource.com>
In-Reply-To: <CAFLBxZYa=yWp2aBrbN09MLyqhqEM86=zHAeQDuEEHeQY-6vJOg@mail.gmail.com>
List-Unsubscribe: <http://lists.xensource.com/mailman/options/xen-devel>,
	<mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
List-Post: <mailto:xen-devel@lists.xensource.com>
List-Help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-Subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>,
	<mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
Sender: xen-devel-bounces@lists.xensource.com
Errors-To: xen-devel-bounces@lists.xensource.com
To: George Dunlap <George.Dunlap@eu.citrix.com>
Cc: xen-devel@lists.xensource.com, raistlin@linux.it, JBeulich@suse.com
List-Id: xen-devel@lists.xenproject.org

On 24/01/2012 12:01, "George Dunlap" <George.Dunlap@eu.citrix.com> wrote:

> Ping?

It's been in the tree for a week.

> On Tue, Jan 10, 2012 at 10:05 AM, George Dunlap
> <George.Dunlap@eu.citrix.com> wrote:
>> On Mon, Jan 9, 2012 at 10:22 AM, Hui Lv <hui.lv@intel.com> wrote:
>>> Updated the warning sentence for ratelimit_us.
>>>
>>> This patch can improve Xen performance:
>>> 1. Basically, the "delay method" can achieve 11% overall performance boost
>>> for SPECvirt than original credit scheduler.
>>> 2. We have tried 1ms delay and 10ms delay, there is no big difference
>>> between these two configurations. (1ms is enough to achieve a good
>>> performance)
>>> 3. We have compared different load level response time/latency (low, high,
>>> peak), "delay method" didn't bring very much response time increase.
>>> 4. 1ms delay can reduce 30% context switch at peak performance, where
>>> produces the benefits. (int sched_ratelimit_us = 1000 is the recommended
>>> setting)
>>>
>>> Signed-off-by: Hui Lv <hui.lv@intel.com>
>>> Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
>>> =

>>> Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
>>
>> Just confirming this:
>> Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
>>
>> Thanks, Hui.
>>  -George
>>
>>>
>>> diff -r a4bff36780a3 -r fe8d0ca867aa xen/common/sched_credit.c
>>> --- a/xen/common/sched_credit.c Fri Dec 16 18:46:27 2011 +0000
>>> +++ b/xen/common/sched_credit.c Mon Jan 09 05:21:35 2012 -0500
>>> @@ -172,6 +172,7 @@ struct csched_private {
>>> =A0 =A0 uint32_t credit;
>>> =A0 =A0 int credit_balance;
>>> =A0 =A0 uint32_t runq_sort;
>>> + =A0 =A0unsigned ratelimit_us;
>>> =A0 =A0 /* Period of master and tick in milliseconds */
>>> =A0 =A0 unsigned tslice_ms, tick_period_us, ticks_per_tslice;
>>> =A0 =A0 unsigned credits_per_tslice;
>>> @@ -1297,10 +1298,15 @@ csched_schedule(
>>> =A0 =A0 struct csched_private *prv =3D CSCHED_PRIV(ops);
>>> =A0 =A0 struct csched_vcpu *snext;
>>> =A0 =A0 struct task_slice ret;
>>> + =A0 =A0s_time_t runtime, tslice;
>>> =

>>> =A0 =A0 CSCHED_STAT_CRANK(schedule);
>>> =A0 =A0 CSCHED_VCPU_CHECK(current);
>>> =

>>> + =A0 =A0runtime =3D now - current->runstate.state_entry_time;
>>> + =A0 =A0if ( runtime < 0 ) /* Does this ever happen? */
>>> + =A0 =A0 =A0 =A0runtime =3D 0;
>>> +
>>> =A0 =A0 if ( !is_idle_vcpu(scurr->vcpu) )
>>> =A0 =A0 {
>>> =A0 =A0 =A0 =A0 /* Update credits of a non-idle VCPU. */
>>> @@ -1313,6 +1319,35 @@ csched_schedule(
>>> =A0 =A0 =A0 =A0 scurr->pri =3D CSCHED_PRI_IDLE;
>>> =A0 =A0 }
>>> =

>>> + =A0 =A0/* Choices, choices:
>>> + =A0 =A0 * - If we have a tasklet, we need to run the idle vcpu no mat=
ter what.
>>> + =A0 =A0 * - If sched rate limiting is in effect, and the current vcpu=
 has
>>> + =A0 =A0 * =A0 run for less than that amount of time, continue the cur=
rent one,
>>> + =A0 =A0 * =A0 but with a shorter timeslice and return it immediately
>>> + =A0 =A0 * - Otherwise, chose the one with the highest priority (which=
 may
>>> + =A0 =A0 * =A0 be the one currently running)
>>> + =A0 =A0 * - If the currently running one is TS_OVER, see if there
>>> + =A0 =A0 * =A0 is a higher priority one waiting on the runqueue of ano=
ther
>>> + =A0 =A0 * =A0 cpu and steal it.
>>> + =A0 =A0 */
>>> +
>>> + =A0 =A0/* If we have schedule rate limiting enabled, check to see
>>> + =A0 =A0 * how long we've run for. */
>>> + =A0 =A0if ( !tasklet_work_scheduled
>>> + =A0 =A0 =A0 =A0 && prv->ratelimit_us
>>> + =A0 =A0 =A0 =A0 && vcpu_runnable(current)
>>> + =A0 =A0 =A0 =A0 && !is_idle_vcpu(current)
>>> + =A0 =A0 =A0 =A0 && runtime < MICROSECS(prv->ratelimit_us) )
>>> + =A0 =A0{
>>> + =A0 =A0 =A0 =A0snext =3D scurr;
>>> + =A0 =A0 =A0 =A0snext->start_time +=3D now;
>>> + =A0 =A0 =A0 =A0perfc_incr(delay_ms);
>>> + =A0 =A0 =A0 =A0tslice =3D MICROSECS(prv->ratelimit_us);
>>> + =A0 =A0 =A0 =A0ret.migrated =3D 0;
>>> + =A0 =A0 =A0 =A0goto out;
>>> + =A0 =A0}
>>> + =A0 =A0tslice =3D MILLISECS(prv->tslice_ms);
>>> +
>>> =A0 =A0 /*
>>> =A0 =A0 =A0* Select next runnable local VCPU (ie top of local runq)
>>> =A0 =A0 =A0*/
>>> @@ -1367,11 +1402,12 @@ csched_schedule(
>>> =A0 =A0 if ( !is_idle_vcpu(snext->vcpu) )
>>> =A0 =A0 =A0 =A0 snext->start_time +=3D now;
>>> =

>>> +out:
>>> =A0 =A0 /*
>>> =A0 =A0 =A0* Return task to run next...
>>> =A0 =A0 =A0*/
>>> =A0 =A0 ret.time =3D (is_idle_vcpu(snext->vcpu) ?
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0-1 : MILLISECS(prv->tslice_ms));
>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0-1 : tslice);
>>> =A0 =A0 ret.task =3D snext->vcpu;
>>> =

>>> =A0 =A0 CSCHED_VCPU_CHECK(ret.task);
>>> @@ -1533,6 +1569,15 @@ csched_init(struct scheduler *ops)
>>> =A0 =A0 prv->tick_period_us =3D prv->tslice_ms * 1000 / prv->ticks_per_=
tslice;
>>> =A0 =A0 prv->credits_per_tslice =3D CSCHED_CREDITS_PER_MSEC * prv->tsli=
ce_ms;
>>> =

>>> + =A0 =A0if ( MICROSECS(sched_ratelimit_us) > MILLISECS(sched_credit_ts=
lice_ms)
>>> )
>>> + =A0 =A0{
>>> + =A0 =A0 =A0 =A0printk("WARNING: sched_ratelimit_us >"
>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 "sched_credit_tslice_ms is undefined\n"
>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 "Setting ratelimit_us to 1000 * tslice_ms=
\n");
>>> + =A0 =A0 =A0 =A0prv->ratelimit_us =3D 1000 * prv->tslice_ms;
>>> + =A0 =A0}
>>> + =A0 =A0else
>>> + =A0 =A0 =A0 =A0prv->ratelimit_us =3D sched_ratelimit_us;
>>> =A0 =A0 return 0;
>>> =A0}
>>> =

>>> diff -r a4bff36780a3 -r fe8d0ca867aa xen/common/schedule.c
>>> --- a/xen/common/schedule.c =A0 =A0 Fri Dec 16 18:46:27 2011 +0000
>>> +++ b/xen/common/schedule.c =A0 =A0 Mon Jan 09 05:21:35 2012 -0500
>>> @@ -47,6 +47,11 @@ string_param("sched", opt_sched);
>>> =A0bool_t sched_smt_power_savings =3D 0;
>>> =A0boolean_param("sched_smt_power_savings", sched_smt_power_savings);
>>> =

>>> +/* Default scheduling rate limit: 1ms
>>> + * The behavior when sched_ratelimit_us is greater than
>>> sched_credit_tslice_ms is undefined
>>> + * */
>>> +int sched_ratelimit_us =3D 1000;
>>> +integer_param("sched_ratelimit_us", sched_ratelimit_us);
>>> =A0/* Various timer handlers. */
>>> =A0static void s_timer_fn(void *unused);
>>> =A0static void vcpu_periodic_timer_fn(void *data);
>>> diff -r a4bff36780a3 -r fe8d0ca867aa xen/include/xen/perfc_defn.h
>>> --- a/xen/include/xen/perfc_defn.h =A0 =A0 =A0Fri Dec 16 18:46:27 2011 =
+0000
>>> +++ b/xen/include/xen/perfc_defn.h =A0 =A0 =A0Mon Jan 09 05:21:35 2012 =
-0500
>>> @@ -16,6 +16,7 @@ PERFCOUNTER(sched_irq, =A0 =A0 =A0 =A0 =A0 =A0 =A0"sch
>>> =A0PERFCOUNTER(sched_run, =A0 =A0 =A0 =A0 =A0 =A0 =A0"sched: runs throu=
gh scheduler")
>>> =A0PERFCOUNTER(sched_ctx, =A0 =A0 =A0 =A0 =A0 =A0 =A0"sched: context sw=
itches")
>>> =

>>> +PERFCOUNTER(delay_ms, =A0 =A0 =A0 =A0 =A0 =A0 =A0 "csched: delay")
>>> =A0PERFCOUNTER(vcpu_check, =A0 =A0 =A0 =A0 =A0 =A0 "csched: vcpu_check")
>>> =A0PERFCOUNTER(schedule, =A0 =A0 =A0 =A0 =A0 =A0 =A0 "csched: schedule")
>>> =A0PERFCOUNTER(acct_run, =A0 =A0 =A0 =A0 =A0 =A0 =A0 "csched: acct_run")
>>> diff -r a4bff36780a3 -r fe8d0ca867aa xen/include/xen/sched-if.h
>>> --- a/xen/include/xen/sched-if.h =A0 =A0 =A0 =A0Fri Dec 16 18:46:27 201=
1 +0000
>>> +++ b/xen/include/xen/sched-if.h =A0 =A0 =A0 =A0Mon Jan 09 05:21:35 201=
2 -0500
>>> @@ -16,6 +16,11 @@ extern struct cpupool *cpupool0;
>>> =A0/* cpus currently in no cpupool */
>>> =A0extern cpumask_t cpupool_free_cpus;
>>> =

>>> +/* Scheduler generic parameters
>>> + * */
>>> +extern int sched_ratelimit_us;
>>> +
>>> +
>>> =A0/*
>>> =A0* In order to allow a scheduler to remap the lock->cpu mapping,
>>> =A0* we have a per-cpu pointer, along with a pre-allocated set of
>>> =

>>> _______________________________________________
>>> Xen-devel mailing list
>>> Xen-devel@lists.xensource.com
>>> http://lists.xensource.com/xen-devel