From mboxrd@z Thu Jan 1 00:00:00 1970 From: Keir Fraser Subject: Re: [PATCH] xen/sched_credit: Use delay to control scheduling frequency Date: Tue, 24 Jan 2012 14:17:40 +0000 Message-ID: References: Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Return-path: In-Reply-To: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: George Dunlap Cc: xen-devel@lists.xensource.com, raistlin@linux.it, JBeulich@suse.com List-Id: xen-devel@lists.xenproject.org On 24/01/2012 12:01, "George Dunlap" wrote: > Ping? It's been in the tree for a week. > On Tue, Jan 10, 2012 at 10:05 AM, George Dunlap > wrote: >> On Mon, Jan 9, 2012 at 10:22 AM, Hui Lv wrote: >>> Updated the warning sentence for ratelimit_us. >>> >>> This patch can improve Xen performance: >>> 1. Basically, the "delay method" can achieve 11% overall performance boost >>> for SPECvirt than original credit scheduler. >>> 2. We have tried 1ms delay and 10ms delay, there is no big difference >>> between these two configurations. (1ms is enough to achieve a good >>> performance) >>> 3. We have compared different load level response time/latency (low, high, >>> peak), "delay method" didn't bring very much response time increase. >>> 4. 1ms delay can reduce 30% context switch at peak performance, where >>> produces the benefits. (int sched_ratelimit_us = 1000 is the recommended >>> setting) >>> >>> Signed-off-by: Hui Lv >>> Signed-off-by: George Dunlap >>> >>> Acked-by: George Dunlap >> >> Just confirming this: >> Acked-by: George Dunlap >> >> Thanks, Hui. 
>> =A0-George >> = >>> = >>> diff -r a4bff36780a3 -r fe8d0ca867aa xen/common/sched_credit.c >>> --- a/xen/common/sched_credit.c Fri Dec 16 18:46:27 2011 +0000 >>> +++ b/xen/common/sched_credit.c Mon Jan 09 05:21:35 2012 -0500 >>> @@ -172,6 +172,7 @@ struct csched_private { >>> =A0 =A0 uint32_t credit; >>> =A0 =A0 int credit_balance; >>> =A0 =A0 uint32_t runq_sort; >>> + =A0 =A0unsigned ratelimit_us; >>> =A0 =A0 /* Period of master and tick in milliseconds */ >>> =A0 =A0 unsigned tslice_ms, tick_period_us, ticks_per_tslice; >>> =A0 =A0 unsigned credits_per_tslice; >>> @@ -1297,10 +1298,15 @@ csched_schedule( >>> =A0 =A0 struct csched_private *prv =3D CSCHED_PRIV(ops); >>> =A0 =A0 struct csched_vcpu *snext; >>> =A0 =A0 struct task_slice ret; >>> + =A0 =A0s_time_t runtime, tslice; >>> = >>> =A0 =A0 CSCHED_STAT_CRANK(schedule); >>> =A0 =A0 CSCHED_VCPU_CHECK(current); >>> = >>> + =A0 =A0runtime =3D now - current->runstate.state_entry_time; >>> + =A0 =A0if ( runtime < 0 ) /* Does this ever happen? */ >>> + =A0 =A0 =A0 =A0runtime =3D 0; >>> + >>> =A0 =A0 if ( !is_idle_vcpu(scurr->vcpu) ) >>> =A0 =A0 { >>> =A0 =A0 =A0 =A0 /* Update credits of a non-idle VCPU. */ >>> @@ -1313,6 +1319,35 @@ csched_schedule( >>> =A0 =A0 =A0 =A0 scurr->pri =3D CSCHED_PRI_IDLE; >>> =A0 =A0 } >>> = >>> + =A0 =A0/* Choices, choices: >>> + =A0 =A0 * - If we have a tasklet, we need to run the idle vcpu no mat= ter what. >>> + =A0 =A0 * - If sched rate limiting is in effect, and the current vcpu= has >>> + =A0 =A0 * =A0 run for less than that amount of time, continue the cur= rent one, >>> + =A0 =A0 * =A0 but with a shorter timeslice and return it immediately >>> + =A0 =A0 * - Otherwise, chose the one with the highest priority (which= may >>> + =A0 =A0 * =A0 be the one currently running) >>> + =A0 =A0 * - If the currently running one is TS_OVER, see if there >>> + =A0 =A0 * =A0 is a higher priority one waiting on the runqueue of ano= ther >>> + =A0 =A0 * =A0 cpu and steal it. 
>>> + =A0 =A0 */ >>> + >>> + =A0 =A0/* If we have schedule rate limiting enabled, check to see >>> + =A0 =A0 * how long we've run for. */ >>> + =A0 =A0if ( !tasklet_work_scheduled >>> + =A0 =A0 =A0 =A0 && prv->ratelimit_us >>> + =A0 =A0 =A0 =A0 && vcpu_runnable(current) >>> + =A0 =A0 =A0 =A0 && !is_idle_vcpu(current) >>> + =A0 =A0 =A0 =A0 && runtime < MICROSECS(prv->ratelimit_us) ) >>> + =A0 =A0{ >>> + =A0 =A0 =A0 =A0snext =3D scurr; >>> + =A0 =A0 =A0 =A0snext->start_time +=3D now; >>> + =A0 =A0 =A0 =A0perfc_incr(delay_ms); >>> + =A0 =A0 =A0 =A0tslice =3D MICROSECS(prv->ratelimit_us); >>> + =A0 =A0 =A0 =A0ret.migrated =3D 0; >>> + =A0 =A0 =A0 =A0goto out; >>> + =A0 =A0} >>> + =A0 =A0tslice =3D MILLISECS(prv->tslice_ms); >>> + >>> =A0 =A0 /* >>> =A0 =A0 =A0* Select next runnable local VCPU (ie top of local runq) >>> =A0 =A0 =A0*/ >>> @@ -1367,11 +1402,12 @@ csched_schedule( >>> =A0 =A0 if ( !is_idle_vcpu(snext->vcpu) ) >>> =A0 =A0 =A0 =A0 snext->start_time +=3D now; >>> = >>> +out: >>> =A0 =A0 /* >>> =A0 =A0 =A0* Return task to run next... >>> =A0 =A0 =A0*/ >>> =A0 =A0 ret.time =3D (is_idle_vcpu(snext->vcpu) ? 
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0-1 : MILLISECS(prv->tslice_ms)); >>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0-1 : tslice); >>> =A0 =A0 ret.task =3D snext->vcpu; >>> = >>> =A0 =A0 CSCHED_VCPU_CHECK(ret.task); >>> @@ -1533,6 +1569,15 @@ csched_init(struct scheduler *ops) >>> =A0 =A0 prv->tick_period_us =3D prv->tslice_ms * 1000 / prv->ticks_per_= tslice; >>> =A0 =A0 prv->credits_per_tslice =3D CSCHED_CREDITS_PER_MSEC * prv->tsli= ce_ms; >>> = >>> + =A0 =A0if ( MICROSECS(sched_ratelimit_us) > MILLISECS(sched_credit_ts= lice_ms) >>> ) >>> + =A0 =A0{ >>> + =A0 =A0 =A0 =A0printk("WARNING: sched_ratelimit_us >" >>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 "sched_credit_tslice_ms is undefined\n" >>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 "Setting ratelimit_us to 1000 * tslice_ms= \n"); >>> + =A0 =A0 =A0 =A0prv->ratelimit_us =3D 1000 * prv->tslice_ms; >>> + =A0 =A0} >>> + =A0 =A0else >>> + =A0 =A0 =A0 =A0prv->ratelimit_us =3D sched_ratelimit_us; >>> =A0 =A0 return 0; >>> =A0} >>> = >>> diff -r a4bff36780a3 -r fe8d0ca867aa xen/common/schedule.c >>> --- a/xen/common/schedule.c =A0 =A0 Fri Dec 16 18:46:27 2011 +0000 >>> +++ b/xen/common/schedule.c =A0 =A0 Mon Jan 09 05:21:35 2012 -0500 >>> @@ -47,6 +47,11 @@ string_param("sched", opt_sched); >>> =A0bool_t sched_smt_power_savings =3D 0; >>> =A0boolean_param("sched_smt_power_savings", sched_smt_power_savings); >>> = >>> +/* Default scheduling rate limit: 1ms >>> + * The behavior when sched_ratelimit_us is greater than >>> sched_credit_tslice_ms is undefined >>> + * */ >>> +int sched_ratelimit_us =3D 1000; >>> +integer_param("sched_ratelimit_us", sched_ratelimit_us); >>> =A0/* Various timer handlers. 
*/ >>> =A0static void s_timer_fn(void *unused); >>> =A0static void vcpu_periodic_timer_fn(void *data); >>> diff -r a4bff36780a3 -r fe8d0ca867aa xen/include/xen/perfc_defn.h >>> --- a/xen/include/xen/perfc_defn.h =A0 =A0 =A0Fri Dec 16 18:46:27 2011 = +0000 >>> +++ b/xen/include/xen/perfc_defn.h =A0 =A0 =A0Mon Jan 09 05:21:35 2012 = -0500 >>> @@ -16,6 +16,7 @@ PERFCOUNTER(sched_irq, =A0 =A0 =A0 =A0 =A0 =A0 =A0"sch >>> =A0PERFCOUNTER(sched_run, =A0 =A0 =A0 =A0 =A0 =A0 =A0"sched: runs throu= gh scheduler") >>> =A0PERFCOUNTER(sched_ctx, =A0 =A0 =A0 =A0 =A0 =A0 =A0"sched: context sw= itches") >>> = >>> +PERFCOUNTER(delay_ms, =A0 =A0 =A0 =A0 =A0 =A0 =A0 "csched: delay") >>> =A0PERFCOUNTER(vcpu_check, =A0 =A0 =A0 =A0 =A0 =A0 "csched: vcpu_check") >>> =A0PERFCOUNTER(schedule, =A0 =A0 =A0 =A0 =A0 =A0 =A0 "csched: schedule") >>> =A0PERFCOUNTER(acct_run, =A0 =A0 =A0 =A0 =A0 =A0 =A0 "csched: acct_run") >>> diff -r a4bff36780a3 -r fe8d0ca867aa xen/include/xen/sched-if.h >>> --- a/xen/include/xen/sched-if.h =A0 =A0 =A0 =A0Fri Dec 16 18:46:27 201= 1 +0000 >>> +++ b/xen/include/xen/sched-if.h =A0 =A0 =A0 =A0Mon Jan 09 05:21:35 201= 2 -0500 >>> @@ -16,6 +16,11 @@ extern struct cpupool *cpupool0; >>> =A0/* cpus currently in no cpupool */ >>> =A0extern cpumask_t cpupool_free_cpus; >>> = >>> +/* Scheduler generic parameters >>> + * */ >>> +extern int sched_ratelimit_us; >>> + >>> + >>> =A0/* >>> =A0* In order to allow a scheduler to remap the lock->cpu mapping, >>> =A0* we have a per-cpu pointer, along with a pre-allocated set of >>> = >>> _______________________________________________ >>> Xen-devel mailing list >>> Xen-devel@lists.xensource.com >>> http://lists.xensource.com/xen-devel