From mboxrd@z Thu Jan 1 00:00:00 1970 From: Juergen Gross Subject: Re: Hypervisor crash(!) on xl cpupool-numa-split Date: Thu, 10 Feb 2011 07:42:39 +0100 Message-ID: <4D5388DF.8040900@ts.fujitsu.com> References: <4D41FD3A.5090506@amd.com> <201102021539.06664.stephan.diestelhorst@amd.com> <4D4974D1.1080503@ts.fujitsu.com> <201102021701.05665.stephan.diestelhorst@amd.com> <4D4A43B7.5040707@ts.fujitsu.com> <4D4A72D8.3020502@ts.fujitsu.com> <4D4C08B6.30600@amd.com> <4D4FE7E2.9070605@amd.com> <4D4FF452.6060508@ts.fujitsu.com> <4D50D80F.9000007@ts.fujitsu.com> <4D517051.10402@amd.com> <4D529BD9.5050200@amd.com> <4D52A2CD.9090507@ts.fujitsu.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------000709040507080402040305" Return-path: In-Reply-To: <4D52A2CD.9090507@ts.fujitsu.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: Andre Przywara Cc: George Dunlap , "xen-devel@lists.xensource.com" , "Diestelhorst, Stephan" List-Id: xen-devel@lists.xenproject.org This is a multi-part message in MIME format. --------------000709040507080402040305 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit On 02/09/11 15:21, Juergen Gross wrote: > Andre, George, > > > What seems to be interesting: I think the problem did always occur when > a new cpupool was created and the first cpu was moved to it. > > I think my previous assumption regarding the master_ticker was not too bad. > I think somehow the master_ticker of the new cpupool is becoming active > before the scheduler is really initialized properly. This could happen, if > enough time is spent between alloc_pdata for the cpu to be moved and the > critical section in schedule_cpu_switch(). > > The solution should be to activate the timers only if the scheduler is > ready for them. > > George, do you think the master_ticker should be stopped in suspend_ticker > as well? I still see potential problems for entering deep C-States. I think > I'll prepare a patch which will keep the master_ticker active for the > C-State case and migrate it for the schedule_cpu_switch() case. Okay, here is a patch for this. It ran on my 4-core machine without any problems. Andre, could you give it a try? Juergen -- Juergen Gross Principal Developer Operating Systems TSP ES&S SWE OS6 Telephone: +49 (0) 89 3222 2967 Fujitsu Technology Solutions e-mail: juergen.gross@ts.fujitsu.com Domagkstr. 28 Internet: ts.fujitsu.com D-80807 Muenchen Company details: ts.fujitsu.com/imprint.html --------------000709040507080402040305 Content-Type: text/x-patch; name="ticker.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="ticker.patch" diff -r 1967c7c290eb xen/common/sched_credit.c --- a/xen/common/sched_credit.c Wed Feb 09 12:03:09 2011 +0000 +++ b/xen/common/sched_credit.c Thu Feb 10 07:39:27 2011 +0100 @@ -50,6 +50,8 @@ (CSCHED_CREDITS_PER_MSEC * CSCHED_MSECS_PER_TSLICE) #define CSCHED_CREDITS_PER_ACCT \ (CSCHED_CREDITS_PER_MSEC * CSCHED_MSECS_PER_TICK * CSCHED_TICKS_PER_ACCT) +#define CSCHED_ACCT_TSLICE \ + (MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT) /* @@ -170,6 +172,7 @@ struct csched_private { uint32_t ncpus; struct timer master_ticker; unsigned int master; + int master_active; cpumask_t idlers; cpumask_t cpus; uint32_t weight; @@ -320,6 +323,7 @@ csched_free_pdata(const struct scheduler struct csched_private *prv = CSCHED_PRIV(ops); struct csched_pcpu *spc = pcpu; unsigned long flags; + uint64_t now = NOW(); if ( spc == NULL ) return; @@ -334,10 +338,16 @@ csched_free_pdata(const struct scheduler { prv->master = first_cpu(prv->cpus); migrate_timer(&prv->master_ticker, prv->master); + if ( prv->master_active ) + set_timer(&prv->master_ticker, now + CSCHED_ACCT_TSLICE + - now % CSCHED_ACCT_TSLICE); } kill_timer(&spc->ticker); if ( prv->ncpus == 0 ) + { kill_timer(&prv->master_ticker); + prv->master_active = 0; + } spin_unlock_irqrestore(&prv->lock, flags); @@ -367,12 +377,10 @@ csched_alloc_pdata(const struct schedule { prv->master = cpu; init_timer(&prv->master_ticker, csched_acct, prv, cpu); - set_timer(&prv->master_ticker, NOW() + - MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT); + prv->master_active = 0; } init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu); - set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK)); INIT_LIST_HEAD(&spc->runq); spc->runq_sort_last = prv->runq_sort; @@ -1138,8 +1146,7 @@ csched_acct(void* dummy) prv->runq_sort++; out: - set_timer( &prv->master_ticker, NOW() + - MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT ); + set_timer( &prv->master_ticker, NOW() + CSCHED_ACCT_TSLICE ); } static void @@ -1529,24 +1536,39 @@ csched_deinit(const struct scheduler *op xfree(prv); } -static void csched_tick_suspend(const struct scheduler *ops, unsigned int cpu) +static void csched_tick_suspend(const struct scheduler *ops, unsigned int cpu, int temp) { + struct csched_private *prv; struct csched_pcpu *spc; + prv = CSCHED_PRIV(ops); spc = CSCHED_PCPU(cpu); stop_timer(&spc->ticker); + if ( (prv->master == cpu) && !temp ) + { + prv->master = cycle_cpu(prv->master, prv->cpus); + migrate_timer(&prv->master_ticker, prv->master); + } } static void csched_tick_resume(const struct scheduler *ops, unsigned int cpu) { + struct csched_private *prv; struct csched_pcpu *spc; uint64_t now = NOW(); + prv = CSCHED_PRIV(ops); spc = CSCHED_PCPU(cpu); set_timer(&spc->ticker, now + MILLISECS(CSCHED_MSECS_PER_TICK) - now % MILLISECS(CSCHED_MSECS_PER_TICK) ); + if ( (prv->master == cpu) && !prv->master_active ) + { + set_timer(&prv->master_ticker, now + CSCHED_ACCT_TSLICE + - now % CSCHED_ACCT_TSLICE); + prv->master_active = 1; + } } static struct csched_private _csched_priv; diff -r 1967c7c290eb xen/common/schedule.c --- a/xen/common/schedule.c Wed Feb 09 12:03:09 2011 +0000 +++ b/xen/common/schedule.c Thu Feb 10 07:39:27 2011 +0100 @@ -1208,6 +1208,8 @@ static int cpu_schedule_up(unsigned int if ( (ops.alloc_pdata != NULL) && ((sd->sched_priv = ops.alloc_pdata(&ops, cpu)) == NULL) ) return -ENOMEM; + if ( ops.tick_resume != NULL ) + ops.tick_resume(&ops, cpu); return 0; } @@ -1286,6 +1288,8 @@ void __init scheduler_init(void) if ( ops.alloc_pdata && !(this_cpu(schedule_data).sched_priv = ops.alloc_pdata(&ops, 0)) ) BUG(); + if ( ops.tick_resume != NULL ) + ops.tick_resume(&ops, 0); } int schedule_cpu_switch(unsigned int cpu, struct cpupool *c) @@ -1312,7 +1316,7 @@ int schedule_cpu_switch(unsigned int cpu pcpu_schedule_lock_irqsave(cpu, flags); - SCHED_OP(old_ops, tick_suspend, cpu); + SCHED_OP(old_ops, tick_suspend, cpu, 0); vpriv_old = idle->sched_priv; idle->sched_priv = vpriv; per_cpu(scheduler, cpu) = new_ops; @@ -1392,7 +1396,7 @@ void sched_tick_suspend(void) unsigned int cpu = smp_processor_id(); sched = per_cpu(scheduler, cpu); - SCHED_OP(sched, tick_suspend, cpu); + SCHED_OP(sched, tick_suspend, cpu, 1); } void sched_tick_resume(void) diff -r 1967c7c290eb xen/include/xen/sched-if.h --- a/xen/include/xen/sched-if.h Wed Feb 09 12:03:09 2011 +0000 +++ b/xen/include/xen/sched-if.h Thu Feb 10 07:39:27 2011 +0100 @@ -175,7 +175,7 @@ struct scheduler { void (*dump_settings) (const struct scheduler *); void (*dump_cpu_state) (const struct scheduler *, int); - void (*tick_suspend) (const struct scheduler *, unsigned int); + void (*tick_suspend) (const struct scheduler *, unsigned int, int); void (*tick_resume) (const struct scheduler *, unsigned int); }; --------------000709040507080402040305 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel --------------000709040507080402040305--