From mboxrd@z Thu Jan 1 00:00:00 1970 From: Juergen Gross Subject: Re: Hypervisor crash(!) on xl cpupool-numa-split Date: Mon, 07 Feb 2011 14:32:02 +0100 Message-ID: <4D4FF452.6060508@ts.fujitsu.com> References: <4D41FD3A.5090506@amd.com> <201102021539.06664.stephan.diestelhorst@amd.com> <4D4974D1.1080503@ts.fujitsu.com> <201102021701.05665.stephan.diestelhorst@amd.com> <4D4A43B7.5040707@ts.fujitsu.com> <4D4A72D8.3020502@ts.fujitsu.com> <4D4C08B6.30600@amd.com> <4D4FE7E2.9070605@amd.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------070602010600020302010307" Return-path: In-Reply-To: <4D4FE7E2.9070605@amd.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: Andre Przywara Cc: George Dunlap , "xen-devel@lists.xensource.com" , "Diestelhorst, Stephan" List-Id: xen-devel@lists.xenproject.org This is a multi-part message in MIME format. --------------070602010600020302010307 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit On 02/07/11 13:38, Andre Przywara wrote: > Juergen, > > as promised some more debug data. This is from c/s 22858 with Stephans > debug patch (attached). > We get the following dump when the hypervisor crashes, note that the > first lock is different from the second and subsequent ones: > > (XEN) sched_credit.c, 572: prv: ffff831836df2970 &prv->lock: > ffff831836df2970 prv->weight: 256 sdom->active_vcpu_count: 3 > sdom->weight: 256 > (XEN) sched_credit.c, 572: prv: ffff830437ffa5e0 &prv->lock: > ffff830437ffa5e0 prv->weight: 768 sdom->active_vcpu_count: 4 > sdom->weight: 256 > (XEN) sched_credit.c, 572: prv: ffff830437ffa5e0 &prv->lock: > ffff830437ffa5e0 prv->weight: 1024 sdom->active_vcpu_count: 5 > sdom->weight: 256 > (XEN) sched_credit.c, 572: prv: ffff830437ffa5e0 &prv->lock: > ffff830437ffa5e0 prv->weight: 1280 sdom->active_vcpu_count: 6 > sdom->weight: 256 > > .... > > Hope that gives you an idea. I attach the whole log for your reference. Hmm, could it be your log wasn't created with the attached patch? I'm missing Dom-Id and VCPU from the printk() above, which would be interesting (at least I hope so)... Additionally printing the local pcpu number would help, too. And could you add a printk for the new prv address in csched_init()? It would be nice if you could enable cpupool diag output. Please use the attached patch (includes the previous patch for executing the cpu move on the cpu to be moved, plus some diag printk corrections). Juergen -- Juergen Gross Principal Developer Operating Systems TSP ES&S SWE OS6 Telephone: +49 (0) 89 3222 2967 Fujitsu Technology Solutions e-mail: juergen.gross@ts.fujitsu.com Domagkstr. 28 Internet: ts.fujitsu.com D-80807 Muenchen Company details: ts.fujitsu.com/imprint.html --------------070602010600020302010307 Content-Type: text/x-patch; name="diag.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="diag.patch" diff -r 7ada6faef565 xen/common/cpupool.c --- a/xen/common/cpupool.c Sun Feb 06 17:26:31 2011 +0000 +++ b/xen/common/cpupool.c Mon Feb 07 14:26:50 2011 +0100 @@ -35,7 +35,7 @@ static DEFINE_SPINLOCK(cpupool_lock); DEFINE_PER_CPU(struct cpupool *, cpupool); -#define cpupool_dprintk(x...) ((void)0) +#define cpupool_dprintk(x...) printk(x) static struct cpupool *alloc_cpupool_struct(void) { @@ -227,14 +227,30 @@ static int cpupool_assign_cpu_locked(str return 0; } +static long cpupool_assign_cpu_helper(void *info) +{ + int cpu = cpupool_moving_cpu; + long ret; + + cpupool_dprintk("cpupool_assign_cpu(pool=%d,cpu=%d)\n", + cpupool_cpu_moving->cpupool_id, cpu); + BUG_ON(!is_idle_vcpu(current)); + BUG_ON(cpu != smp_processor_id()); + spin_lock(&cpupool_lock); + ret = cpupool_assign_cpu_locked(cpupool_cpu_moving, cpu); + spin_unlock(&cpupool_lock); + return ret; +} + static long cpupool_unassign_cpu_helper(void *info) { int cpu = cpupool_moving_cpu; long ret; - cpupool_dprintk("cpupool_unassign_cpu(pool=%d,cpu=%d) ret %ld\n", - cpupool_id, cpu, ret); - + cpupool_dprintk("cpupool_unassign_cpu(pool=%d,cpu=%d)\n", + cpupool_cpu_moving->cpupool_id, cpu); + BUG_ON(!is_idle_vcpu(current)); + BUG_ON(cpu != smp_processor_id()); spin_lock(&cpupool_lock); ret = cpu_disable_scheduler(cpu); cpu_set(cpu, cpupool_free_cpus); @@ -258,9 +274,51 @@ out: } /* + * assign a specific cpu to a cpupool + * we must be sure to run on the cpu to be assigned in idle! to achieve this + * the main functionality is performed via continue_hypercall_on_cpu on the + * specific cpu. + * possible failures: + * - cpu not free + * - cpu just being unplugged + */ +int cpupool_assign_cpu(struct cpupool *c, unsigned int cpu) +{ + int ret; + + cpupool_dprintk("cpupool_assign_cpu(pool=%d,cpu=%d)\n", + c->cpupool_id, cpu); + + spin_lock(&cpupool_lock); + ret = -EBUSY; + if ( (cpupool_moving_cpu != -1) && (cpu != cpupool_moving_cpu) ) + goto out; + if ( cpu_isset(cpu, cpupool_locked_cpus) ) + goto out; + + ret = 0; + if ( !cpu_isset(cpu, cpupool_free_cpus) && (cpu != cpupool_moving_cpu) ) + goto out; + + cpupool_moving_cpu = cpu; + atomic_inc(&c->refcnt); + cpupool_cpu_moving = c; + cpu_clear(cpu, c->cpu_valid); + spin_unlock(&cpupool_lock); + + return continue_hypercall_on_cpu(cpu, cpupool_assign_cpu_helper, c); + +out: + spin_unlock(&cpupool_lock); + cpupool_dprintk("cpupool_assign_cpu(pool=%d,cpu=%d) ret %d\n", + c->cpupool_id, cpu, ret); + return ret; +} + +/* * unassign a specific cpu from a cpupool - * we must be sure not to run on the cpu to be unassigned! to achieve this - * the main functionality is performed via continue_hypercall_on_cpu on a + * we must be sure to run on the cpu to be unassigned in idle! to achieve this + * the main functionality is performed via continue_hypercall_on_cpu on the * specific cpu. * if the cpu to be removed is the last one of the cpupool no active domain * must be bound to the cpupool. dying domains are moved to cpupool0 as they @@ -271,7 +329,6 @@ out: */ int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu) { - int work_cpu; int ret; struct domain *d; @@ -319,19 +376,12 @@ int cpupool_unassign_cpu(struct cpupool cpu_clear(cpu, c->cpu_valid); spin_unlock(&cpupool_lock); - work_cpu = smp_processor_id(); - if ( work_cpu == cpu ) - { - work_cpu = first_cpu(cpupool0->cpu_valid); - if ( work_cpu == cpu ) - work_cpu = next_cpu(cpu, cpupool0->cpu_valid); - } - return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c); + return continue_hypercall_on_cpu(cpu, cpupool_unassign_cpu_helper, c); out: spin_unlock(&cpupool_lock); cpupool_dprintk("cpupool_unassign_cpu(pool=%d,cpu=%d) ret %d\n", - cpupool_id, cpu, ret); + c->cpupool_id, cpu, ret); return ret; } @@ -345,7 +395,7 @@ int cpupool_add_domain(struct domain *d, { struct cpupool *c; int rc = 1; - int n_dom; + int n_dom = 0; if ( poolid == CPUPOOLID_NONE ) return 0; @@ -472,27 +522,15 @@ int cpupool_do_sysctl(struct xen_sysctl_ { unsigned cpu; + c = __cpupool_get_by_id(op->cpupool_id, 0); + ret = -ENOENT; + if ( c == NULL ) + break; cpu = op->cpu; - cpupool_dprintk("cpupool_assign_cpu(pool=%d,cpu=%d)\n", - op->cpupool_id, cpu); - spin_lock(&cpupool_lock); if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY ) cpu = first_cpu(cpupool_free_cpus); - ret = -EINVAL; - if ( cpu >= NR_CPUS ) - goto addcpu_out; - ret = -EBUSY; - if ( !cpu_isset(cpu, cpupool_free_cpus) ) - goto addcpu_out; - c = cpupool_find_by_id(op->cpupool_id, 0); - ret = -ENOENT; - if ( c == NULL ) - goto addcpu_out; - ret = cpupool_assign_cpu_locked(c, cpu); - addcpu_out: - spin_unlock(&cpupool_lock); - cpupool_dprintk("cpupool_assign_cpu(pool=%d,cpu=%d) ret %d\n", - op->cpupool_id, cpu, ret); + ret = (cpu < NR_CPUS) ? cpupool_assign_cpu(c, cpu) : -EINVAL; + cpupool_put(c); } break; --------------070602010600020302010307 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel --------------070602010600020302010307--