From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jeremy Fitzhardinge Subject: Re: What is the current state of Dom0 kernel support? / crash Date: Wed, 29 Jul 2009 13:48:05 -0700 Message-ID: <4A70B585.2070109@goop.org> References: <4A45138A.6010407@goop.org> <20090708221423.GE24960@edu.joroinen.fi> <4A553888.1090302@goop.org> <20090709222414.GL24960@edu.joroinen.fi> <20090715082242.GB24960@edu.joroinen.fi> <20090721130342.GQ24960@edu.joroinen.fi> <4A67651D.7040300@goop.org> <20090722193530.GR24960@edu.joroinen.fi> <20090722195748.GS24960@edu.joroinen.fi> <4A6775C9.5010802@goop.org> <20090722205346.GU24960@edu.joroinen.fi> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: quoted-printable Return-path: In-Reply-To: <20090722205346.GU24960@edu.joroinen.fi> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: =?ISO-8859-1?Q?Pasi_K=E4rkk=E4inen?= Cc: xen-devel@lists.xensource.com List-Id: xen-devel@lists.xenproject.org On 07/22/09 13:53, Pasi K=E4rkk=E4inen wrote: > On Wed, Jul 22, 2009 at 01:25:45PM -0700, Jeremy Fitzhardinge wrote: > =20 >> On 07/22/09 12:57, Pasi K=E4rkk=E4inen wrote: >> =20 >>> On Wed, Jul 22, 2009 at 10:35:30PM +0300, Pasi K=E4rkk=E4inen wrote: >>> =20 >>> =20 >>>> On Wed, Jul 22, 2009 at 12:14:37PM -0700, Jeremy Fitzhardinge wrote: >>>> =20 >>>> =20 >>>>> On 07/21/09 06:03, Pasi K=E4rkk=E4inen wrote: >>>>> =20 >>>>> =20 >>>>>> I just tried the latest 32b PAE rebase/master tree (2.6.31-rc3). >>>>>> >>>>>> http://pasik.reaktio.net/xen/pv_ops-dom0-debug/pv_ops-dom0-log-10-= rebase-master-with-highpte.txt >>>>>> >>>>>> Checking if this processor honours the WP bit even in supervisor m= ode... >>>>>> BUG: unable to handle kernel NULL pointer dereference at (null) >>>>>> IP: [] xen_evtchn_do_upcall+0xcc/0x13f >>>>>> *pdpt =3D 000000003d275001=20 >>>>>> Thread overran stack, or stack corrupted >>>>>> Oops: 0000 [#1] SMP=20 >>>>>> last sysfs file:=20 >>>>>> Modules linked in: >>>>>> >>>>>> Pid: 0, comm: swapper Not tainted (2.6.31-rc3 #20) P8SC8 >>>>>> EIP: 0061:[] EFLAGS: 00010046 CPU: 0 >>>>>> EIP is at xen_evtchn_do_upcall+0xcc/0x13f >>>>>> EAX: 00000000 EBX: ffffffff ECX: 00000000 EDX: 00000000 >>>>>> ESI: 00000000 EDI: c08ec558 EBP: c087eedc ESP: c087eea0 >>>>>> DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: e021 >>>>>> Process swapper (pid: 0, ti=3Dc087e000 task=3Dc083b1a0 task.ti=3Dc= 087e000) >>>>>> Stack: >>>>>> 00001a6e 00000220 00000200 00000000 00000000 00000000 e3201014 c0= 8ec558 >>>>>> <0> c087eee4 f5681000 e3201010 00000000 00000000 c09017f8 f54ff000= c087ef20 >>>>>> <0> c0409927 00000000 c09017f8 f54ff000 c09017f8 f54ff000 c087ef20= c0843f70 >>>>>> Call Trace: >>>>>> [] ? xen_do_upcall+0x7/0xc >>>>>> [] ? xen_pte_clear+0x9/0x12 >>>>>> [] ? set_pte_vaddr+0xb4/0xc4 >>>>>> [] ? __native_set_fixmap+0x25/0x30 >>>>>> [] ? xen_set_fixmap+0xc7/0xcc >>>>>> [] ? mem_init+0x24a/0x298 >>>>>> [] ? start_kernel+0x14b/0x2cd >>>>>> [] ? unknown_bootoption+0x0/0x18e >>>>>> [] ? i386_start_kernel+0x71/0x79 >>>>>> [] ? xen_start_kernel+0x52a/0x533 >>>>>> Code: d0 89 45 cc 89 55 c8 eb 16 0f bc c8 03 4d d4 8b 04 8a 83 f8 = ff 74 f8 >>>>>> 8b 55 e4 e8 36 de e7 ff 8b 55 f0 8b 45 d0 03=20 >>>>>> 05 1c 0c 97 c0 <8b> 0c 10 8b 55 e8 8b 45 cc 23 0c 82 8b 45 c8 8b 0= 4 82 8b 15 >>>>>> 18=20 >>>>>> EIP: [] xen_evtchn_do_upcall+0xcc/0x13f SS:ESP e021:c087= eea0 >>>>>> CR2: 0000000000000000 >>>>>> ---[ end trace 4eaa2a86a8e2da22 ]--- >>>>>> Kernel panic - not syncing: Fatal exception in interrupt >>>>>> =20 >>>>>> =20 >>>>>> =20 >>>>> Haven't seen that one before. =20 >>>>> >>>>> =20 >>>>> =20 >>>> Ok. I've seen many people report crashes during startup with rebase/= master >>>> on 32b PAE. I assume they're seeing this same issue. >>>> >>>> =20 >>>> =20 >>>>> The stack backtrace is a bit fuzzy; do you have CONFIG_FRAMEPOINTER= enabled? =20 >>>>> And if you have CONFIG_DEBUGINFO enabled, you can map the eip c058c= dcb=20 >>>>> to a specific source line (its not clear to me which pointer is NUL= L). >>>>> >>>>> =20 >>>>> =20 >>>> [root@dom0test linux-2.6-xen]# grep -i CONFIG_FRAMEPOINTER .config >>>> [root@dom0test linux-2.6-xen]# grep -i CONFIG_DEBUGINFO .config >>>> [root@dom0test linux-2.6-xen]#=20 >>>> >>>> Unfortunately those were not enabled.. I'll build a new kernel with >>>> CONFIG_DEBUGINFO enabled. >>>> >>>> =20 >>>> =20 >>> Actually CONFIG_DEBUG_INFO was enabled, if you meant that?=20 >>> =20 >>> =20 >> Yes, that's it. >> =20 >>> (gdb) x/i 0xc058cdcb >>> =20 >>> =20 >> Try "list *0xc058cdcb". >> >> =20 > > (gdb) list *0xc058cdcb > 0xc058cdcb is in active_evtchns (drivers/xen/events.c:237). > 232 > 233 static inline unsigned long active_evtchns(unsigned int cpu, > 234 struct shared_info *= sh, > 235 unsigned int idx) > 236 { > 237 return (sh->evtchn_pending[idx] & > 238 cpu_evtchn_mask(cpu)[idx] & > 239 ~sh->evtchn_mask[idx]); > 240 } > 241 > (gdb) > > > -- Pasi > > =20 Does this help? J Subject: [PATCH] xen: use proper percpu variable for cpu_evtchn_mask cpu_evtchn_mask is a per-cpu mask of event channels, so it should be implemented as a proper per_cpu variable. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/events.c b/drivers/xen/events.c index abad71b..4443b0f 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -93,14 +93,11 @@ static struct irq_info irq_info[NR_IRQS]; static int evtchn_to_irq[NR_EVENT_CHANNELS] =3D { [0 ... NR_EVENT_CHANNELS-1] =3D -1 }; -struct cpu_evtchn_s { - unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG]; -}; -static struct cpu_evtchn_s *cpu_evtchn_mask_p; -static inline unsigned long *cpu_evtchn_mask(int cpu) -{ - return cpu_evtchn_mask_p[cpu].bits; -} + +#define NR_EVENT_CHANNEL_LONGS (NR_EVENT_CHANNELS/BITS_PER_LONG) +static DEFINE_PER_CPU(unsigned long, + cpu_evtchn_mask[NR_EVENT_CHANNEL_LONGS]) =3D + {[0 ... NR_EVENT_CHANNEL_LONGS-1] =3D ~0}; =20 /* Xen will never allocate port zero for any purpose. */ #define VALID_EVTCHN(chn) ((chn) !=3D 0) @@ -223,7 +220,7 @@ static inline unsigned long active_evtchns(unsigned i= nt cpu, unsigned int idx) { return (sh->evtchn_pending[idx] & - cpu_evtchn_mask(cpu)[idx] & + per_cpu(cpu_evtchn_mask, cpu)[idx] & ~sh->evtchn_mask[idx]); } =20 @@ -236,8 +233,8 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsi= gned int cpu) cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); #endif =20 - __clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); - __set_bit(chn, cpu_evtchn_mask(cpu)); + __clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_from_irq(irq))); + __set_bit(chn, per_cpu(cpu_evtchn_mask, cpu)); =20 irq_info[irq].cpu =3D cpu; } @@ -253,8 +250,6 @@ static void init_evtchn_cpu_bindings(void) cpumask_copy(desc->affinity, cpumask_of(0)); } #endif - - memset(cpu_evtchn_mask(0), ~0, sizeof(cpu_evtchn_mask(0))); } =20 static inline void clear_evtchn(int port) @@ -928,10 +923,6 @@ void __init xen_init_IRQ(void) { int i; =20 - cpu_evtchn_mask_p =3D kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s), - GFP_KERNEL); - BUG_ON(cpu_evtchn_mask_p =3D=3D NULL); - init_evtchn_cpu_bindings(); =20 /* No event channels are 'live' right now. */