From mboxrd@z Thu Jan  1 00:00:00 1970
From: Jeremy Fitzhardinge <jeremy@goop.org>
Subject: Re: What is the current state of Dom0 kernel support?
	/	crash
Date: Wed, 29 Jul 2009 13:48:05 -0700
Message-ID: <4A70B585.2070109@goop.org>
References: <4A45138A.6010407@goop.org>
	<20090708221423.GE24960@edu.joroinen.fi>
	<4A553888.1090302@goop.org>
	<20090709222414.GL24960@edu.joroinen.fi>
	<20090715082242.GB24960@edu.joroinen.fi>
	<20090721130342.GQ24960@edu.joroinen.fi>
	<4A67651D.7040300@goop.org>
	<20090722193530.GR24960@edu.joroinen.fi>
	<20090722195748.GS24960@edu.joroinen.fi>
	<4A6775C9.5010802@goop.org>
	<20090722205346.GU24960@edu.joroinen.fi>
Mime-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Return-path: <xen-devel-bounces@lists.xensource.com>
In-Reply-To: <20090722205346.GU24960@edu.joroinen.fi>
List-Unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>,
	<mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
List-Post: <mailto:xen-devel@lists.xensource.com>
List-Help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-Subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>,
	<mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
Sender: xen-devel-bounces@lists.xensource.com
Errors-To: xen-devel-bounces@lists.xensource.com
To: =?ISO-8859-1?Q?Pasi_K=E4rkk=E4inen?= <pasik@iki.fi>
Cc: xen-devel@lists.xensource.com
List-Id: xen-devel@lists.xenproject.org

On 07/22/09 13:53, Pasi K=E4rkk=E4inen wrote:
> On Wed, Jul 22, 2009 at 01:25:45PM -0700, Jeremy Fitzhardinge wrote:
>  =20
>> On 07/22/09 12:57, Pasi K=E4rkk=E4inen wrote:
>>    =20
>>> On Wed, Jul 22, 2009 at 10:35:30PM +0300, Pasi K=E4rkk=E4inen wrote:
>>>  =20
>>>      =20
>>>> On Wed, Jul 22, 2009 at 12:14:37PM -0700, Jeremy Fitzhardinge wrote:
>>>>    =20
>>>>        =20
>>>>> On 07/21/09 06:03, Pasi K=E4rkk=E4inen wrote:
>>>>>      =20
>>>>>          =20
>>>>>> I just tried the latest 32b PAE rebase/master tree (2.6.31-rc3).
>>>>>>
>>>>>> http://pasik.reaktio.net/xen/pv_ops-dom0-debug/pv_ops-dom0-log-10-=
rebase-master-with-highpte.txt
>>>>>>
>>>>>> Checking if this processor honours the WP bit even in supervisor m=
ode...
>>>>>> BUG: unable to handle kernel NULL pointer dereference at (null)
>>>>>> IP: [<c058cdcb>] xen_evtchn_do_upcall+0xcc/0x13f
>>>>>> *pdpt =3D 000000003d275001=20
>>>>>> Thread overran stack, or stack corrupted
>>>>>> Oops: 0000 [#1] SMP=20
>>>>>> last sysfs file:=20
>>>>>> Modules linked in:
>>>>>>
>>>>>> Pid: 0, comm: swapper Not tainted (2.6.31-rc3 #20) P8SC8
>>>>>> EIP: 0061:[<c058cdcb>] EFLAGS: 00010046 CPU: 0
>>>>>> EIP is at xen_evtchn_do_upcall+0xcc/0x13f
>>>>>> EAX: 00000000 EBX: ffffffff ECX: 00000000 EDX: 00000000
>>>>>> ESI: 00000000 EDI: c08ec558 EBP: c087eedc ESP: c087eea0
>>>>>>  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: e021
>>>>>> Process swapper (pid: 0, ti=3Dc087e000 task=3Dc083b1a0 task.ti=3Dc=
087e000)
>>>>>> Stack:
>>>>>>  00001a6e 00000220 00000200 00000000 00000000 00000000 e3201014 c0=
8ec558
>>>>>> <0> c087eee4 f5681000 e3201010 00000000 00000000 c09017f8 f54ff000=
 c087ef20
>>>>>> <0> c0409927 00000000 c09017f8 f54ff000 c09017f8 f54ff000 c087ef20=
 c0843f70
>>>>>> Call Trace:
>>>>>>  [<c0409927>] ? xen_do_upcall+0x7/0xc
>>>>>>  [<c0404581>] ? xen_pte_clear+0x9/0x12
>>>>>>  [<c0427a94>] ? set_pte_vaddr+0xb4/0xc4
>>>>>>  [<c0426c8c>] ? __native_set_fixmap+0x25/0x30
>>>>>>  [<c040471a>] ? xen_set_fixmap+0xc7/0xcc
>>>>>>  [<c0897d86>] ? mem_init+0x24a/0x298
>>>>>>  [<c088367e>] ? start_kernel+0x14b/0x2cd
>>>>>>  [<c088336f>] ? unknown_bootoption+0x0/0x18e
>>>>>>  [<c0883082>] ? i386_start_kernel+0x71/0x79
>>>>>>  [<c0886188>] ? xen_start_kernel+0x52a/0x533
>>>>>> Code: d0 89 45 cc 89 55 c8 eb 16 0f bc c8 03 4d d4 8b 04 8a 83 f8 =
ff 74 f8
>>>>>> 8b 55 e4 e8 36 de e7 ff 8b 55 f0 8b 45 d0 03=20
>>>>>> 05 1c 0c 97 c0 <8b> 0c 10 8b 55 e8 8b 45 cc 23 0c 82 8b 45 c8 8b 0=
4 82 8b 15
>>>>>> 18=20
>>>>>> EIP: [<c058cdcb>] xen_evtchn_do_upcall+0xcc/0x13f SS:ESP e021:c087=
eea0
>>>>>> CR2: 0000000000000000
>>>>>> ---[ end trace 4eaa2a86a8e2da22 ]---
>>>>>> Kernel panic - not syncing: Fatal exception in interrupt
>>>>>>  =20
>>>>>>        =20
>>>>>>            =20
>>>>> Haven't seen that one before. =20
>>>>>
>>>>>      =20
>>>>>          =20
>>>> Ok. I've seen many people report crashes during startup with rebase/=
master
>>>> on 32b PAE. I assume they're seeing this same issue.
>>>>
>>>>    =20
>>>>        =20
>>>>> The stack backtrace is a bit fuzzy; do you have CONFIG_FRAMEPOINTER=
 enabled? =20
>>>>> And if you have CONFIG_DEBUGINFO enabled, you can map the eip c058c=
dcb=20
>>>>> to a specific source line (it's not clear to me which pointer is NUL=
L).
>>>>>
>>>>>      =20
>>>>>          =20
>>>> [root@dom0test linux-2.6-xen]# grep -i CONFIG_FRAMEPOINTER .config
>>>> [root@dom0test linux-2.6-xen]# grep -i CONFIG_DEBUGINFO .config
>>>> [root@dom0test linux-2.6-xen]#=20
>>>>
>>>> Unfortunately those were not enabled.. I'll build a new kernel with
>>>> CONFIG_DEBUGINFO enabled.
>>>>
>>>>    =20
>>>>        =20
>>> Actually CONFIG_DEBUG_INFO was enabled, if you meant that?=20
>>>  =20
>>>      =20
>> Yes, that's it.
>>    =20
>>> (gdb) x/i 0xc058cdcb
>>>  =20
>>>      =20
>> Try "list *0xc058cdcb".
>>
>>    =20
>
> (gdb) list *0xc058cdcb
> 0xc058cdcb is in active_evtchns (drivers/xen/events.c:237).
> 232
> 233     static inline unsigned long active_evtchns(unsigned int cpu,
> 234                                                struct shared_info *=
sh,
> 235                                                unsigned int idx)
> 236     {
> 237             return (sh->evtchn_pending[idx] &
> 238                     cpu_evtchn_mask(cpu)[idx] &
> 239                     ~sh->evtchn_mask[idx]);
> 240     }
> 241
> (gdb)
>
>
> -- Pasi
>
>  =20

Does this help?

    J

Subject: [PATCH] xen: use proper percpu variable for cpu_evtchn_mask

cpu_evtchn_mask is a per-cpu mask of event channels, so it should
be implemented as a proper per_cpu variable.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index abad71b..4443b0f 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -93,14 +93,11 @@ static struct irq_info irq_info[NR_IRQS];
 static int evtchn_to_irq[NR_EVENT_CHANNELS] =3D {
 	[0 ... NR_EVENT_CHANNELS-1] =3D -1
 };
-struct cpu_evtchn_s {
-	unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG];
-};
-static struct cpu_evtchn_s *cpu_evtchn_mask_p;
-static inline unsigned long *cpu_evtchn_mask(int cpu)
-{
-	return cpu_evtchn_mask_p[cpu].bits;
-}
+
+#define NR_EVENT_CHANNEL_LONGS	(NR_EVENT_CHANNELS/BITS_PER_LONG)
+static DEFINE_PER_CPU(unsigned long,
+		      cpu_evtchn_mask[NR_EVENT_CHANNEL_LONGS]) =3D
+	{[0 ... NR_EVENT_CHANNEL_LONGS-1] =3D ~0};
=20
 /* Xen will never allocate port zero for any purpose. */
 #define VALID_EVTCHN(chn)	((chn) !=3D 0)
@@ -223,7 +220,7 @@ static inline unsigned long active_evtchns(unsigned i=
nt cpu,
 					   unsigned int idx)
 {
 	return (sh->evtchn_pending[idx] &
-		cpu_evtchn_mask(cpu)[idx] &
+		per_cpu(cpu_evtchn_mask, cpu)[idx] &
 		~sh->evtchn_mask[idx]);
 }
=20
@@ -236,8 +233,8 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsi=
gned int cpu)
 	cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
 #endif
=20
-	__clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
-	__set_bit(chn, cpu_evtchn_mask(cpu));
+	__clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_from_irq(irq)));
+	__set_bit(chn, per_cpu(cpu_evtchn_mask, cpu));
=20
 	irq_info[irq].cpu =3D cpu;
 }
@@ -253,8 +250,6 @@ static void init_evtchn_cpu_bindings(void)
 		cpumask_copy(desc->affinity, cpumask_of(0));
 	}
 #endif
-
-	memset(cpu_evtchn_mask(0), ~0, sizeof(cpu_evtchn_mask(0)));
 }
=20
 static inline void clear_evtchn(int port)
@@ -928,10 +923,6 @@ void __init xen_init_IRQ(void)
 {
 	int i;
=20
-	cpu_evtchn_mask_p =3D kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s),
-				    GFP_KERNEL);
-	BUG_ON(cpu_evtchn_mask_p =3D=3D NULL);
-
 	init_evtchn_cpu_bindings();
=20
 	/* No event channels are 'live' right now. */