Hi, After hitting the pvclock-related issue, I recompiled my 2.6.27-rc6 kernel without CONFIG_KVM_CLOCK. It stays up far longer, but I see the following guest crash when I stress it (with a source build): BUG: unable to handle kernel paging request at d97b8000 IP: [] __slab_alloc+0x1cd/0x3a1 Oops: 0002 [#1] SMP DEBUG_PAGEALLOC Modules linked in: ipt_MASQUERADE iptable_nat nf_nat nf_conntrack_ipv4 ipt_REJECT iptable_filter ip_tables bridge stp ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi scsi_transport_iscsi nfs lockd nfs_acl sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables x_tables ipv6 dm_mirror dm_log dm_multipath dm_mod virtio_net floppy pcspkr virtio_pci i2c_piix4 i2c_core sr_mod cdrom ata_piix pata_acpi ata_generic ext3 jbd mbcache [last unloaded: microcode] Pid: 3020, comm: sed Not tainted (2.6.27-rc6 #1) EIP: 0060:[] EFLAGS: 00210006 CPU: 0 EIP is at __slab_alloc+0x1cd/0x3a1 EAX: 5a5a5a5a EBX: 00000009 ECX: 00000800 EDX: 00002000 ESI: c15f9f20 EDI: d97b8000 EBP: c096ed0c ESP: c096ece0 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 Process sed (pid: 3020, ti=c096e000 task=d93d14e0 task.ti=d9024000) Stack: d97b8000 c096ecec ffffffff 00000020 df83cbe0 00000000 d93d14e0 00000020 00200292 000000b8 00000000 c096ed3c c049738d c06196a1 c18abd58 00000000 c06196a1 00000020 df83cbe0 00200286 000005fa 00000020 d8ed4700 c096ed60 Call Trace: [] ? kmem_cache_alloc+0x63/0xd1 [] ? __alloc_skb+0x2e/0x10c [] ? __alloc_skb+0x2e/0x10c [] ? __alloc_skb+0x2e/0x10c [] ? __netdev_alloc_skb+0x1c/0x39 [] ? try_fill_recv+0x37/0x153 [virtio_net] [] ? lock_release_holdtime+0x43/0x48 [] ? virtnet_poll+0x239/0x2f7 [virtio_net] [] ? net_rx_action+0xde/0x204 [] ? __do_softirq+0x89/0xf1 [] ? __do_softirq+0x0/0xf1 [] ? do_softirq+0x7e/0xdf [] ? handle_fasteoi_irq+0x0/0xbd [] ? irq_exit+0x4c/0x8b [] ? do_IRQ+0xa5/0xbe [] ? common_interrupt+0x28/0x30 [] ? css_put+0x20/0x23 [] ? __mem_cgroup_uncharge_common+0xb2/0xce [] ? mem_cgroup_uncharge_page+0x12/0x14 [] ? page_remove_rmap+0xe4/0xfd [] ? unmap_vmas+0x36d/0x50d [] ? exit_mmap+0x57/0xa2 [] ? mmput+0x3f/0x90 [] ? exit_mm+0xed/0xf5 [] ? do_exit+0x1cc/0x734 [] ? up_read+0x1b/0x2e [] ? sys_exit_group+0x0/0x16 [] ? sys_exit_group+0x14/0x16 [] ? syscall_call+0x7/0xb ======================= Code: fe ff 89 45 d4 8b 45 e4 f6 40 01 08 74 2c 89 f0 e8 54 e5 ff ff ba 00 10 00 00 8b 7d d4 89 c1 b8 5a 5a 5a 5a d3 e2 89 d1 c1 e9 02 ab f6 c2 02 74 02 66 ab f6 c2 01 74 01 aa 8b 5d d4 89 5d f0 EIP: [] __slab_alloc+0x1cd/0x3a1 SS:ESP 0068:c096ece0 Kernel panic - not syncing: Fatal exception in interrupt ------------[ cut here ]------------ WARNING: at kernel/smp.c:332 smp_call_function_mask+0x38/0x18a() Modules linked in: ipt_MASQUERADE iptable_nat nf_nat nf_conntrack_ipv4 ipt_REJECT iptable_filter ip_tables bridge stp ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi scsi_transport_iscsi nfs lockd nfs_acl sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables x_tables ipv6 dm_mirror dm_log dm_multipath dm_mod virtio_net floppy pcspkr virtio_pci i2c_piix4 i2c_core sr_mod cdrom ata_piix pata_acpi ata_generic ext3 jbd mbcache [last unloaded: microcode] Pid: 3020, comm: sed Tainted: G D 2.6.27-rc6 #1 [] warn_on_slowpath+0x46/0x6a [] ? __delay+0xe/0x10 [] ? time_hardirqs_off+0xe/0x1f [] ? trace_hardirqs_off_caller+0x15/0x97 [] ? trace_hardirqs_off+0xb/0xd [] ? time_hardirqs_off+0xe/0x1f [] ? time_hardirqs_off+0xe/0x1f [] ? trace_hardirqs_off_caller+0x15/0x97 [] ? trace_hardirqs_off+0xb/0xd [] ? release_console_sem+0x1be/0x1c6 [] smp_call_function_mask+0x38/0x18a [] ? stop_this_cpu+0x0/0x4f [] ? time_hardirqs_off+0xe/0x1f [] ? trace_hardirqs_off_caller+0x15/0x97 [] ? trace_hardirqs_off+0xb/0xd [] ? _spin_unlock_irqrestore+0x3e/0x55 [] ? native_smp_send_stop+0x4/0x6e [] ? ftrace_record_ip+0x1b3/0x1cc [] ? _spin_unlock_irqrestore+0x3e/0x55 [] smp_call_function+0x17/0x19 [] native_smp_send_stop+0x20/0x6e [] panic+0x53/0xf4 [] oops_end+0x87/0x9b [] die+0x5c/0x64 [] do_page_fault+0x4f5/0x5af [] ? do_page_fault+0x0/0x5af [] error_code+0x72/0x78 [] ? shrink_page_list+0x3ba/0x560 [] ? __slab_alloc+0x1cd/0x3a1 [] kmem_cache_alloc+0x63/0xd1 [] ? __alloc_skb+0x2e/0x10c [] ? __alloc_skb+0x2e/0x10c [] __alloc_skb+0x2e/0x10c [] __netdev_alloc_skb+0x1c/0x39 [] try_fill_recv+0x37/0x153 [virtio_net] [] ? lock_release_holdtime+0x43/0x48 [] virtnet_poll+0x239/0x2f7 [virtio_net] [] net_rx_action+0xde/0x204 [] __do_softirq+0x89/0xf1 [] ? __do_softirq+0x0/0xf1 [] do_softirq+0x7e/0xdf [] ? handle_fasteoi_irq+0x0/0xbd [] irq_exit+0x4c/0x8b [] do_IRQ+0xa5/0xbe [] common_interrupt+0x28/0x30 [] ? css_put+0x20/0x23 [] __mem_cgroup_uncharge_common+0xb2/0xce [] mem_cgroup_uncharge_page+0x12/0x14 [] page_remove_rmap+0xe4/0xfd [] unmap_vmas+0x36d/0x50d [] exit_mmap+0x57/0xa2 [] mmput+0x3f/0x90 [] exit_mm+0xed/0xf5 [] do_exit+0x1cc/0x734 [] ? up_read+0x1b/0x2e [] sys_exit_group+0x0/0x16 [] sys_exit_group+0x14/0x16 [] syscall_call+0x7/0xb ======================= ---[ end trace 443e075b33442f93 ]--- I initially thought this might be virtio-related, because the crash ended up near virtio_net. To be sure, I tried again without the virtio NIC, and got a different oops which may be more indicative of the root cause: ========================= [ BUG: held lock freed! ] ------------------------- init/1 is freeing memory d6193000-d6193fff, with a lock still held there! (&anon_vma->lock){--..}, at: [] page_lock_anon_vma+0x3e/0x5d 3 locks held by init/1: #0: (&mm->mmap_sem){----}, at: [] do_page_fault+0x1cc/0x5af #1: (rcu_read_lock){..--}, at: [] page_lock_anon_vma+0x0/0x5d #2: (&anon_vma->lock){--..}, at: [] page_lock_anon_vma+0x3e/0x5d stack backtrace: Pid: 1, comm: init Not tainted 2.6.27-rc6 #1 [] debug_check_no_locks_freed+0xea/0x13d [] free_hot_cold_page+0x56/0x14a [] free_hot_page+0xf/0x11 [] __free_pages+0x2a/0x35 [] __free_slab+0xa3/0xab [] rcu_free_slab+0x13/0x15 [] __rcu_process_callbacks+0x112/0x177 [] rcu_process_callbacks+0x20/0x3a [] __do_softirq+0x89/0xf1 [] ? __do_softirq+0x0/0xf1 [] do_softirq+0x7e/0xdf [] irq_exit+0x4c/0x8b [] smp_apic_timer_interrupt+0x73/0x84 [] apic_timer_interrupt+0x2d/0x34 [] ? lock_acquire+0x6f/0x81 [] ? page_lock_anon_vma+0x3e/0x5d [] _spin_lock+0x23/0x50 [] ? page_lock_anon_vma+0x3e/0x5d [] page_lock_anon_vma+0x3e/0x5d [] page_referenced+0x47/0xfd [] ? need_resched+0x14/0x1e [] shrink_page_list+0x13f/0x560 [] ? trace_hardirqs_off+0xb/0xd [] ? native_sched_clock+0x97/0xb5 [] ? mark_lock+0x20/0x2fa [] ? mark_held_locks+0x45/0x5c [] ? trace_hardirqs_on+0xb/0xd [] ? trace_hardirqs_on_caller+0xf8/0x12d [] shrink_inactive_list+0xdd/0x299 [] shrink_zone+0xe7/0x106 [] do_try_to_free_pages+0x17b/0x2b9 [] try_to_free_pages+0x6c/0x74 [] ? isolate_pages_global+0x0/0x43 [] __alloc_pages_internal+0x22e/0x38a [] __do_page_cache_readahead+0xd5/0x19e [] do_page_cache_readahead+0x30/0x3e [] filemap_fault+0x13b/0x2db [] __do_fault+0x40/0x2ff [] ? trace_hardirqs_off_caller+0x15/0x97 [] ? native_sched_clock+0x97/0xb5 [] ? page_address+0x1a/0x87 [] ? mark_lock+0x20/0x2fa [] handle_mm_fault+0x342/0x6f9 [] ? do_page_fault+0x1cc/0x5af [] ? down_read_trylock+0x3e/0x48 [] do_page_fault+0x286/0x5af [] ? do_page_fault+0x0/0x5af [] error_code+0x72/0x78 Thanks! -- Dan Smith IBM Linux Technology Center Open Hypervisor Team email: danms@us.ibm.com