linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* 2.6.18-rc3->rc4 hugetlbfs regression
@ 2006-08-15 15:22 Dave Hansen
  2006-08-16 15:00 ` Adam Litke
  0 siblings, 1 reply; 2+ messages in thread
From: Dave Hansen @ 2006-08-15 15:22 UTC (permalink / raw)
  To: Linux Kernel Mailing List, linux-mm
  Cc: Suzuki Kp, PPC External List, Yao Fei Zhu, lge,
	Nishanth Aravamudan


kernel BUG in cache_free_debugcheck at mm/slab.c:2748!

This is from a ppc64 machine running 2.6.18-rc4.  It apparently did not
happen with 2.6.18-rc3, but I don't see anything particularly suspect in
the changelogs, so it might be a wee bit more intermittent than it first
appeared.

You can get libhugetlbfs from here: http://libhugetlbfs.sourceforge.net/

Steps to reproduce:
1. boot kernel 2.6.18-rc4 with "hugepages=20"
2. mount none -t hugetlbfs /mnt/hugetlbfs
3. run the libhugetlbfs test suite ("make check"); this triggers xmon.

apple-lp1:/kernel/libhugetlbfs.git # make check
zero_filesize_segment (32):     obj32/zero_filesize_segment:
obj32/zero_filesize_segment: cannot execute binary file
zero_filesize_segment (64):     obj64/zero_filesize_segment:
obj64/zero_filesize_segment: cannot execute binary file
test_root (32): PASS
test_root (64): PASS
meminfo_nohuge (32):    PASS
meminfo_nohuge (64):    PASS
gethugepagesize (32):   PASS
gethugepagesize (64):   PASS
empty_mounts (32):      PASS
empty_mounts (64):      PASS
find_path (32): PASS
find_path (64): PASS
unlinked_fd (32):       PASS
unlinked_fd (64):       PASS
readback (32):  PASS


Additional info:
0:mon> e
cpu 0x0: Vector: 700 (Program Check) at [c0000001cf6e3530]
    pc: c0000000000c7458: .cache_free_debugcheck+0x1d0/0x2b0
    lr: c0000000000c7410: .cache_free_debugcheck+0x188/0x2b0
    sp: c0000001cf6e37b0
   msr: 8000000000021032
  current = 0xc0000001ccaf94e0
  paca    = 0xc000000000622300
    pid   = 6714, comm = readback
kernel BUG in cache_free_debugcheck at mm/slab.c:2748!

0:mon> t
[c0000001cf6e37b0] c0000000000c73cc .cache_free_debugcheck+0x144/0x2b0 (unreliable)
[c0000001cf6e3860] c0000000000c7a04 .kmem_cache_free+0xd8/0x164
[c0000001cf6e3900] c00000000002f630 .pgtable_free_tlb+0xd4/0x144
[c0000001cf6e39a0] c000000000032648 .hugetlb_free_pgd_range+0x1b8/0x26c
[c0000001cf6e3a70] c0000000000b4f68 .free_pgtables+0x90/0x134
[c0000001cf6e3b20] c0000000000b61ac .exit_mmap+0xcc/0x180
[c0000001cf6e3bd0] c00000000006209c .mmput+0x70/0x148
[c0000001cf6e3c60] c000000000067288 .exit_mm+0x118/0x138
[c0000001cf6e3cf0] c0000000000692c4 .do_exit+0x21c/0x958
[c0000001cf6e3da0] c000000000069aa8 .sys_exit_group+0x0/0x8
[c0000001cf6e3e30] c00000000000871c syscall_exit+0x0/0x40
--- Exception: c01 (System Call) at 000000000feb0b78
SP (ffcc4090) is in userspace

0:mon> r
R00 = 000000000000018f   R16 = 00000000100a0000
R01 = c0000001cf6e37b0   R17 = 00000000100b2eb0
R02 = c000000000849ce0   R18 = 00000000100a0000
R03 = c0000001cfe90a08   R19 = 0000000000000000
R04 = c0000001cfe90a10   R20 = 0000000000000000
R05 = ffffffffffffffff   R21 = 00000000e0ffffff
R06 = 0000000000000000   R22 = c0000001cf6e3b90
R07 = c000000000648c38   R23 = 00000000e0ffffff
R08 = 000000000001ffff   R24 = 00000000e1000000
R09 = 0000000000000001   R25 = c00000000002f630
R10 = 0000000000000019   R26 = c0000001cfe90000
R11 = 0000000000000850   R27 = 0000000000000000
R12 = 0000000000000001   R28 = c0000001cfe90978
R13 = c000000000622300   R29 = 000000000000000e
R14 = 0000000010080000   R30 = c00000000065f098
R15 = 0000000000000000   R31 = c000000002b14380
pc  = c0000000000c7458 .cache_free_debugcheck+0x1d0/0x2b0
lr  = c0000000000c7410 .cache_free_debugcheck+0x188/0x2b0
msr = 8000000000021032   cr  = 44000424
ctr = 0000000000000001   xer = 000000002000001a   trap =  700

0:mon> di c0000000000c7458
c0000000000c7458  0b090000      tdnei   r9,0
c0000000000c745c  801f0118      lwz     r0,280(r31)
c0000000000c7460  7809bfe3      rldicl. r9,r0,55,63
c0000000000c7464  41820034      beq     c0000000000c7498        # .cache_free_debugcheck+0x210/0x2b0
c0000000000c7468  e93f0148      ld      r9,328(r31)
c0000000000c746c  e87f01d2      lwa     r3,464(r31)
c0000000000c7470  7fe4fb78      mr      r4,r31
c0000000000c7474  38a00005      li      r5,5
c0000000000c7478  e9690000      ld      r11,0(r9)
c0000000000c747c  f8410028      std     r2,40(r1)
c0000000000c7480  7c7c1a14      add     r3,r28,r3
c0000000000c7484  7d6903a6      mtctr   r11
c0000000000c7488  e8490008      ld      r2,8(r9)
c0000000000c748c  e9690010      ld      r11,16(r9)
c0000000000c7490  4e800421      bctrl
c0000000000c7494  e8410028      ld      r2,40(r1)


The BUG() was hit here:

               *dbg_redzone1(cachep, objp) = RED_INACTIVE;
                *dbg_redzone2(cachep, objp) = RED_INACTIVE;
        }
        if (cachep->flags & SLAB_STORE_USER)
                *dbg_userword(cachep, objp) = caller;

        objnr = obj_to_index(cachep, slabp, objp);

        BUG_ON(objnr >= cachep->num);
        BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); <---- Hit here.

        if (cachep->flags & SLAB_DEBUG_INITIAL) {
                /*


-- Dave

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: 2.6.18-rc3->rc4 hugetlbfs regression
  2006-08-15 15:22 2.6.18-rc3->rc4 hugetlbfs regression Dave Hansen
@ 2006-08-16 15:00 ` Adam Litke
  0 siblings, 0 replies; 2+ messages in thread
From: Adam Litke @ 2006-08-16 15:00 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Suzuki Kp, PPC External List, Linux Kernel Mailing List, linux-mm,
	Yao Fei Zhu, lge, Nishanth Aravamudan

On Tue, 2006-08-15 at 08:22 -0700, Dave Hansen wrote:
> kernel BUG in cache_free_debugcheck at mm/slab.c:2748!

Alright, this one is only triggered when slab debugging is enabled.  The slabs
are assumed to be aligned on a HUGEPTE_TABLE_SIZE boundary.  The free path
makes use of this assumption and uses the lowest nibble to pass around an index
into an array of kmem_cache pointers.  With slab debugging turned on, the slab
is still aligned, but the "working" object pointer is not.  This would break
the assumption above that a full nibble is available for the PGF_CACHENUM_MASK.

The following patch reduces PGF_CACHENUM_MASK to cover only the two least
significant bits, which is enough to cover the current number of 4 pgtable
cache types.  Then use this constant to mask out the appropriate part of the
huge pte pointer.

Signed-off-by: Adam Litke <agl@us.ibm.com>
---
 arch/powerpc/mm/hugetlbpage.c |    2 +-
 include/asm-powerpc/pgalloc.h |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
diff -upN reference/arch/powerpc/mm/hugetlbpage.c current/arch/powerpc/mm/hugetlbpage.c
--- reference/arch/powerpc/mm/hugetlbpage.c
+++ current/arch/powerpc/mm/hugetlbpage.c
@@ -153,7 +153,7 @@ static void free_hugepte_range(struct mm
 	hpdp->pd = 0;
 	tlb->need_flush = 1;
 	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
-						 HUGEPTE_TABLE_SIZE-1));
+						 PGF_CACHENUM_MASK));
 }
 
 #ifdef CONFIG_PPC_64K_PAGES
diff -upN reference/include/asm-powerpc/pgalloc.h current/include/asm-powerpc/pgalloc.h
--- reference/include/asm-powerpc/pgalloc.h
+++ current/include/asm-powerpc/pgalloc.h
@@ -117,7 +117,7 @@ static inline void pte_free(struct page 
 	pte_free_kernel(page_address(ptepage));
 }
 
-#define PGF_CACHENUM_MASK	0xf
+#define PGF_CACHENUM_MASK	0x3
 
 typedef struct pgtable_free {
 	unsigned long val;


-- 
Adam Litke - (agl at us.ibm.com)
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2006-08-16 15:00 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-08-15 15:22 2.6.18-rc3->rc4 hugetlbfs regression Dave Hansen
2006-08-16 15:00 ` Adam Litke

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).