public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* fix iounmap and a pageattr memleak (x86 and x86-64)
@ 2004-10-28 19:21 Andrea Arcangeli
  2004-11-05  8:07 ` Andrea Arcangeli
  0 siblings, 1 reply; 36+ messages in thread
From: Andrea Arcangeli @ 2004-10-28 19:21 UTC (permalink / raw)
  To: Andrew Morton, Andi Kleen; +Cc: linux-kernel

This first patch fixes silent memleak in the pageattr code that I found
while searching for the bug Andi fixed in the second patch below
(basically reference counting in split page was done on the pmd instead
of the pte).

Signed-off-by: Andrea Arcangeli <andrea@novell.com>

Index: linux-2.5/arch/i386/mm/pageattr.c
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/i386/mm/pageattr.c,v
retrieving revision 1.13
diff -u -p -r1.13 pageattr.c
--- linux-2.5/arch/i386/mm/pageattr.c	27 Aug 2004 17:35:39 -0000	1.13
+++ linux-2.5/arch/i386/mm/pageattr.c	28 Oct 2004 19:11:20 -0000
@@ -117,22 +117,23 @@ __change_page_attr(struct page *page, pg
 	kpte_page = virt_to_page(kpte);
 	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { 
 		if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
-			pte_t old = *kpte;
-			pte_t standard = mk_pte(page, PAGE_KERNEL); 
 			set_pte_atomic(kpte, mk_pte(page, prot)); 
-			if (pte_same(old,standard))
-				get_page(kpte_page);
 		} else {
 			struct page *split = split_large_page(address, prot); 
 			if (!split)
 				return -ENOMEM;
-			get_page(kpte_page);
 			set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL));
+			kpte_page = split;
 		}	
+		get_page(kpte_page);
 	} else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
 		set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
 		__put_page(kpte_page);
-	}
+	} else
+		BUG();
+
+	/* memleak and potential failed 2M page regeneration */
+	BUG_ON(!page_count(kpte_page));
 
 	if (cpu_has_pse && (page_count(kpte_page) == 1)) {
 		list_add(&kpte_page->lru, &df_list);
Index: linux-2.5/arch/x86_64/mm/pageattr.c
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/x86_64/mm/pageattr.c,v
retrieving revision 1.12
diff -u -p -r1.12 pageattr.c
--- linux-2.5/arch/x86_64/mm/pageattr.c	27 Jun 2004 17:54:00 -0000	1.12
+++ linux-2.5/arch/x86_64/mm/pageattr.c	28 Oct 2004 19:11:20 -0000
@@ -124,28 +124,33 @@ __change_page_attr(unsigned long address
 	kpte_flags = pte_val(*kpte); 
 	if (pgprot_val(prot) != pgprot_val(ref_prot)) { 
 		if ((kpte_flags & _PAGE_PSE) == 0) { 
-			pte_t old = *kpte;
-			pte_t standard = mk_pte(page, ref_prot); 
-
 			set_pte(kpte, mk_pte(page, prot)); 
-			if (pte_same(old,standard))
-				get_page(kpte_page);
 		} else {
+			/*
+			 * split_large_page will take the reference for this change_page_attr
+			 * on the split page.
+			 */
 			struct page *split = split_large_page(address, prot, ref_prot); 
 			if (!split)
 				return -ENOMEM;
-			get_page(kpte_page);
 			set_pte(kpte,mk_pte(split, ref_prot));
+			kpte_page = split;
 		}	
+		get_page(kpte_page);
 	} else if ((kpte_flags & _PAGE_PSE) == 0) { 
 		set_pte(kpte, mk_pte(page, ref_prot));
 		__put_page(kpte_page);
-	}
+	} else
+		BUG();
 
-	if (page_count(kpte_page) == 1) {
+	switch (page_count(kpte_page)) {
+	case 1:
 		save_page(address, kpte_page); 		     
 		revert_page(address, ref_prot);
-	} 
+		break;
+	case 0:
+		BUG(); /* memleak and failed 2M page regeneration */
+	}
 	return 0;
 } 
 


This below patch from Andi is also needed to make the above work on x86
(otherwise one of my new above BUGS() will trigger signalling the fact
a bug was there). The below patch creates a subtle dependency that
(_PAGE_PCD << 24) must not be zero. It's not the cleanest thing ever,
but since it's an hardware bitflag I doubt it's going to break.

I'm not sure if I'm allowed to add the signedoff for Andi but I think I
should since he wrote the x86-64 version, if not please let me know (I
only backported it to x86 to test my above changes that otherwise would
trigger one of my added BUGs, and I did the other worthless cosmetical
fixes).

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrea Arcangeli <andrea@novell.com>

Index: linux-2.5/arch/i386/mm/ioremap.c
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/i386/mm/ioremap.c,v
retrieving revision 1.23
diff -u -p -r1.23 ioremap.c
--- linux-2.5/arch/i386/mm/ioremap.c	11 Sep 2004 02:01:36 -0000	1.23
+++ linux-2.5/arch/i386/mm/ioremap.c	28 Oct 2004 19:11:20 -0000
@@ -152,7 +152,7 @@ void __iomem * __ioremap(unsigned long p
 	/*
 	 * Ok, go for it..
 	 */
-	area = get_vm_area(size, VM_IOREMAP);
+	area = get_vm_area(size, VM_IOREMAP | (flags << 24));
 	if (!area)
 		return NULL;
 	area->phys_addr = phys_addr;
@@ -232,7 +232,7 @@ void iounmap(volatile void __iomem *addr
 		return;
 	} 
 
-	if (p->flags && p->phys_addr < virt_to_phys(high_memory)) { 
+	if ((p->flags >> 24) && p->phys_addr < virt_to_phys(high_memory)) { 
 		change_page_attr(virt_to_page(__va(p->phys_addr)),
 				 p->size >> PAGE_SHIFT,
 				 PAGE_KERNEL); 				 
Index: linux-2.5/arch/x86_64/mm/ioremap.c
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/x86_64/mm/ioremap.c,v
retrieving revision 1.16
diff -u -p -r1.16 ioremap.c
--- linux-2.5/arch/x86_64/mm/ioremap.c	22 Oct 2004 15:01:03 -0000	1.16
+++ linux-2.5/arch/x86_64/mm/ioremap.c	28 Oct 2004 19:11:20 -0000
@@ -128,11 +128,11 @@ void __iomem * __ioremap(unsigned long p
 	if (phys_addr >= 0xA0000 && last_addr < 0x100000)
 		return (__force void __iomem *)phys_to_virt(phys_addr);
 
+#ifndef CONFIG_DISCONTIGMEM
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
 	if (phys_addr < virt_to_phys(high_memory)) {
-#ifndef CONFIG_DISCONTIGMEM
 		char *t_addr, *t_end;
  		struct page *page;
 
@@ -142,8 +142,8 @@ void __iomem * __ioremap(unsigned long p
 		for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
 			if(!PageReserved(page))
 				return NULL;
-#endif
 	}
+#endif
 
 	/*
 	 * Mappings have to be page-aligned
@@ -155,7 +155,7 @@ void __iomem * __ioremap(unsigned long p
 	/*
 	 * Ok, go for it..
 	 */
-	area = get_vm_area(size, VM_IOREMAP);
+	area = get_vm_area(size, VM_IOREMAP | (flags << 24));
 	if (!area)
 		return NULL;
 	area->phys_addr = phys_addr;
@@ -195,12 +195,12 @@ void __iomem *ioremap_nocache (unsigned 
 	if (!p) 
 		return p; 
 
-	if (phys_addr + size < virt_to_phys(high_memory)) { 
+	if (phys_addr + size - 1 < virt_to_phys(high_memory)) { 
 		struct page *ppage = virt_to_page(__va(phys_addr));		
 		unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
-		BUG_ON(phys_addr+size > (unsigned long)high_memory);
-		BUG_ON(phys_addr + size < phys_addr);
+		BUG_ON(phys_addr+size >= (unsigned long)high_memory);
+		BUG_ON(phys_addr + size <= phys_addr);
 
 		if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { 
 			iounmap(p); 
@@ -223,7 +223,7 @@ void iounmap(volatile void __iomem *addr
 		return;
 	} 
 
-	if (p->flags && p->phys_addr < virt_to_phys(high_memory)) { 
+	if ((p->flags >> 24) && p->phys_addr + p->size < virt_to_phys(high_memory)) { 
 		change_page_attr(virt_to_page(__va(p->phys_addr)),
 				 p->size >> PAGE_SHIFT,
 				 PAGE_KERNEL); 				 


If you give them some beating on -mm let me know if you've any problem.
(running with this stuff on my machines right now, so far so good)

^ permalink raw reply	[flat|nested] 36+ messages in thread
* Re: fix iounmap and a pageattr memleak (x86 and x86-64)
@ 2004-11-02 21:21 Dave Hansen
  2004-11-02 22:07 ` Andrea Arcangeli
  2004-11-02 22:34 ` Jason Baron
  0 siblings, 2 replies; 36+ messages in thread
From: Dave Hansen @ 2004-11-02 21:21 UTC (permalink / raw)
  To: andrea; +Cc: linux-mm, linux-kernel, Andi Kleen, Andrew Morton

This patch:

> From: Andrea Arcangeli <andrea@novell.com>
> 
> - fix silent memleak in the pageattr code that I found while searching
>   for the bug Andi fixed in the second patch below (basically reference
>   counting in split page was done on the pmd instead of the pte).
> 
> - Part of this patch is also needed to make the above work on x86 (otherwise
>   one of my new above BUGS() will trigger signalling the fact a bug was
>   there).  The below patch creates a subtle dependency that (_PAGE_PCD << 24)
>   must not be zero.  It's not the cleanest thing ever, but since it's an
>   hardware bitflag I doubt it's going to break.
> 
> Signed-off-by: Andi Kleen <ak@suse.de>
> Signed-off-by: Andrea Arcangeli <andrea@novell.com>
> Signed-off-by: Andrew Morton <akpm@osdl.org>
> ---
> 
>  25-akpm/arch/i386/mm/ioremap.c    |    4 ++--
>  25-akpm/arch/i386/mm/pageattr.c   |   13 +++++++------
>  25-akpm/arch/x86_64/mm/ioremap.c  |   14 +++++++-------
>  25-akpm/arch/x86_64/mm/pageattr.c |   23 ++++++++++++++---------
>  4 files changed, 30 insertions(+), 24 deletions(-)

is hitting this BUG() during bootup:

        /* memleak and potential failed 2M page regeneration */
        BUG_ON(!page_count(kpte_page));

in 2.6.10-rc1-mm2.

Dentry cache hash table entries: 131072 (order: 7, 524288 bytes)
Inode-cache hash table entries: 65536 (order: 6, 262144 bytes)
Memory: 511144k/524288k available (1856k kernel code, 12608k reserved, 
1186k data, 164k init, 0k highmem)
Checking if this processor honours the WP bit even in supervisor mode... Ok.
Mount-cache hash table entries: 512 (order: 0, 4096 bytes)
------------[ cut here ]------------
kernel BUG at arch/i386/mm/pageattr.c:136!
invalid operand: 0000 [#1]
SMP DEBUG_PAGEALLOC
Modules linked in:
CPU:    0
EIP:    0060:[<c0113f48>]    Not tainted VLI
EFLAGS: 00010046   (2.6.10-rc1-mm2)
EIP is at __change_page_attr+0x28c/0x358
eax: ffffffff   ebx: 017ff163   ecx: 00000000   edx: c10001e0
esi: 00000000   edi: c000fff8   ebp: c10001e0   esp: c03f9d98
ds: 007b   es: 007b   ss: 0068
Process swapper (pid: 0, threadinfo=c03f9000 task=c0345b40)
Stack: c102ffe0 00000000 00000000 00000046 c0113ceb c17f9000 c102ff20
        c102ff20 017ff163 00000000 017ff163 00000000 c0113ceb c17fe000
        c102ffc0 c102ffc0 c1000000 c17ff000 c000fff8 017ff163 00000000
        017ff163 00000000 00000000
Call Trace:
  [<c0113ceb>] __change_page_attr+0x2f/0x358
  [<c0113ceb>] __change_page_attr+0x2f/0x358
  [<c011404a>] change_page_attr+0x36/0x54
  [<c0114148>] kernel_map_pages+0x30/0x5f
  [<c0137d80>] __alloc_pages+0x340/0x350
  [<c0137dad>] __get_free_pages+0x1d/0x30
  [<c013adfa>] kmem_getpages+0x26/0xd4
  [<c013c221>] cache_grow+0xb1/0x150
  [<c013c84a>] cache_alloc_refill+0x232/0x280
  [<c013ccbe>] kmem_cache_alloc+0x5a/0x78
  [<c01d4970>] idr_pre_get+0x1c/0x44
  [<c0181b60>] sysfs_fill_super+0x0/0xa4
  [<c01572cc>] set_anon_super+0x10/0xb8
  [<c0156cff>] sget+0xb3/0x148
  [<c0181b60>] sysfs_fill_super+0x0/0xa4
  [<c0157636>] get_sb_single+0x26/0x8c
  [<c0157608>] compare_single+0x0/0x8
  [<c01572bc>] set_anon_super+0x0/0xb8
  [<c0181c1d>] sysfs_get_sb+0x19/0x1d
  [<c0181b60>] sysfs_fill_super+0x0/0xa4
  [<c01576ea>] do_kern_mount+0x4e/0xd0
  [<c015777d>] kern_mount+0x11/0x15
  [<c0409962>] sysfs_init+0x1e/0x50
  [<c0409430>] mnt_init+0xb4/0xc0
  [<c040917a>] vfs_caches_init+0x7e/0x94
  [<c03fa831>] start_kernel+0x12d/0x150
Code: c7 0f 75 f5 f0 ff 4d 04 eb 08 0f 0b 85 00 88 c1 2d c0 8b 45 00 89 
ea f6 c4 80 74 07 8b 55 0c 8d 74 26 00 8b 42 04 83 f8 ff 75 08 <0f> 0b 
88 00 88 c1 2d c0 a1 ac 6c 34 c0 a8 08 0f 84 aa 00 00 00

I'm tracking down now to see exactly what's going on.  This just a 
regular, plain 4-way x86 box with 4GB of RAM.  Removing that BUG_ON() 
lets me boot just fine.

kpte: c000fff8
address: c17ff000
kpte_page: c10001e0
pgprot_val(prot): 00000163
pgprot_val(PAGE_KERNEL): 00000163
(pte_val(*kpte) & _PAGE_PSE): 00000000

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2005-02-15 13:15 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-10-28 19:21 fix iounmap and a pageattr memleak (x86 and x86-64) Andrea Arcangeli
2004-11-05  8:07 ` Andrea Arcangeli
2004-11-05  8:31   ` Andi Kleen
2004-11-05  8:49     ` Andrea Arcangeli
2005-02-14 23:15       ` Andrea Arcangeli
2005-02-15 10:39         ` Andi Kleen
2005-02-15 10:48           ` Andrea Arcangeli
2005-02-15 10:51             ` Andi Kleen
2005-02-15 11:11               ` Andrea Arcangeli
2005-02-15 13:14                 ` Hugh Dickins
  -- strict thread matches above, loose matches on Subject: below --
2004-11-02 21:21 Dave Hansen
2004-11-02 22:07 ` Andrea Arcangeli
2004-11-02 22:21   ` Dave Hansen
2004-11-02 22:29     ` Andrew Morton
2004-11-02 22:34       ` Dave Hansen
2004-11-03  0:54     ` Andrea Arcangeli
2004-11-02 22:45   ` Dave Hansen
2004-11-02 23:00     ` Dave Hansen
2004-11-03  1:35       ` Andrea Arcangeli
2004-11-03  1:43         ` Dave Hansen
2004-11-03  2:26           ` Andrea Arcangeli
2004-11-03  2:48             ` Dave Hansen
2004-11-03  3:05               ` Andrea Arcangeli
2004-11-03 19:37                 ` Dave Hansen
2004-11-05  0:02                 ` Dave Hansen
2004-11-05  0:40                   ` Dave Hansen
2004-11-05  0:53                     ` Andrea Arcangeli
2004-11-05  1:55                       ` Dave Hansen
2004-11-05  2:08                         ` Andrea Arcangeli
2004-11-05  2:23                           ` Dave Hansen
2004-11-05  4:03                             ` Andrea Arcangeli
2004-11-05  4:20                               ` Andrea Arcangeli
2004-11-02 23:04     ` Andrew Morton
2004-11-03  1:40       ` Andrea Arcangeli
2004-11-02 22:34 ` Jason Baron
2004-11-02 23:12   ` Andrea Arcangeli

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox