From: Pavel Tatashin <pasha.tatashin@oracle.com>
To: linux-kernel@vger.kernel.org, sparclinux@vger.kernel.org,
	linux-mm@kvack.org, linuxppc-dev@lists.ozlabs.org,
	linux-s390@vger.kernel.org, linux-arm-kernel@lists.infradead.org,
	x86@kernel.org, kasan-dev@googlegroups.com,
	borntraeger@de.ibm.com, heiko.carstens@de.ibm.com,
	davem@davemloft.net, willy@infradead.org, mhocko@kernel.org
Subject: [v4 04/15] mm: discard memblock data later
Date: Wed,  2 Aug 2017 16:38:13 -0400	[thread overview]
Message-ID: <1501706304-869240-5-git-send-email-pasha.tatashin@oracle.com> (raw)
In-Reply-To: <1501706304-869240-1-git-send-email-pasha.tatashin@oracle.com>
There is existing use after free bug when deferred struct pages are
enabled:
The memblock_add() allocates memory for the memory array if more than
128 entries are needed.  See comment in e820__memblock_setup():
  * The bootstrap memblock region count maximum is 128 entries
  * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries
  * than that - so allow memblock resizing.
This memblock memory is freed here:
        free_low_memory_core_early()
We access the freed memblock.memory later in boot when deferred pages are
initialized in this path:
        deferred_init_memmap()
                for_each_mem_pfn_range()
                  __next_mem_pfn_range()
                    type = &memblock.memory;
One possible explanation for why this use-after-free hasn't been hit
before is that the limit of INIT_MEMBLOCK_REGIONS has never been exceeded
at least on systems where deferred struct pages were enabled.
Another reason why we want this problem fixed in this patch series is,
in the next patch, we will need to access memblock.reserved from
deferred_init_memmap().
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
---
 include/linux/memblock.h |  7 +++++--
 mm/memblock.c            | 38 +++++++++++++++++---------------------
 mm/nobootmem.c           | 16 ----------------
 mm/page_alloc.c          |  2 ++
 4 files changed, 24 insertions(+), 39 deletions(-)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 77d427974f57..c89d16c88512 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -61,9 +61,11 @@ extern int memblock_debug;
 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
 #define __init_memblock __meminit
 #define __initdata_memblock __meminitdata
+void memblock_discard(void);
 #else
 #define __init_memblock
 #define __initdata_memblock
+#define memblock_discard()
 #endif
 
 #define memblock_dbg(fmt, ...) \
@@ -74,8 +76,6 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
 					int nid, ulong flags);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
-phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
-phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr);
 void memblock_allow_resize(void);
 int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
 int memblock_add(phys_addr_t base, phys_addr_t size);
@@ -110,6 +110,9 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags,
 void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start,
 				phys_addr_t *out_end);
 
+void __memblock_free_early(phys_addr_t base, phys_addr_t size);
+void __memblock_free_late(phys_addr_t base, phys_addr_t size);
+
 /**
  * for_each_mem_range - iterate through memblock areas from type_a and not
  * included in type_b. Or just type_a if type_b is NULL.
diff --git a/mm/memblock.c b/mm/memblock.c
index 2cb25fe4452c..3a2707914064 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -285,31 +285,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
 }
 
 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
-
-phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
-					phys_addr_t *addr)
-{
-	if (memblock.reserved.regions == memblock_reserved_init_regions)
-		return 0;
-
-	*addr = __pa(memblock.reserved.regions);
-
-	return PAGE_ALIGN(sizeof(struct memblock_region) *
-			  memblock.reserved.max);
-}
-
-phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info(
-					phys_addr_t *addr)
+/**
+ * Discard memory and reserved arrays if they were allocated
+ */
+void __init_memblock memblock_discard(void)
 {
-	if (memblock.memory.regions == memblock_memory_init_regions)
-		return 0;
+	phys_addr_t addr, size;
 
-	*addr = __pa(memblock.memory.regions);
+	if (memblock.reserved.regions != memblock_reserved_init_regions) {
+		addr = __pa(memblock.reserved.regions);
+		size = PAGE_ALIGN(sizeof(struct memblock_region) *
+				  memblock.reserved.max);
+		__memblock_free_late(addr, size);
+	}
 
-	return PAGE_ALIGN(sizeof(struct memblock_region) *
-			  memblock.memory.max);
+	if (memblock.memory.regions == memblock_memory_init_regions) {
+		addr = __pa(memblock.memory.regions);
+		size = PAGE_ALIGN(sizeof(struct memblock_region) *
+				  memblock.memory.max);
+		__memblock_free_late(addr, size);
+	}
 }
-
 #endif
 
 /**
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 36454d0f96ee..3637809a18d0 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -146,22 +146,6 @@ static unsigned long __init free_low_memory_core_early(void)
 				NULL)
 		count += __free_memory_core(start, end);
 
-#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
-	{
-		phys_addr_t size;
-
-		/* Free memblock.reserved array if it was allocated */
-		size = get_allocated_memblock_reserved_regions_info(&start);
-		if (size)
-			count += __free_memory_core(start, start + size);
-
-		/* Free memblock.memory array if it was allocated */
-		size = get_allocated_memblock_memory_regions_info(&start);
-		if (size)
-			count += __free_memory_core(start, start + size);
-	}
-#endif
-
 	return count;
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6d30e914afb6..87fb35ac0b87 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1584,6 +1584,8 @@ void __init page_alloc_init_late(void)
 	/* Reinit limits that are based on free pages after the kernel is up */
 	files_maxfiles_init();
 #endif
+	/* Discard memblock private memory */
+	memblock_discard();
 
 	for_each_populated_zone(zone)
 		set_zone_contiguous(zone);
-- 
2.13.3
next prev parent reply	other threads:[~2017-08-02 20:39 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-02 20:38 [v4 00/15] complete deferred page initialization Pavel Tatashin
2017-08-02 20:38 ` [v4 01/15] x86/mm: reserve only exiting low pages Pavel Tatashin
2017-08-02 20:38 ` [v4 02/15] x86/mm: setting fields in deferred pages Pavel Tatashin
2017-08-02 20:38 ` [v4 03/15] sparc64/mm: " Pavel Tatashin
2017-08-02 20:38 ` Pavel Tatashin [this message]
2017-08-03  4:29   ` [v4 04/15] mm: discard memblock data later kbuild test robot
2017-08-02 20:38 ` [v4 05/15] mm: don't accessed uninitialized struct pages Pavel Tatashin
2017-08-02 20:38 ` [v4 06/15] sparc64: simplify vmemmap_populate Pavel Tatashin
2017-08-02 20:38 ` [v4 07/15] mm: defining memblock_virt_alloc_try_nid_raw Pavel Tatashin
2017-08-02 20:38 ` [v4 08/15] mm: zero struct pages during initialization Pavel Tatashin
2017-08-02 20:38 ` [v4 09/15] sparc64: optimized struct page zeroing Pavel Tatashin
2017-08-03  5:15   ` kbuild test robot
2017-08-02 20:38 ` [v4 10/15] x86/kasan: explicitly zero kasan shadow memory Pavel Tatashin
2017-08-02 20:38 ` [v4 11/15] arm64/kasan: " Pavel Tatashin
2017-08-02 20:38 ` [v4 12/15] mm: explicitly zero pagetable memory Pavel Tatashin
2017-08-02 20:38 ` [v4 13/15] mm: stop zeroing memory during allocation in vmemmap Pavel Tatashin
2017-08-03  4:46   ` kbuild test robot
2017-08-02 20:38 ` [v4 14/15] mm: optimize early system hash allocations Pavel Tatashin
2017-08-03  4:29   ` kbuild test robot
2017-08-02 20:38 ` [v4 15/15] mm: debug for raw alloctor Pavel Tatashin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox
  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):
  git send-email \
    --in-reply-to=1501706304-869240-5-git-send-email-pasha.tatashin@oracle.com \
    --to=pasha.tatashin@oracle.com \
    --cc=borntraeger@de.ibm.com \
    --cc=davem@davemloft.net \
    --cc=heiko.carstens@de.ibm.com \
    --cc=kasan-dev@googlegroups.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mhocko@kernel.org \
    --cc=sparclinux@vger.kernel.org \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY
  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).