linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Pavel Tatashin <pasha.tatashin@oracle.com>
To: linux-kernel@vger.kernel.org, sparclinux@vger.kernel.org,
	linux-mm@kvack.org, linuxppc-dev@lists.ozlabs.org,
	linux-s390@vger.kernel.org, linux-arm-kernel@lists.infradead.org,
	x86@kernel.org, kasan-dev@googlegroups.com,
	borntraeger@de.ibm.com, heiko.carstens@de.ibm.com,
	davem@davemloft.net, willy@infradead.org, mhocko@kernel.org
Subject: [v4 05/15] mm: don't accessed uninitialized struct pages
Date: Wed,  2 Aug 2017 16:38:14 -0400	[thread overview]
Message-ID: <1501706304-869240-6-git-send-email-pasha.tatashin@oracle.com> (raw)
In-Reply-To: <1501706304-869240-1-git-send-email-pasha.tatashin@oracle.com>

In deferred_init_memmap() where all deferred struct pages are initialized
we have a check like this:

    if (page->flags) {
            VM_BUG_ON(page_zone(page) != zone);
            goto free_range;
    }

This way we are checking if the current deferred page has already been
initialized. It works, because memory for struct pages has been zeroed, and
the only way flags are not zero if it went through __init_single_page()
before.  But, once we change the current behavior and won't zero the memory
in memblock allocator, we cannot trust anything inside "struct page"es
until they are initialized. This patch fixes this.

This patch defines a new accessor memblock_get_reserved_pfn_range()
which returns successive ranges of reserved PFNs.  deferred_init_memmap()
calls it to determine if a PFN and its struct page has already been
initialized.

Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
---
 include/linux/memblock.h |  3 +++
 mm/memblock.c            | 54 ++++++++++++++++++++++++++++++++++++++++++------
 mm/page_alloc.c          | 11 +++++++++-
 3 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index c89d16c88512..9d8dabedf5ba 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -321,6 +321,9 @@ int memblock_is_map_memory(phys_addr_t addr);
 int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
 bool memblock_is_reserved(phys_addr_t addr);
 bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
+void memblock_get_reserved_pfn_range(unsigned long pfn,
+				     unsigned long *pfn_start,
+				     unsigned long *pfn_end);
 
 extern void __memblock_dump_all(void);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 3a2707914064..e6df054e3180 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1580,7 +1580,13 @@ void __init memblock_mem_limit_remove_map(phys_addr_t limit)
 	memblock_cap_memory_range(0, max_addr);
 }
 
-static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
+/**
+ * Return index in regions array if addr is within the region. Otherwise
+ * return -1. If -1 is returned and *next_idx is not %NULL, sets it to the
+ * next region index or -1 if there is none.
+ */
+static int __init_memblock memblock_search(struct memblock_type *type,
+					   phys_addr_t addr, int *next_idx)
 {
 	unsigned int left = 0, right = type->cnt;
 
@@ -1595,22 +1601,26 @@ static int __init_memblock memblock_search(struct memblock_type *type, phys_addr
 		else
 			return mid;
 	} while (left < right);
+
+	if (next_idx)
+		*next_idx = (right == type->cnt) ? -1 : right;
+
 	return -1;
 }
 
 bool __init memblock_is_reserved(phys_addr_t addr)
 {
-	return memblock_search(&memblock.reserved, addr) != -1;
+	return memblock_search(&memblock.reserved, addr, NULL) != -1;
 }
 
 bool __init_memblock memblock_is_memory(phys_addr_t addr)
 {
-	return memblock_search(&memblock.memory, addr) != -1;
+	return memblock_search(&memblock.memory, addr, NULL) != -1;
 }
 
 int __init_memblock memblock_is_map_memory(phys_addr_t addr)
 {
-	int i = memblock_search(&memblock.memory, addr);
+	int i = memblock_search(&memblock.memory, addr, NULL);
 
 	if (i == -1)
 		return false;
@@ -1622,7 +1632,7 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
 			 unsigned long *start_pfn, unsigned long *end_pfn)
 {
 	struct memblock_type *type = &memblock.memory;
-	int mid = memblock_search(type, PFN_PHYS(pfn));
+	int mid = memblock_search(type, PFN_PHYS(pfn), NULL);
 
 	if (mid == -1)
 		return -1;
@@ -1646,7 +1656,7 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
  */
 int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
 {
-	int idx = memblock_search(&memblock.memory, base);
+	int idx = memblock_search(&memblock.memory, base, NULL);
 	phys_addr_t end = base + memblock_cap_size(base, &size);
 
 	if (idx == -1)
@@ -1656,6 +1666,38 @@ int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size
 }
 
 /**
+ * memblock_get_reserved_pfn_range - search for the next reserved region
+ *
+ * @pfn: start searching from this pfn.
+ *
+ * RETURNS:
+ * [start_pfn, end_pfn), where start_pfn >= pfn. If none is found
+ * start_pfn, and end_pfn are both set to ULONG_MAX.
+ */
+void __init_memblock memblock_get_reserved_pfn_range(unsigned long pfn,
+						     unsigned long *start_pfn,
+						     unsigned long *end_pfn)
+{
+	struct memblock_type *type = &memblock.reserved;
+	int next_idx, idx;
+
+	idx = memblock_search(type, PFN_PHYS(pfn), &next_idx);
+	if (idx == -1 && next_idx == -1) {
+		*start_pfn = ULONG_MAX;
+		*end_pfn = ULONG_MAX;
+		return;
+	}
+
+	if (idx == -1) {
+		idx = next_idx;
+		*start_pfn = PFN_DOWN(type->regions[idx].base);
+	} else {
+		*start_pfn = pfn;
+	}
+	*end_pfn = PFN_DOWN(type->regions[idx].base + type->regions[idx].size);
+}
+
+/**
  * memblock_is_region_reserved - check if a region intersects reserved memory
  * @base: base of region to check
  * @size: size of region to check
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 87fb35ac0b87..99b9e2e06319 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1447,6 +1447,7 @@ static int __init deferred_init_memmap(void *data)
 	pg_data_t *pgdat = data;
 	int nid = pgdat->node_id;
 	struct mminit_pfnnid_cache nid_init_state = { };
+	unsigned long resv_start_pfn = 0, resv_end_pfn = 0;
 	unsigned long start = jiffies;
 	unsigned long nr_pages = 0;
 	unsigned long walk_start, walk_end;
@@ -1491,6 +1492,10 @@ static int __init deferred_init_memmap(void *data)
 			pfn = zone->zone_start_pfn;
 
 		for (; pfn < end_pfn; pfn++) {
+			if (pfn >= resv_end_pfn)
+				memblock_get_reserved_pfn_range(pfn,
+								&resv_start_pfn,
+								&resv_end_pfn);
 			if (!pfn_valid_within(pfn))
 				goto free_range;
 
@@ -1524,7 +1529,11 @@ static int __init deferred_init_memmap(void *data)
 				cond_resched();
 			}
 
-			if (page->flags) {
+			/*
+			 * Check if this page has already been initialized due
+			 * to being reserved during boot in memblock.
+			 */
+			if (pfn >= resv_start_pfn) {
 				VM_BUG_ON(page_zone(page) != zone);
 				goto free_range;
 			}
-- 
2.13.3

  parent reply	other threads:[~2017-08-02 20:39 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-02 20:38 [v4 00/15] complete deferred page initialization Pavel Tatashin
2017-08-02 20:38 ` [v4 01/15] x86/mm: reserve only exiting low pages Pavel Tatashin
2017-08-02 20:38 ` [v4 02/15] x86/mm: setting fields in deferred pages Pavel Tatashin
2017-08-02 20:38 ` [v4 03/15] sparc64/mm: " Pavel Tatashin
2017-08-02 20:38 ` [v4 04/15] mm: discard memblock data later Pavel Tatashin
2017-08-03  4:29   ` kbuild test robot
2017-08-02 20:38 ` Pavel Tatashin [this message]
2017-08-02 20:38 ` [v4 06/15] sparc64: simplify vmemmap_populate Pavel Tatashin
2017-08-02 20:38 ` [v4 07/15] mm: defining memblock_virt_alloc_try_nid_raw Pavel Tatashin
2017-08-02 20:38 ` [v4 08/15] mm: zero struct pages during initialization Pavel Tatashin
2017-08-02 20:38 ` [v4 09/15] sparc64: optimized struct page zeroing Pavel Tatashin
2017-08-03  5:15   ` kbuild test robot
2017-08-02 20:38 ` [v4 10/15] x86/kasan: explicitly zero kasan shadow memory Pavel Tatashin
2017-08-02 20:38 ` [v4 11/15] arm64/kasan: " Pavel Tatashin
2017-08-02 20:38 ` [v4 12/15] mm: explicitly zero pagetable memory Pavel Tatashin
2017-08-02 20:38 ` [v4 13/15] mm: stop zeroing memory during allocation in vmemmap Pavel Tatashin
2017-08-03  4:46   ` kbuild test robot
2017-08-02 20:38 ` [v4 14/15] mm: optimize early system hash allocations Pavel Tatashin
2017-08-03  4:29   ` kbuild test robot
2017-08-02 20:38 ` [v4 15/15] mm: debug for raw alloctor Pavel Tatashin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1501706304-869240-6-git-send-email-pasha.tatashin@oracle.com \
    --to=pasha.tatashin@oracle.com \
    --cc=borntraeger@de.ibm.com \
    --cc=davem@davemloft.net \
    --cc=heiko.carstens@de.ibm.com \
    --cc=kasan-dev@googlegroups.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mhocko@kernel.org \
    --cc=sparclinux@vger.kernel.org \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).