From: Tang Chen <tangchen@cn.fujitsu.com>
To: tj@kernel.org, rjw@sisk.pl, lenb@kernel.org, tglx@linutronix.de,
mingo@elte.hu, hpa@zytor.com, akpm@linux-foundation.org,
trenn@suse.de, yinghai@kernel.org, jiang.liu@huawei.com,
wency@cn.fujitsu.com, laijs@cn.fujitsu.com,
isimatu.yasuaki@jp.fujitsu.com, izumi.taku@jp.fujitsu.com,
mgorman@suse.de, minchan@kernel.org, mina86@mina86.com,
gong.chen@linux.intel.com, vasilis.liaskovitis@profitbricks.com,
lwoodman@redhat.com, riel@redhat.com, jweiner@redhat.com,
prarit@redhat.com, zhangyanfei@cn.fujitsu.com, toshi.kani@hp.com
Cc: x86@kernel.org, linux-doc@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-acpi@vger.kernel.org
Subject: [PATCH v2 8/9] x86, mem-hotplug: Support initialize page tables from low to high.
Date: Wed, 11 Sep 2013 18:07:36 +0800 [thread overview]
Message-ID: <1378894057-30946-9-git-send-email-tangchen@cn.fujitsu.com> (raw)
In-Reply-To: <1378894057-30946-1-git-send-email-tangchen@cn.fujitsu.com>
init_mem_mapping() is called before SRAT is parsed, and memblock allocates the
memory used for page tables. To prevent page tables from being allocated within
hotpluggable memory, allocate them starting from the end of the kernel image and
going upward toward higher memory.
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
---
arch/x86/mm/init.c | 133 ++++++++++++++++++++++++++++++++++++++++-----------
1 files changed, 104 insertions(+), 29 deletions(-)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 04664cd..7dae4e3 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -54,11 +54,23 @@ __ref void *alloc_low_pages(unsigned int num)
unsigned long ret;
if (min_pfn_mapped >= max_pfn_mapped)
panic("alloc_low_page: ran out of memory");
+
+ if (memblock_direction_bottom_up()) {
+ ret = memblock_alloc_bottom_up(
+ MEMBLOCK_ALLOC_ACCESSIBLE,
+ max_pfn_mapped << PAGE_SHIFT,
+ PAGE_SIZE * num, PAGE_SIZE);
+ if (ret)
+ goto reserve;
+ }
+
ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT,
max_pfn_mapped << PAGE_SHIFT,
PAGE_SIZE * num , PAGE_SIZE);
if (!ret)
panic("alloc_low_page: can not alloc memory");
+
+reserve:
memblock_reserve(ret, PAGE_SIZE * num);
pfn = ret >> PAGE_SHIFT;
} else {
@@ -401,13 +413,79 @@ static unsigned long __init init_range_memory_mapping(
/* (PUD_SHIFT-PMD_SHIFT)/2 */
#define STEP_SIZE_SHIFT 5
-void __init init_mem_mapping(void)
+
+#ifdef CONFIG_MOVABLE_NODE
+/**
+ * memory_map_from_low - Map [start, end) from low to high
+ * @start: start address of the target memory range
+ * @end: end address of the target memory range
+ *
+ * This function will set up direct mapping for memory range [start, end) in a
+ * heuristic way. step_size starts small; whenever one loop maps more RAM than
+ * all previous loops combined, step_size is increased for the next loop.
+ */
+static void __init memory_map_from_low(unsigned long start, unsigned long end)
+{
+ unsigned long next, new_mapped_ram_size;
+ unsigned long mapped_ram_size = 0;
+ /* step_size needs to be small so that pgt_buf from BRK can cover it */
+ unsigned long step_size = PMD_SIZE;
+
+ while (start < end) {
+ if (end - start > step_size) {
+ next = round_up(start + 1, step_size);
+ if (next > end)
+ next = end;
+ } else
+ next = end;
+
+ new_mapped_ram_size = init_range_memory_mapping(start, next);
+ min_pfn_mapped = start >> PAGE_SHIFT;
+ start = next;
+
+ if (new_mapped_ram_size > mapped_ram_size)
+ step_size <<= STEP_SIZE_SHIFT;
+ mapped_ram_size += new_mapped_ram_size;
+ }
+}
+#endif /* CONFIG_MOVABLE_NODE */
+
+/**
+ * memory_map_from_high - Map [start, end) from high to low
+ * @start: start address of the target memory range
+ * @end: end address of the target memory range
+ *
+ * This function is similar to memory_map_from_low() except it maps memory
+ * from high to low.
+ */
+static void __init memory_map_from_high(unsigned long start, unsigned long end)
{
- unsigned long end, real_end, start, last_start;
- unsigned long step_size;
- unsigned long addr;
+ unsigned long prev, new_mapped_ram_size;
unsigned long mapped_ram_size = 0;
- unsigned long new_mapped_ram_size;
+ /* step_size needs to be small so that pgt_buf from BRK can cover it */
+ unsigned long step_size = PMD_SIZE;
+
+ while (start < end) {
+ if (end > step_size) {
+ prev = round_down(end - 1, step_size);
+ if (prev < start)
+ prev = start;
+ } else
+ prev = start;
+
+ new_mapped_ram_size = init_range_memory_mapping(prev, end);
+ min_pfn_mapped = prev >> PAGE_SHIFT;
+ end = prev;
+
+ if (new_mapped_ram_size > mapped_ram_size)
+ step_size <<= STEP_SIZE_SHIFT;
+ mapped_ram_size += new_mapped_ram_size;
+ }
+}
+
+void __init init_mem_mapping(void)
+{
+ unsigned long end;
probe_page_size_mask();
@@ -417,45 +495,42 @@ void __init init_mem_mapping(void)
end = max_low_pfn << PAGE_SHIFT;
#endif
- /* the ISA range is always mapped regardless of memory holes */
- init_memory_mapping(0, ISA_END_ADDRESS);
+ max_pfn_mapped = 0; /* will get exact value next */
+ min_pfn_mapped = end >> PAGE_SHIFT;
+
+#ifdef CONFIG_MOVABLE_NODE
+ unsigned long kernel_end;
+
+ if (memblock_direction_bottom_up()) {
+ kernel_end = round_up(__pa_symbol(_end), PMD_SIZE);
+
+ memory_map_from_low(kernel_end, end);
+ memory_map_from_low(ISA_END_ADDRESS, kernel_end);
+ goto out;
+ }
+#endif /* CONFIG_MOVABLE_NODE */
+
+ unsigned long addr, real_end;
/* xen has big range in reserved near end of ram, skip it at first.*/
addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE);
real_end = addr + PMD_SIZE;
- /* step_size need to be small so pgt_buf from BRK could cover it */
- step_size = PMD_SIZE;
- max_pfn_mapped = 0; /* will get exact value next */
- min_pfn_mapped = real_end >> PAGE_SHIFT;
- last_start = start = real_end;
-
/*
* We start from the top (end of memory) and go to the bottom.
* The memblock_find_in_range() gets us a block of RAM from the
* end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages
* for page table.
*/
- while (last_start > ISA_END_ADDRESS) {
- if (last_start > step_size) {
- start = round_down(last_start - 1, step_size);
- if (start < ISA_END_ADDRESS)
- start = ISA_END_ADDRESS;
- } else
- start = ISA_END_ADDRESS;
- new_mapped_ram_size = init_range_memory_mapping(start,
- last_start);
- last_start = start;
- min_pfn_mapped = last_start >> PAGE_SHIFT;
- /* only increase step_size after big range get mapped */
- if (new_mapped_ram_size > mapped_ram_size)
- step_size <<= STEP_SIZE_SHIFT;
- mapped_ram_size += new_mapped_ram_size;
- }
+ memory_map_from_high(ISA_END_ADDRESS, real_end);
if (real_end < end)
init_range_memory_mapping(real_end, end);
+out:
+ /* the ISA range is always mapped regardless of memory holes */
+ init_memory_mapping(0, ISA_END_ADDRESS);
+
#ifdef CONFIG_X86_64
if (max_pfn > max_low_pfn) {
/* can we preseve max_low_pfn ?*/
--
1.7.1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2013-09-11 10:05 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-09-11 10:07 [PATCH v2 0/9] x86, memblock: Allocate memory near kernel image before SRAT parsed Tang Chen
2013-09-11 10:07 ` [PATCH v2 1/9] memblock: Introduce allocation direction to memblock Tang Chen
2013-09-11 10:07 ` [PATCH v2 2/9] x86, memblock: Introduce memblock_alloc_bottom_up() " Tang Chen
2013-09-11 10:07 ` [PATCH v2 3/9] x86, dma: Support allocate memory from bottom upwards in dma_contiguous_reserve() Tang Chen
2013-09-11 10:07 ` [PATCH v2 4/9] x86: Support allocate memory from bottom upwards in setup_log_buf() Tang Chen
2013-09-11 10:07 ` [PATCH v2 5/9] x86: Support allocate memory from bottom upwards in relocate_initrd() Tang Chen
2013-09-11 10:07 ` [PATCH v2 6/9] x86, acpi: Support allocate memory from bottom upwards in acpi_initrd_override() Tang Chen
2013-09-11 10:07 ` [PATCH v2 7/9] x86, acpi, crash, kdump: Do reserve_crashkernel() after SRAT is parsed Tang Chen
2013-09-11 10:07 ` Tang Chen [this message]
2013-09-11 10:07 ` [PATCH v2 9/9] mem-hotplug: Introduce movablenode boot option to control memblock allocation direction Tang Chen
2013-09-11 12:51 ` [PATCH v2 0/9] x86, memblock: Allocate memory near kernel image before SRAT parsed Tejun Heo
2013-09-12 10:06 ` Tang Chen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1378894057-30946-9-git-send-email-tangchen@cn.fujitsu.com \
--to=tangchen@cn.fujitsu.com \
--cc=akpm@linux-foundation.org \
--cc=gong.chen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=isimatu.yasuaki@jp.fujitsu.com \
--cc=izumi.taku@jp.fujitsu.com \
--cc=jiang.liu@huawei.com \
--cc=jweiner@redhat.com \
--cc=laijs@cn.fujitsu.com \
--cc=lenb@kernel.org \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lwoodman@redhat.com \
--cc=mgorman@suse.de \
--cc=mina86@mina86.com \
--cc=minchan@kernel.org \
--cc=mingo@elte.hu \
--cc=prarit@redhat.com \
--cc=riel@redhat.com \
--cc=rjw@sisk.pl \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=toshi.kani@hp.com \
--cc=trenn@suse.de \
--cc=vasilis.liaskovitis@profitbricks.com \
--cc=wency@cn.fujitsu.com \
--cc=x86@kernel.org \
--cc=yinghai@kernel.org \
--cc=zhangyanfei@cn.fujitsu.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).