All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wanpeng Li <liwanp@linux.vnet.ibm.com>
To: Tang Chen <tangchen@cn.fujitsu.com>
Cc: rjw@sisk.pl, lenb@kernel.org, tglx@linutronix.de, mingo@elte.hu,
	hpa@zytor.com, akpm@linux-foundation.org, tj@kernel.org,
	trenn@suse.de, yinghai@kernel.org, jiang.liu@huawei.com,
	wency@cn.fujitsu.com, laijs@cn.fujitsu.com,
	isimatu.yasuaki@jp.fujitsu.com, izumi.taku@jp.fujitsu.com,
	mgorman@suse.de, minchan@kernel.org, mina86@mina86.com,
	gong.chen@linux.intel.com, vasilis.liaskovitis@profitbricks.com,
	lwoodman@redhat.com, riel@redhat.com, jweiner@redhat.com,
	prarit@redhat.com, zhangyanfei@cn.fujitsu.com, x86@kernel.org,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, linux-acpi@vger.kernel.org
Subject: Re: [PATCH 10/11] x86, mem-hotplug: Support initialize page tables from low to high.
Date: Thu, 5 Sep 2013 21:30:27 +0800	[thread overview]
Message-ID: <20130905133027.GA23038@hacker.(null)> (raw)
In-Reply-To: <1377596268-31552-11-git-send-email-tangchen@cn.fujitsu.com>

Hi Tang,
On Tue, Aug 27, 2013 at 05:37:47PM +0800, Tang Chen wrote:
>init_mem_mapping() is called before SRAT is parsed. And memblock will allocate
>memory for page tables. To prevent page tables being allocated within hotpluggable
>memory, we will allocate page tables from the end of kernel image to the higher
>memory.
>
>The order of page tables allocation is controled by movablenode boot option.
>Since the default behavior of page tables initialization procedure is allocate
>page tables from top of the memory downwards, if users don't specify movablenode
>boot option, the kernel will behave as before.
>
>Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
>Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
>---
> arch/x86/mm/init.c |  119 +++++++++++++++++++++++++++++++++++++++------------
> 1 files changed, 91 insertions(+), 28 deletions(-)
>
>diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
>index 793204b..f004d8e 100644
>--- a/arch/x86/mm/init.c
>+++ b/arch/x86/mm/init.c
>@@ -407,13 +407,77 @@ static unsigned long __init init_range_memory_mapping(
>
> /* (PUD_SHIFT-PMD_SHIFT)/2 */
> #define STEP_SIZE_SHIFT 5
>-void __init init_mem_mapping(void)
>+
>+#ifdef CONFIG_MOVABLE_NODE
>+/**
>+ * memory_map_from_low - Map [start, end) from low to high
>+ * @start: start address of the target memory range
>+ * @end: end address of the target memory range
>+ *
>+ * This function will setup direct mapping for memory range [start, end) in a
>+ * heuristic way. In the beginning, step_size is small. The more memory we map
>+ * memory in the next loop.
>+ */
>+static void __init memory_map_from_low(unsigned long start, unsigned long end)
>+{
>+	unsigned long next, new_mapped_ram_size;
>+	unsigned long mapped_ram_size = 0;
>+	/* step_size need to be small so pgt_buf from BRK could cover it */
>+	unsigned long step_size = PMD_SIZE;
>+
>+	while (start < end) {
>+		if (end - start > step_size) {
>+			next = round_up(start + 1, step_size);
>+			if (next > end)
>+				next = end;
>+		} else
>+			next = end;
>+
>+		new_mapped_ram_size = init_range_memory_mapping(start, next);
>+		start = next;
>+
>+		if (new_mapped_ram_size > mapped_ram_size)
>+			step_size <<= STEP_SIZE_SHIFT;
>+		mapped_ram_size += new_mapped_ram_size;
>+	}
>+}
>+#endif /* CONFIG_MOVABLE_NODE */
>+
>+/**
>+ * memory_map_from_high - Map [start, end) from high to low
>+ * @start: start address of the target memory range
>+ * @end: end address of the target memory range
>+ *
>+ * This function is similar to memory_map_from_low() except it maps memory
>+ * from high to low.
>+ */
>+static void __init memory_map_from_high(unsigned long start, unsigned long end)
> {
>-	unsigned long end, real_end, start, last_start;
>-	unsigned long step_size;
>-	unsigned long addr;
>+	unsigned long prev, new_mapped_ram_size;
> 	unsigned long mapped_ram_size = 0;
>-	unsigned long new_mapped_ram_size;
>+	/* step_size need to be small so pgt_buf from BRK could cover it */
>+	unsigned long step_size = PMD_SIZE;
>+
>+	while (start < end) {
>+		if (end > step_size) {
>+			prev = round_down(end - 1, step_size);
>+			if (prev < start)
>+				prev = start;
>+		} else
>+			prev = start;
>+
>+		new_mapped_ram_size = init_range_memory_mapping(prev, end);
>+		end = prev;
>+
>+		if (new_mapped_ram_size > mapped_ram_size)
>+			step_size <<= STEP_SIZE_SHIFT;
>+		mapped_ram_size += new_mapped_ram_size;
>+	}
>+}
>+
>+void __init init_mem_mapping(void)
>+{
>+	unsigned long end;
>
> 	probe_page_size_mask();
>
>@@ -423,44 +487,43 @@ void __init init_mem_mapping(void)
> 	end = max_low_pfn << PAGE_SHIFT;
> #endif
>
>-	/* the ISA range is always mapped regardless of memory holes */
>-	init_memory_mapping(0, ISA_END_ADDRESS);
>+	max_pfn_mapped = 0; /* will get exact value next */
>+	min_pfn_mapped = end >> PAGE_SHIFT;
>+
>+#ifdef CONFIG_MOVABLE_NODE
>+	unsigned long kernel_end;
>+
>+	if (movablenode_enable_srat &&
>+	    memblock.current_order == MEMBLOCK_ORDER_LOW_TO_HIGH) {

I think memblock.current_order == MEMBLOCK_ORDER_LOW_TO_HIGH is always
true when CONFIG_MOVABLE_NODE is set and movablenode_enable_srat == true,
once PATCH 11/11 is applied.

>+		kernel_end = round_up(__pa_symbol(_end), PMD_SIZE);
>+
>+		memory_map_from_low(kernel_end, end);
>+		memory_map_from_low(ISA_END_ADDRESS, kernel_end);

Why is the range ISA_END_ADDRESS ~ end split into two separate calls?

>+		goto out;
>+	}
>+#endif /* CONFIG_MOVABLE_NODE */
>+
>+	unsigned long addr, real_end;
>
> 	/* xen has big range in reserved near end of ram, skip it at first.*/
> 	addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE);
> 	real_end = addr + PMD_SIZE;
>
>-	/* step_size need to be small so pgt_buf from BRK could cover it */
>-	step_size = PMD_SIZE;
>-	max_pfn_mapped = 0; /* will get exact value next */
>-	min_pfn_mapped = real_end >> PAGE_SHIFT;
>-	last_start = start = real_end;
>-
> 	/*
> 	 * We start from the top (end of memory) and go to the bottom.
> 	 * The memblock_find_in_range() gets us a block of RAM from the
> 	 * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages
> 	 * for page table.
> 	 */
>-	while (last_start > ISA_END_ADDRESS) {
>-		if (last_start > step_size) {
>-			start = round_down(last_start - 1, step_size);
>-			if (start < ISA_END_ADDRESS)
>-				start = ISA_END_ADDRESS;
>-		} else
>-			start = ISA_END_ADDRESS;
>-		new_mapped_ram_size = init_range_memory_mapping(start,
>-							last_start);
>-		last_start = start;
>-		/* only increase step_size after big range get mapped */
>-		if (new_mapped_ram_size > mapped_ram_size)
>-			step_size <<= STEP_SIZE_SHIFT;
>-		mapped_ram_size += new_mapped_ram_size;
>-	}

I think the variables, sorted by address, are:
ISA_END_ADDRESS -> _end -> real_end -> end 

>+	memory_map_from_high(ISA_END_ADDRESS, real_end);

Does this overlap with the work done between #ifdef CONFIG_MOVABLE_NODE and
#endif?

Regards,
Wanpeng LI 

>
> 	if (real_end < end)
> 		init_range_memory_mapping(real_end, end);
>
>+out:
>+	/* the ISA range is always mapped regardless of memory holes */
>+	init_memory_mapping(0, ISA_END_ADDRESS);
>+
> #ifdef CONFIG_X86_64
> 	if (max_pfn > max_low_pfn) {
> 		/* can we preseve max_low_pfn ?*/
>-- 
>1.7.1
>
>--
>To unsubscribe, send a message with 'unsubscribe linux-mm' in
>the body to majordomo@kvack.org.  For more info on Linux MM,
>see: http://www.linux-mm.org/ .
>Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2013-09-05 13:30 UTC|newest]

Thread overview: 84+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-08-27  9:37 [PATCH 00/11] x86, memblock: Allocate memory near kernel image before SRAT parsed Tang Chen
2013-08-27  9:37 ` Tang Chen
2013-08-27  9:37 ` [PATCH 01/11] memblock: Rename current_limit to current_limit_high in memblock Tang Chen
2013-08-27  9:37   ` Tang Chen
2013-08-27  9:37 ` [PATCH 02/11] memblock: Rename memblock_set_current_limit() to memblock_set_current_limit_high() Tang Chen
2013-08-27  9:37   ` Tang Chen
2013-08-27  9:37 ` [PATCH 03/11] memblock: Introduce lowest limit in memblock Tang Chen
2013-08-27  9:37   ` Tang Chen
2013-08-27  9:37 ` [PATCH 04/11] memblock: Introduce memblock_set_current_limit_low() to set lower limit of memblock Tang Chen
2013-08-27  9:37   ` Tang Chen
2013-08-27  9:37 ` [PATCH 05/11] memblock: Introduce allocation order to memblock Tang Chen
2013-08-27  9:37   ` Tang Chen
2013-09-05  9:16   ` Wanpeng Li
2013-09-05  9:16   ` Wanpeng Li
2013-09-05  9:21     ` Tang Chen
2013-09-05  9:21       ` Tang Chen
2013-09-05  9:27       ` Wanpeng Li
2013-09-05  9:27       ` Wanpeng Li
2013-08-27  9:37 ` [PATCH 06/11] memblock: Improve memblock to support allocation from lower address Tang Chen
2013-08-27  9:37   ` Tang Chen
2013-09-04  0:24   ` Toshi Kani
2013-09-04  0:24     ` Toshi Kani
2013-09-04  1:00     ` Tang Chen
2013-09-04  1:00       ` Tang Chen
2013-08-27  9:37 ` [PATCH 07/11] x86, memblock: Set lowest limit for memblock_alloc_base_nid() Tang Chen
2013-08-27  9:37   ` Tang Chen
2013-09-04  0:37   ` Toshi Kani
2013-09-04  0:37     ` Toshi Kani
2013-09-04  2:05     ` Tang Chen
2013-09-04  2:05       ` Tang Chen
2013-09-04 15:22       ` Toshi Kani
2013-09-04 15:22         ` Toshi Kani
2013-08-27  9:37 ` [PATCH 08/11] x86, acpi, memblock: Use __memblock_alloc_base() in acpi_initrd_override() Tang Chen
2013-08-27  9:37   ` Tang Chen
2013-08-28  0:04   ` Rafael J. Wysocki
2013-08-28  0:04     ` Rafael J. Wysocki
2013-08-27  9:37 ` [PATCH 09/11] mem-hotplug: Introduce movablenode boot option to {en|dis}able using SRAT Tang Chen
2013-08-27  9:37   ` Tang Chen
2013-08-27  9:37 ` [PATCH 10/11] x86, mem-hotplug: Support initialize page tables from low to high Tang Chen
2013-08-27  9:37   ` Tang Chen
2013-09-05 13:30   ` Wanpeng Li [this message]
2013-09-06  1:34     ` Tang Chen
2013-09-06  1:34       ` Tang Chen
2013-09-06  2:16       ` Wanpeng Li
2013-09-06  3:09         ` Tang Chen
2013-09-06  3:09           ` Tang Chen
2013-09-06  2:16       ` Wanpeng Li
2013-09-05 13:30   ` Wanpeng Li
2013-08-27  9:37 ` [PATCH 11/11] x86, mem_hotplug: Allocate memory near kernel image before SRAT is parsed Tang Chen
2013-08-27  9:37   ` Tang Chen
2013-09-04 19:40   ` Toshi Kani
2013-09-04 19:40     ` Toshi Kani
2013-08-28  8:03 ` [PATCH 00/11] x86, memblock: Allocate memory near kernel image before SRAT parsed Wanpeng Li
2013-08-28  8:03 ` Wanpeng Li
2013-08-28  9:34   ` Tang Chen
2013-08-28  9:34     ` Tang Chen
2013-08-28 15:19 ` Tejun Heo
2013-08-28 15:19   ` Tejun Heo
2013-08-29  1:30   ` Tang Chen
2013-08-29  1:30     ` Tang Chen
2013-08-29  1:36     ` Wanpeng Li
2013-08-29  1:53       ` Tang Chen
2013-08-29  1:53         ` Tang Chen
2013-08-29  1:36     ` Wanpeng Li
2013-09-02  1:03 ` Tang Chen
2013-09-02  1:03   ` Tang Chen
2013-09-04 19:22 ` Tejun Heo
2013-09-04 19:22   ` Tejun Heo
2013-09-05  9:01   ` Tang Chen
2013-09-05  9:01     ` Tang Chen
2013-09-06  8:58   ` Wanpeng Li
2013-09-06  8:58   ` Wanpeng Li
     [not found]   ` <52299935.0302450a.26c9.ffffb240SMTPIN_ADDED_BROKEN@mx.google.com>
2013-09-06 15:15     ` Tejun Heo
2013-09-06 15:15       ` Tejun Heo
2013-09-06 15:47       ` H. Peter Anvin
2013-09-06 15:47         ` H. Peter Anvin
2013-09-09 12:04         ` Wanpeng Li
2013-09-09 12:04         ` Wanpeng Li
2013-09-09 11:56       ` Wanpeng Li
2013-09-09 11:56       ` Wanpeng Li
     [not found]       ` <522db781.22ab440a.41b1.ffffd825SMTPIN_ADDED_BROKEN@mx.google.com>
2013-09-09 13:58         ` Tejun Heo
2013-09-09 13:58           ` Tejun Heo
2013-09-09 23:58           ` Wanpeng Li
2013-09-09 23:58           ` Wanpeng Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='20130905133027.GA23038@hacker.(null)' \
    --to=liwanp@linux.vnet.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=gong.chen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=isimatu.yasuaki@jp.fujitsu.com \
    --cc=izumi.taku@jp.fujitsu.com \
    --cc=jiang.liu@huawei.com \
    --cc=jweiner@redhat.com \
    --cc=laijs@cn.fujitsu.com \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lwoodman@redhat.com \
    --cc=mgorman@suse.de \
    --cc=mina86@mina86.com \
    --cc=minchan@kernel.org \
    --cc=mingo@elte.hu \
    --cc=prarit@redhat.com \
    --cc=riel@redhat.com \
    --cc=rjw@sisk.pl \
    --cc=tangchen@cn.fujitsu.com \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=trenn@suse.de \
    --cc=vasilis.liaskovitis@profitbricks.com \
    --cc=wency@cn.fujitsu.com \
    --cc=x86@kernel.org \
    --cc=yinghai@kernel.org \
    --cc=zhangyanfei@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.