All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yinghai Lu <yinghai@kernel.org>
To: Ingo Molnar <mingo@elte.hu>, "H. Peter Anvin" <hpa@zytor.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Andrew Morton <akpm@linux-foundation.org>,
	Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Christoph Lameter <cl@linux-foundation.org>
Subject: [PATCH 2/2] sparsemem: put mem map for one node together.
Date: Sun, 20 Dec 2009 01:20:28 -0800	[thread overview]
Message-ID: <4B2DEC5C.8000108@kernel.org> (raw)
In-Reply-To: <4B2B4FF9.3090806@kernel.org>


add vmemmap_alloc_block_buf for mem map only.

it will fallback old wayif can not get that big.

it will help system with more memory that use early_res instead of bootmem
that can not handle too many entries

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/mm/init_64.c |    2 
 include/linux/mm.h    |    7 +++
 mm/sparse-vmemmap.c   |   70 +++++++++++++++++++++++++++++++
 mm/sparse.c           |  111 +++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 187 insertions(+), 3 deletions(-)

Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -961,7 +961,7 @@ vmemmap_populate(struct page *start_page
 			if (pmd_none(*pmd)) {
 				pte_t entry;
 
-				p = vmemmap_alloc_block(PMD_SIZE, node);
+				p = vmemmap_alloc_block_buf(PMD_SIZE, node);
 				if (!p)
 					return -ENOMEM;
 
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -1323,12 +1323,19 @@ extern int randomize_va_space;
 const char * arch_vma_name(struct vm_area_struct *vma);
 void print_vma_addr(char *prefix, unsigned long rip);
 
+void sparse_mem_maps_populate_node(struct page **map_map,
+				   unsigned long pnum_begin,
+				   unsigned long pnum_end,
+				   unsigned long map_count,
+				   int nodeid);
+
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
 pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
 pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
 pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
 void *vmemmap_alloc_block(unsigned long size, int node);
+void *vmemmap_alloc_block_buf(unsigned long size, int node);
 void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
 int vmemmap_populate_basepages(struct page *start_page,
 						unsigned long pages, int node);
Index: linux-2.6/mm/sparse-vmemmap.c
===================================================================
--- linux-2.6.orig/mm/sparse-vmemmap.c
+++ linux-2.6/mm/sparse-vmemmap.c
@@ -43,6 +43,8 @@ static void * __init_refok __earlyonly_b
 	return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
 }
 
+static void *buf;
+static void *buf_end;
 
 void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 {
@@ -64,6 +66,24 @@ void * __meminit vmemmap_alloc_block(uns
 				__pa(MAX_DMA_ADDRESS));
 }
 
+/* need to make sure size is all the same during early stage */
+void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
+{
+	void *ptr;
+
+	if (!buf)
+		return vmemmap_alloc_block(size, node);
+
+	/* take the from buf */
+	ptr = (void *)ALIGN((unsigned long)buf, size);
+	if (ptr + size > buf_end)
+		return vmemmap_alloc_block(size, node);
+
+	buf = ptr + size;
+
+	return ptr;
+}
+
 void __meminit vmemmap_verify(pte_t *pte, int node,
 				unsigned long start, unsigned long end)
 {
@@ -80,7 +100,7 @@ pte_t * __meminit vmemmap_pte_populate(p
 	pte_t *pte = pte_offset_kernel(pmd, addr);
 	if (pte_none(*pte)) {
 		pte_t entry;
-		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+		void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
 		if (!p)
 			return NULL;
 		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
@@ -163,3 +183,51 @@ struct page * __meminit sparse_mem_map_p
 
 	return map;
 }
+
+void __init sparse_mem_maps_populate_node(struct page **map_map,
+					  unsigned long pnum_begin,
+					  unsigned long pnum_end,
+					  unsigned long map_count, int nodeid)
+{
+	unsigned long pnum;
+	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
+	void *buf_start;
+
+	size = ALIGN(size, PMD_SIZE);
+	buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
+			 PMD_SIZE, __pa(MAX_DMA_ADDRESS));
+
+	if (buf_start) {
+		buf = buf_start;
+		buf_end = buf_start + size * map_count;
+	}
+
+	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+		struct mem_section *ms;
+
+		if (!present_section_nr(pnum))
+			continue;
+
+		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+		if (map_map[pnum])
+			continue;
+		ms = __nr_to_section(pnum);
+		printk(KERN_ERR "%s: sparsemem memory map backing failed "
+			"some memory will not be available.\n", __func__);
+		ms->section_mem_map = 0;
+	}
+
+	if (buf_start) {
+		/* need to free left buf */
+#ifdef CONFIG_NO_BOOTMEM
+		free_early(__pa(buf_start), __pa(buf_end));
+		if (buf_start < buf)
+			reserve_early_without_check(__pa(buf_start), __pa(buf),
+							 "BOOTMAP");
+#else
+		free_bootmem(__pa(buf), buf_end - buf);
+#endif
+		buf = NULL;
+		buf_end = NULL;
+	}
+}
Index: linux-2.6/mm/sparse.c
===================================================================
--- linux-2.6.orig/mm/sparse.c
+++ linux-2.6/mm/sparse.c
@@ -390,8 +390,65 @@ struct page __init *sparse_mem_map_popul
 		       PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION));
 	return map;
 }
+void __init sparse_mem_maps_populate_node(struct page **map_map,
+					  unsigned long pnum_begin,
+					  unsigned long pnum_end,
+					  unsigned long map_count, int nodeid)
+{
+	void *map;
+	unsigned long pnum;
+	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
+
+	map = alloc_remap(nodeid, size * map_count);
+	if (map) {
+		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+			if (!present_section_nr(pnum))
+				continue;
+			map_map[pnum] = map;
+			map += size;
+		}
+		return;
+	}
+
+	size = PAGE_ALIGN(size);
+	map = alloc_bootmem_pages_node(NODE_DATA(nodeid), size * map_count);
+	if (map) {
+		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+			if (!present_section_nr(pnum))
+				continue;
+			map_map[pnum] = map;
+			map += size;
+		}
+		return;
+	}
+
+	/* fallback */
+	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+		struct mem_section *ms;
+
+		if (!present_section_nr(pnum))
+			continue;
+		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+		if (map_map[pnum])
+			continue;
+		ms = __nr_to_section(pnum);
+		printk(KERN_ERR "%s: sparsemem memory map backing failed "
+			"some memory will not be available.\n", __func__);
+		ms->section_mem_map = 0;
+	}
+}
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
+static void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
+				 unsigned long pnum_begin,
+				 unsigned long pnum_end,
+				 unsigned long map_count, int nodeid)
+{
+	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
+					 map_count, nodeid);
+}
+
+#ifndef CONFIG_X86_64
 static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
 {
 	struct page *map;
@@ -407,6 +464,7 @@ static struct page __init *sparse_early_
 	ms->section_mem_map = 0;
 	return NULL;
 }
+#endif
 
 void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
 {
@@ -420,12 +478,14 @@ void __init sparse_init(void)
 {
 	unsigned long pnum;
 	struct page *map;
+	struct page **map_map;
 	unsigned long *usemap;
 	unsigned long **usemap_map;
-	int size;
+	int size, size2;
 	int nodeid_begin = 0;
 	unsigned long pnum_begin = 0;
 	unsigned long usemap_count;
+	unsigned long map_count;
 
 	/*
 	 * map is using big page (aka 2M in x86 64 bit)
@@ -478,6 +538,48 @@ void __init sparse_init(void)
 	sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
 					 usemap_count, nodeid_begin);
 
+#ifdef CONFIG_X86_64
+	size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
+	map_map = alloc_bootmem(size2);
+	if (!map_map)
+		panic("can not allocate map_map\n");
+
+	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+		struct mem_section *ms;
+
+		if (!present_section_nr(pnum))
+			continue;
+		ms = __nr_to_section(pnum);
+		nodeid_begin = sparse_early_nid(ms);
+		pnum_begin = pnum;
+		break;
+	}
+	map_count = 1;
+	for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
+		struct mem_section *ms;
+		int nodeid;
+
+		if (!present_section_nr(pnum))
+			continue;
+		ms = __nr_to_section(pnum);
+		nodeid = sparse_early_nid(ms);
+		if (nodeid == nodeid_begin) {
+			map_count++;
+			continue;
+		}
+		/* ok, we need to take cake of from pnum_begin to pnum - 1*/
+		sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum,
+						 map_count, nodeid_begin);
+		/* new start, update count etc*/
+		nodeid_begin = nodeid;
+		pnum_begin = pnum;
+		map_count = 1;
+	}
+	/* ok, last chunk */
+	sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS,
+					 map_count, nodeid_begin);
+#endif
+
 	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
 		if (!present_section_nr(pnum))
 			continue;
@@ -486,7 +588,11 @@ void __init sparse_init(void)
 		if (!usemap)
 			continue;
 
+#ifdef CONFIG_X86_64
+		map = map_map[pnum];
+#else
 		map = sparse_early_mem_map_alloc(pnum);
+#endif
 		if (!map)
 			continue;
 
@@ -496,6 +602,9 @@ void __init sparse_init(void)
 
 	vmemmap_populate_print_last();
 
+#ifdef CONFIG_X86_64
+	free_bootmem(__pa(map_map), size2);
+#endif
 	free_bootmem(__pa(usemap_map), size);
 }
 

  parent reply	other threads:[~2009-12-20  9:22 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <4B2B4C19.6010402@kernel.org>
2009-12-18  9:46 ` [PATCH 1/9] x86: move range related operation to one file Yinghai Lu
2009-12-18 20:10   ` H. Peter Anvin
2009-12-18 20:17     ` Yinghai Lu
2009-12-18 21:26       ` H. Peter Anvin
2009-12-18 23:47         ` Yinghai Lu
2009-12-19  0:25           ` H. Peter Anvin
2009-12-19  0:27             ` Yinghai Lu
2009-12-19  0:34               ` H. Peter Anvin
2009-12-18  9:46 ` [PATCH 2/9] x86: check range in update range Yinghai Lu
2009-12-18 17:23   ` Jesse Barnes
2009-12-18 19:39     ` Yinghai Lu
2009-12-18  9:47 ` [PATCH 3/9] x86: call early_res_to_bootmem one time Yinghai Lu
2009-12-18  9:47 ` [PATCH 4/9] x86: introduce max_early_res and early_res_count Yinghai Lu
2009-12-18  9:47 ` [PATCH 5/9] x86: dynamic increase early_res array size -v2 Yinghai Lu
2009-12-18  9:47 ` [PATCH 6/9] x86: print bootmem free before pci_iommu_alloc and free_all_bootmem -v2 Yinghai Lu
2009-12-18  9:48 ` [PATCH 7/9] x86: make early_node_mem get mem > 4g if possible -v2 Yinghai Lu
2009-12-18  9:48 ` [PATCH 8/9] x86: only call dma32_reserve_bootmem 64bit !CONFIG_NUMA Yinghai Lu
2009-12-18  9:48 ` [PATCH 9/9] x86: make 64 bit use early_res instead of bootmem before slab Yinghai Lu
2009-12-20  9:18   ` [PATCH 1/2] sparsemem: put usemap for one node together Yinghai Lu
2009-12-20  9:20   ` Yinghai Lu [this message]
2009-12-28  8:35     ` [PATCH 2/2] sparsemem: put mem map " Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B2DEC5C.8000108@kernel.org \
    --to=yinghai@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=cl@linux-foundation.org \
    --cc=hpa@zytor.com \
    --cc=jbarnes@virtuousgeek.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.