linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Wen Congyang <wency@cn.fujitsu.com>
To: x86@kernel.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	linuxppc-dev@lists.ozlabs.org, linux-acpi@vger.kernel.org,
	linux-s390@vger.kernel.org, linux-sh@vger.kernel.org,
	linux-ia64@vger.kernel.org, cmetcalf@tilera.com,
	sparclinux@vger.kernel.org
Cc: Len Brown <len.brown@intel.com>,
	Wen Congyang <wency@cn.fujitsu.com>,
	Jianguo Wu <wujianguo@huawei.com>,
	Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>,
	paulus@samba.org, Minchan Kim <minchan.kim@gmail.com>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	David Rientjes <rientjes@google.com>,
	Christoph Lameter <cl@linux.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Jiang Liu <liuj97@gmail.com>
Subject: [PATCH v3 08/12] memory-hotplug: remove memmap of sparse-vmemmap
Date: Thu, 1 Nov 2012 17:44:39 +0800	[thread overview]
Message-ID: <1351763083-7905-9-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1351763083-7905-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

All pages of virtual mapping in removed memory cannot be freed, since some pages
used as PGD/PUD includes not only removed memory but also other memory. So the
patch checks whether page can be freed or not.

How to check whether page can be freed or not?
 1. When removing memory, the page structs of the revmoved memory are filled
    with 0FD.
 2. All page structs are filled with 0xFD on PT/PMD, PT/PMD can be cleared.
    In this case, the page used as PT/PMD can be freed.

Applying patch, __remove_section() of CONFIG_SPARSEMEM_VMEMMAP is integrated
into one. So __remove_section() of CONFIG_SPARSEMEM_VMEMMAP is deleted.

Note:  vmemmap_kfree() and vmemmap_free_bootmem() are not implemented for ia64,
ppc, s390, and sparc.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 arch/ia64/mm/discontig.c  |   8 ++++
 arch/powerpc/mm/init_64.c |   8 ++++
 arch/s390/mm/vmem.c       |   8 ++++
 arch/sparc/mm/init_64.c   |   8 ++++
 arch/x86/mm/init_64.c     | 119 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h        |   2 +
 mm/memory_hotplug.c       |  17 +------
 mm/sparse.c               |   5 +-
 8 files changed, 158 insertions(+), 17 deletions(-)

diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 33943db..0d23b69 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -823,6 +823,14 @@ int __meminit vmemmap_populate(struct page *start_page,
 	return vmemmap_populate_basepages(start_page, size, node);
 }
 
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 6466440..df7d155 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -298,6 +298,14 @@ int __meminit vmemmap_populate(struct page *start_page,
 	return 0;
 }
 
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 4f4803a..ab69c34 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -236,6 +236,14 @@ out:
 	return ret;
 }
 
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 75a984b..546855d 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2232,6 +2232,14 @@ void __meminit vmemmap_populate_print_last(void)
 	}
 }
 
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 795dae3..e85626d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -998,6 +998,125 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 	return 0;
 }
 
+#define PAGE_INUSE 0xFD
+
+unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end,
+			    struct page **pp, int *page_size)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte = NULL;
+	void *page_addr;
+	unsigned long next;
+
+	*pp = NULL;
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd))
+		return pgd_addr_end(addr, end);
+
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud))
+		return pud_addr_end(addr, end);
+
+	if (!cpu_has_pse) {
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd))
+			return next;
+
+		pte = pte_offset_kernel(pmd, addr);
+		if (pte_none(*pte))
+			return next;
+
+		*page_size = PAGE_SIZE;
+		*pp = pte_page(*pte);
+	} else {
+		next = pmd_addr_end(addr, end);
+
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd))
+			return next;
+
+		*page_size = PMD_SIZE;
+		*pp = pmd_page(*pmd);
+	}
+
+	/*
+	 * Removed page structs are filled with 0xFD.
+	 */
+	memset((void *)addr, PAGE_INUSE, next - addr);
+
+	page_addr = page_address(*pp);
+
+	/*
+	 * Check the page is filled with 0xFD or not.
+	 * memchr_inv() returns the address. In this case, we cannot
+	 * clear PTE/PUD entry, since the page is used by other.
+	 * So we cannot also free the page.
+	 *
+	 * memchr_inv() returns NULL. In this case, we can clear
+	 * PTE/PUD entry, since the page is not used by other.
+	 * So we can also free the page.
+	 */
+	if (memchr_inv(page_addr, PAGE_INUSE, *page_size)) {
+		*pp = NULL;
+		return next;
+	}
+
+	if (!cpu_has_pse)
+		pte_clear(&init_mm, addr, pte);
+	else
+		pmd_clear(pmd);
+
+	return next;
+}
+
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+	unsigned long addr = (unsigned long)memmap;
+	unsigned long end = (unsigned long)(memmap + nr_pages);
+	unsigned long next;
+	struct page *page;
+	int page_size;
+
+	for (; addr < end; addr = next) {
+		page = NULL;
+		page_size = 0;
+		next = find_and_clear_pte_page(addr, end, &page, &page_size);
+		if (!page)
+			continue;
+
+		free_pages((unsigned long)page_address(page),
+			    get_order(page_size));
+		__flush_tlb_one(addr);
+	}
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+	unsigned long addr = (unsigned long)memmap;
+	unsigned long end = (unsigned long)(memmap + nr_pages);
+	unsigned long next;
+	struct page *page;
+	int page_size;
+	unsigned long magic;
+
+	for (; addr < end; addr = next) {
+		page = NULL;
+		page_size = 0;
+		next = find_and_clear_pte_page(addr, end, &page, &page_size);
+		if (!page)
+			continue;
+
+		magic = (unsigned long) page->lru.next;
+		if (magic == SECTION_INFO)
+			put_page_bootmem(page);
+		flush_tlb_kernel_range(addr, end);
+	}
+}
+
 void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8e5a56f..42b8723 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1642,6 +1642,8 @@ int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
 				  unsigned long size);
+void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
+void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);
 
 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index db9806c..03153cf 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -312,19 +312,6 @@ static int __meminit __add_section(int nid, struct zone *zone,
 	return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
 }
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-static int __remove_section(struct zone *zone, struct mem_section *ms)
-{
-	int ret = -EINVAL;
-
-	if (!valid_section(ms))
-		return ret;
-
-	ret = unregister_memory_section(ms);
-
-	return ret;
-}
-#else
 static int __remove_section(struct zone *zone, struct mem_section *ms)
 {
 	unsigned long flags;
@@ -341,9 +328,9 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
 	pgdat_resize_lock(pgdat, &flags);
 	sparse_remove_one_section(zone, ms);
 	pgdat_resize_unlock(pgdat, &flags);
-	return 0;
+
+	return ret;
 }
-#endif
 
 /*
  * Reasonably generic function for adding memory.  It is
diff --git a/mm/sparse.c b/mm/sparse.c
index fac95f2..ab9d755 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -613,12 +613,13 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
 	/* This will make the necessary allocations eventually. */
 	return sparse_mem_map_populate(pnum, nid);
 }
-static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
+static void __kfree_section_memmap(struct page *page, unsigned long nr_pages)
 {
-	return; /* XXX: Not implemented yet */
+	vmemmap_kfree(page, nr_pages);
 }
 static void free_map_bootmem(struct page *page, unsigned long nr_pages)
 {
+	vmemmap_free_bootmem(page, nr_pages);
 }
 #else
 static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
-- 
1.8.0

  parent reply	other threads:[~2012-11-01  9:39 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-01  9:44 [PATCH v3 00/12] memory-hotplug: hot-remove physical memory Wen Congyang
2012-11-01  9:44 ` [PATCH v3 01/12] memory-hotplug: try to offline the memory twice to avoid dependence Wen Congyang
2012-11-01  9:44 ` [PATCH v3 02/12] memory-hotplug: check whether all memory blocks are offlined or not when removing memory Wen Congyang
2012-11-01  9:44 ` [PATCH v3 03/12] memory-hotplug: remove redundant codes Wen Congyang
2012-11-01  9:44 ` [PATCH v3 04/12] memory-hotplug: remove /sys/firmware/memmap/X sysfs Wen Congyang
2012-11-01  9:44 ` [PATCH v3 05/12] memory-hotplug: introduce new function arch_remove_memory() for removing page table depends on architecture Wen Congyang
2012-11-01  9:44 ` [PATCH v3 06/12] memory-hotplug: unregister memory section on SPARSEMEM_VMEMMAP Wen Congyang
2012-11-20  6:22   ` Jaegeuk Hanse
2012-11-20  6:55     ` Wen Congyang
2012-11-20  6:58       ` Jaegeuk Hanse
2012-11-20  9:37         ` Wen Congyang
2012-11-20 11:03           ` Jaegeuk Hanse
2012-11-20 11:16   ` Jaegeuk Hanse
2012-11-21  3:05     ` Wen Congyang
2012-11-21  4:22       ` Jaegeuk Hanse
2012-11-21  4:42         ` Wen Congyang
2012-11-21  5:03           ` Jaegeuk Hanse
2012-11-21  5:12             ` Wen Congyang
2012-11-21  5:28             ` Wen Congyang
2012-11-01  9:44 ` [PATCH v3 07/12] memory-hotplug: implement register_page_bootmem_info_section of sparse-vmemmap Wen Congyang
2012-11-01  9:44 ` Wen Congyang [this message]
2012-11-27  5:47   ` [PATCH v3 08/12] memory-hotplug: remove memmap " Jianguo Wu
2012-11-27  6:39     ` Wen Congyang
2012-11-27  6:49     ` Wen Congyang
2012-11-27  7:14       ` Jianguo Wu
2012-11-01  9:44 ` [PATCH v3 09/12] memory-hotplug: remove page table of x86_64 architecture Wen Congyang
2012-11-01  9:44 ` [PATCH v3 10/12] memory-hotplug: memory_hotplug: clear zone when removing the memory Wen Congyang
2012-11-01  9:44 ` [PATCH v3 11/12] memory-hotplug: remove sysfs file of node Wen Congyang
2012-11-19 10:08   ` Yasuaki Ishimatsu
2012-11-20  9:35     ` Wen Congyang
2012-11-26 14:27   ` Jianguo Wu
2012-11-27  2:24     ` Wen Congyang
2012-11-01  9:44 ` [PATCH v3 12/12] memory-hotplug: free node_data when a node is offlined Wen Congyang
2012-11-16 11:55 ` [PATCH v3 00/12] memory-hotplug: hot-remove physical memory Wen Congyang
2012-11-22  1:33 ` Yasuaki Ishimatsu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1351763083-7905-9-git-send-email-wency@cn.fujitsu.com \
    --to=wency@cn.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=cl@linux.com \
    --cc=cmetcalf@tilera.com \
    --cc=isimatu.yasuaki@jp.fujitsu.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=len.brown@intel.com \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-ia64@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=linux-sh@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=liuj97@gmail.com \
    --cc=minchan.kim@gmail.com \
    --cc=paulus@samba.org \
    --cc=rientjes@google.com \
    --cc=sparclinux@vger.kernel.org \
    --cc=wujianguo@huawei.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).