All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
To: akpm@linux-foundation.org, tglx@linutronix.de, mingo@redhat.com,
	hpa@zytor.com
Cc: tangchen@cn.fujitsu.com, toshi.kani@hp.com,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org,
	guz.fnst@cn.fujitsu.com, zhangyanfei@cn.fujitsu.com
Subject: [PATCH 2/2] x86,mem-hotplug: modify PGD entry when removing memory
Date: Wed, 18 Jun 2014 15:38:37 +0900	[thread overview]
Message-ID: <53A133ED.2090005@jp.fujitsu.com> (raw)
In-Reply-To: <53A132E2.9000605@jp.fujitsu.com>

When hot-adding/removing memory, sync_global_pgds() is called to
synchronize the kernel's PGD entries to the PGD entries of every
process's MM. But when hot-removing memory, sync_global_pgds() does not
work correctly.

First, sync_global_pgds() checks whether the target (reference) PGD
entry is none, and if it is, that PGD entry is skipped. But when
hot-removing memory, the PGD entry may be none because it may have been
cleared by free_pud_table(). So when sync_global_pgds() is called after
hot-removing memory, it should not skip a PGD entry even if that entry
is none; instead, it must clear the corresponding PGD entry in every
process's MM.

Currently sync_global_pgds() does not clear the PGD entries of all
processes' MMs when hot-removing memory. So when memory is hot-removed
and then memory covering the same range is hot-added again, the
following call trace is shown:

kernel BUG at arch/x86/mm/init_64.c:206!
...
 [<ffffffff815e0c80>] kernel_physical_mapping_init+0x1b2/0x1d2
 [<ffffffff815ced94>] init_memory_mapping+0x1d4/0x380
 [<ffffffff8104aebd>] arch_add_memory+0x3d/0xd0
 [<ffffffff815d03d9>] add_memory+0xb9/0x1b0
 [<ffffffff81352415>] acpi_memory_device_add+0x1af/0x28e
 [<ffffffff81325dc4>] acpi_bus_device_attach+0x8c/0xf0
 [<ffffffff813413b9>] acpi_ns_walk_namespace+0xc8/0x17f
 [<ffffffff81325d38>] ? acpi_bus_type_and_status+0xb7/0xb7
 [<ffffffff81325d38>] ? acpi_bus_type_and_status+0xb7/0xb7
 [<ffffffff813418ed>] acpi_walk_namespace+0x95/0xc5
 [<ffffffff81326b4c>] acpi_bus_scan+0x9a/0xc2
 [<ffffffff81326bff>] acpi_scan_bus_device_check+0x8b/0x12e
 [<ffffffff81326cb5>] acpi_scan_device_check+0x13/0x15
 [<ffffffff81320122>] acpi_os_execute_deferred+0x25/0x32
 [<ffffffff8107e02b>] process_one_work+0x17b/0x460
 [<ffffffff8107edfb>] worker_thread+0x11b/0x400
 [<ffffffff8107ece0>] ? rescuer_thread+0x400/0x400
 [<ffffffff81085aef>] kthread+0xcf/0xe0
 [<ffffffff81085a20>] ? kthread_create_on_node+0x140/0x140
 [<ffffffff815fc76c>] ret_from_fork+0x7c/0xb0
 [<ffffffff81085a20>] ? kthread_create_on_node+0x140/0x140

This patch makes sync_global_pgds() clear the PGD entries of all
processes' MMs when it is called after hot-removing memory.

Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 arch/x86/include/asm/pgtable_64.h |  3 ++-
 arch/x86/mm/fault.c               |  2 +-
 arch/x86/mm/init_64.c             | 27 +++++++++++++++++++--------
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 5be9063..809abb3 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -115,7 +115,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
 	native_set_pgd(pgd, native_make_pgd(0));
 }

-extern void sync_global_pgds(unsigned long start, unsigned long end);
+extern void sync_global_pgds(unsigned long start, unsigned long end,
+			     int removed);

 /*
  * Conversion functions: convert a page and protection to a page entry,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3664279..0193a32 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -350,7 +350,7 @@ out:

 void vmalloc_sync_all(void)
 {
-	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
+	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0);
 }

 /*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a5b245d..8f68032 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -178,7 +178,7 @@ __setup("noexec32=", nonx32_setup);
  * When memory was added/removed make sure all the processes MM have
  * suitable PGD entries in the local PGD level page.
  */
-void sync_global_pgds(unsigned long start, unsigned long end)
+void sync_global_pgds(unsigned long start, unsigned long end, int removed)
 {
 	unsigned long address;

@@ -186,7 +186,12 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 		const pgd_t *pgd_ref = pgd_offset_k(address);
 		struct page *page;

-		if (pgd_none(*pgd_ref))
+		/*
+		 * When it is called after memory hot remove, pgd_none()
+		 * returns true. In this case (removed == 1), we must clear
+		 * the PGD entries in the local PGD level page.
+		 */
+		if (pgd_none(*pgd_ref) && !removed)
 			continue;

 		spin_lock(&pgd_lock);
@@ -199,12 +204,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);

-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
-			else
+			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
 				BUG_ON(pgd_page_vaddr(*pgd)
 				       != pgd_page_vaddr(*pgd_ref));

+			if (removed) {
+				if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
+					pgd_clear(pgd);
+			} else {
+				if (pgd_none(*pgd))
+					set_pgd(pgd, *pgd_ref);
+			}
+
 			spin_unlock(pgt_lock);
 		}
 		spin_unlock(&pgd_lock);
@@ -633,7 +644,7 @@ kernel_physical_mapping_init(unsigned long start,
 	}

 	if (pgd_changed)
-		sync_global_pgds(addr, end - 1);
+		sync_global_pgds(addr, end - 1, 0);

 	__flush_tlb_all();

@@ -994,7 +1005,7 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
 	}

 	if (pgd_changed)
-		sync_global_pgds(start, end - 1);
+		sync_global_pgds(start, end - 1, 1);

 	flush_tlb_all();
 }
@@ -1341,7 +1352,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 	else
 		err = vmemmap_populate_basepages(start, end, node);
 	if (!err)
-		sync_global_pgds(start, end - 1);
+		sync_global_pgds(start, end - 1, 0);
 	return err;
 }


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
To: <akpm@linux-foundation.org>, <tglx@linutronix.de>,
	<mingo@redhat.com>, <hpa@zytor.com>
Cc: <tangchen@cn.fujitsu.com>, <toshi.kani@hp.com>,
	<linux-kernel@vger.kernel.org>, <linux-mm@kvack.org>,
	<x86@kernel.org>, <guz.fnst@cn.fujitsu.com>,
	<zhangyanfei@cn.fujitsu.com>
Subject: [PATCH 2/2] x86,mem-hotplug: modify PGD entry when removing memory
Date: Wed, 18 Jun 2014 15:38:37 +0900	[thread overview]
Message-ID: <53A133ED.2090005@jp.fujitsu.com> (raw)
In-Reply-To: <53A132E2.9000605@jp.fujitsu.com>

When hot-adding/removing memory, sync_global_pgds() is called to
synchronize the kernel's PGD entries to the PGD entries of every
process's MM. But when hot-removing memory, sync_global_pgds() does not
work correctly.

First, sync_global_pgds() checks whether the target (reference) PGD
entry is none, and if it is, that PGD entry is skipped. But when
hot-removing memory, the PGD entry may be none because it may have been
cleared by free_pud_table(). So when sync_global_pgds() is called after
hot-removing memory, it should not skip a PGD entry even if that entry
is none; instead, it must clear the corresponding PGD entry in every
process's MM.

Currently sync_global_pgds() does not clear the PGD entries of all
processes' MMs when hot-removing memory. So when memory is hot-removed
and then memory covering the same range is hot-added again, the
following call trace is shown:

kernel BUG at arch/x86/mm/init_64.c:206!
...
 [<ffffffff815e0c80>] kernel_physical_mapping_init+0x1b2/0x1d2
 [<ffffffff815ced94>] init_memory_mapping+0x1d4/0x380
 [<ffffffff8104aebd>] arch_add_memory+0x3d/0xd0
 [<ffffffff815d03d9>] add_memory+0xb9/0x1b0
 [<ffffffff81352415>] acpi_memory_device_add+0x1af/0x28e
 [<ffffffff81325dc4>] acpi_bus_device_attach+0x8c/0xf0
 [<ffffffff813413b9>] acpi_ns_walk_namespace+0xc8/0x17f
 [<ffffffff81325d38>] ? acpi_bus_type_and_status+0xb7/0xb7
 [<ffffffff81325d38>] ? acpi_bus_type_and_status+0xb7/0xb7
 [<ffffffff813418ed>] acpi_walk_namespace+0x95/0xc5
 [<ffffffff81326b4c>] acpi_bus_scan+0x9a/0xc2
 [<ffffffff81326bff>] acpi_scan_bus_device_check+0x8b/0x12e
 [<ffffffff81326cb5>] acpi_scan_device_check+0x13/0x15
 [<ffffffff81320122>] acpi_os_execute_deferred+0x25/0x32
 [<ffffffff8107e02b>] process_one_work+0x17b/0x460
 [<ffffffff8107edfb>] worker_thread+0x11b/0x400
 [<ffffffff8107ece0>] ? rescuer_thread+0x400/0x400
 [<ffffffff81085aef>] kthread+0xcf/0xe0
 [<ffffffff81085a20>] ? kthread_create_on_node+0x140/0x140
 [<ffffffff815fc76c>] ret_from_fork+0x7c/0xb0
 [<ffffffff81085a20>] ? kthread_create_on_node+0x140/0x140

This patch makes sync_global_pgds() clear the PGD entries of all
processes' MMs when it is called after hot-removing memory.

Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 arch/x86/include/asm/pgtable_64.h |  3 ++-
 arch/x86/mm/fault.c               |  2 +-
 arch/x86/mm/init_64.c             | 27 +++++++++++++++++++--------
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 5be9063..809abb3 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -115,7 +115,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
 	native_set_pgd(pgd, native_make_pgd(0));
 }

-extern void sync_global_pgds(unsigned long start, unsigned long end);
+extern void sync_global_pgds(unsigned long start, unsigned long end,
+			     int removed);

 /*
  * Conversion functions: convert a page and protection to a page entry,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3664279..0193a32 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -350,7 +350,7 @@ out:

 void vmalloc_sync_all(void)
 {
-	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
+	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0);
 }

 /*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a5b245d..8f68032 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -178,7 +178,7 @@ __setup("noexec32=", nonx32_setup);
  * When memory was added/removed make sure all the processes MM have
  * suitable PGD entries in the local PGD level page.
  */
-void sync_global_pgds(unsigned long start, unsigned long end)
+void sync_global_pgds(unsigned long start, unsigned long end, int removed)
 {
 	unsigned long address;

@@ -186,7 +186,12 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 		const pgd_t *pgd_ref = pgd_offset_k(address);
 		struct page *page;

-		if (pgd_none(*pgd_ref))
+		/*
+		 * When it is called after memory hot remove, pgd_none()
+		 * returns true. In this case (removed == 1), we must clear
+		 * the PGD entries in the local PGD level page.
+		 */
+		if (pgd_none(*pgd_ref) && !removed)
 			continue;

 		spin_lock(&pgd_lock);
@@ -199,12 +204,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);

-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
-			else
+			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
 				BUG_ON(pgd_page_vaddr(*pgd)
 				       != pgd_page_vaddr(*pgd_ref));

+			if (removed) {
+				if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
+					pgd_clear(pgd);
+			} else {
+				if (pgd_none(*pgd))
+					set_pgd(pgd, *pgd_ref);
+			}
+
 			spin_unlock(pgt_lock);
 		}
 		spin_unlock(&pgd_lock);
@@ -633,7 +644,7 @@ kernel_physical_mapping_init(unsigned long start,
 	}

 	if (pgd_changed)
-		sync_global_pgds(addr, end - 1);
+		sync_global_pgds(addr, end - 1, 0);

 	__flush_tlb_all();

@@ -994,7 +1005,7 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
 	}

 	if (pgd_changed)
-		sync_global_pgds(start, end - 1);
+		sync_global_pgds(start, end - 1, 1);

 	flush_tlb_all();
 }
@@ -1341,7 +1352,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 	else
 		err = vmemmap_populate_basepages(start, end, node);
 	if (!err)
-		sync_global_pgds(start, end - 1);
+		sync_global_pgds(start, end - 1, 0);
 	return err;
 }



  parent reply	other threads:[~2014-06-18  6:39 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-18  6:34 [PATCH 0/2] fix kernel panic on memory hotplug Yasuaki Ishimatsu
2014-06-18  6:34 ` Yasuaki Ishimatsu
2014-06-18  6:37 ` [PATCH 1/2] x86,mem-hotplug: pass sync_global_pgds() a correct argument in remove_pagetable() Yasuaki Ishimatsu
2014-06-18  6:37   ` Yasuaki Ishimatsu
2014-06-20 18:27   ` Toshi Kani
2014-06-20 18:27     ` Toshi Kani
2014-06-24  0:33     ` Yasuaki Ishimatsu
2014-06-24  0:33       ` Yasuaki Ishimatsu
2014-06-18  6:38 ` Yasuaki Ishimatsu [this message]
2014-06-18  6:38   ` [PATCH 2/2] x86,mem-hotplug: modify PGD entry when removing memory Yasuaki Ishimatsu
2014-06-20 18:30   ` Toshi Kani
2014-06-20 18:30     ` Toshi Kani
2014-06-24  0:31     ` Yasuaki Ishimatsu
2014-06-24  0:31       ` Yasuaki Ishimatsu
2014-06-24 15:12       ` Toshi Kani
2014-06-24 15:12         ` Toshi Kani
2014-06-24 23:29         ` Yasuaki Ishimatsu
2014-06-24 23:29           ` Yasuaki Ishimatsu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53A133ED.2090005@jp.fujitsu.com \
    --to=isimatu.yasuaki@jp.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=guz.fnst@cn.fujitsu.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@redhat.com \
    --cc=tangchen@cn.fujitsu.com \
    --cc=tglx@linutronix.de \
    --cc=toshi.kani@hp.com \
    --cc=x86@kernel.org \
    --cc=zhangyanfei@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.