From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: linuxppc-dev@lists.ozlabs.org, mpe@ellerman.id.au
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>,
Bharata B Rao <bharata@linux.ibm.com>
Subject: [PATCH v3 3/4] powerpc/mm/radix: Remove split_kernel_mapping()
Date: Thu, 9 Jul 2020 18:49:24 +0530 [thread overview]
Message-ID: <20200709131925.922266-4-aneesh.kumar@linux.ibm.com> (raw)
In-Reply-To: <20200709131925.922266-1-aneesh.kumar@linux.ibm.com>
From: Bharata B Rao <bharata@linux.ibm.com>
We split the page table mapping on memory unplug if the
linear range was mapped with huge page mapping (for ex: 1G)
The page table splitting code has a few issues:
1. Recursive locking
--------------------
Memory unplug path takes cpu_hotplug_lock and calls stop_machine()
for splitting the mappings. However stop_machine() takes
cpu_hotplug_lock again causing deadlock.
2. BUG: sleeping function called from in_atomic() context
---------------------------------------------------------
Memory unplug path (remove_pagetable) takes init_mm.page_table_lock
spinlock and later calls stop_machine() which does wait_for_completion()
3. Bad unlock unbalance
-----------------------
Memory unplug path takes init_mm.page_table_lock spinlock and calls
stop_machine(). The stop_machine thread function runs in a different
thread context (migration thread) which tries to release and reaquire
ptl. Releasing ptl from a different thread than which acquired it
causes bad unlock unbalance.
These problems can be avoided if we avoid mapping hot-plugged memory
with 1G mapping, thereby removing the need for splitting them during
unplug. The kernel always make sure the minimum unplug request is
SUBSECTION_SIZE for device memory and SECTION_SIZE for regular memory.
In preparation for such a change remove page table splitting support.
This essentially is a revert of
commit 4dd5f8a99e791 ("powerpc/mm/radix: Split linear mapping on hot-unplug")
Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/mm/book3s64/radix_pgtable.c | 95 +++++-------------------
1 file changed, 19 insertions(+), 76 deletions(-)
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 46ad2da3087a..d5a01b9aadc9 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -15,7 +15,6 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/string_helpers.h>
-#include <linux/stop_machine.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
@@ -722,32 +721,6 @@ static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
p4d_clear(p4d);
}
-struct change_mapping_params {
- pte_t *pte;
- unsigned long start;
- unsigned long end;
- unsigned long aligned_start;
- unsigned long aligned_end;
-};
-
-static int __meminit stop_machine_change_mapping(void *data)
-{
- struct change_mapping_params *params =
- (struct change_mapping_params *)data;
-
- if (!data)
- return -1;
-
- spin_unlock(&init_mm.page_table_lock);
- pte_clear(&init_mm, params->aligned_start, params->pte);
- create_physical_mapping(__pa(params->aligned_start),
- __pa(params->start), -1, PAGE_KERNEL);
- create_physical_mapping(__pa(params->end), __pa(params->aligned_end),
- -1, PAGE_KERNEL);
- spin_lock(&init_mm.page_table_lock);
- return 0;
-}
-
static void remove_pte_table(pte_t *pte_start, unsigned long addr,
unsigned long end)
{
@@ -776,52 +749,6 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
}
}
-/*
- * clear the pte and potentially split the mapping helper
- */
-static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
- unsigned long size, pte_t *pte)
-{
- unsigned long mask = ~(size - 1);
- unsigned long aligned_start = addr & mask;
- unsigned long aligned_end = addr + size;
- struct change_mapping_params params;
- bool split_region = false;
-
- if ((end - addr) < size) {
- /*
- * We're going to clear the PTE, but not flushed
- * the mapping, time to remap and flush. The
- * effects if visible outside the processor or
- * if we are running in code close to the
- * mapping we cleared, we are in trouble.
- */
- if (overlaps_kernel_text(aligned_start, addr) ||
- overlaps_kernel_text(end, aligned_end)) {
- /*
- * Hack, just return, don't pte_clear
- */
- WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
- "text, not splitting\n", addr, end);
- return;
- }
- split_region = true;
- }
-
- if (split_region) {
- params.pte = pte;
- params.start = addr;
- params.end = end;
- params.aligned_start = addr & ~(size - 1);
- params.aligned_end = min_t(unsigned long, aligned_end,
- (unsigned long)__va(memblock_end_of_DRAM()));
- stop_machine(stop_machine_change_mapping, ¶ms, NULL);
- return;
- }
-
- pte_clear(&init_mm, addr, pte);
-}
-
static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
unsigned long end)
{
@@ -837,7 +764,12 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
continue;
if (pmd_is_leaf(*pmd)) {
- split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
+ if (!IS_ALIGNED(addr, PMD_SIZE) ||
+ !IS_ALIGNED(next, PMD_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+ pte_clear(&init_mm, addr, (pte_t *)pmd);
continue;
}
@@ -862,7 +794,12 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
continue;
if (pud_is_leaf(*pud)) {
- split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
+ if (!IS_ALIGNED(addr, PUD_SIZE) ||
+ !IS_ALIGNED(next, PUD_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+ pte_clear(&init_mm, addr, (pte_t *)pud);
continue;
}
@@ -890,7 +827,13 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
continue;
if (p4d_is_leaf(*p4d)) {
- split_kernel_mapping(addr, end, P4D_SIZE, (pte_t *)p4d);
+ if (!IS_ALIGNED(addr, P4D_SIZE) ||
+ !IS_ALIGNED(next, P4D_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+
+ pte_clear(&init_mm, addr, (pte_t *)pgd);
continue;
}
--
2.26.2
next prev parent reply other threads:[~2020-07-09 13:30 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-09 13:19 [PATCH v3 0/4] powerpc/mm/radix: Memory unplug fixes Aneesh Kumar K.V
2020-07-09 13:19 ` [PATCH v3 1/4] powerpc/mm/radix: Fix PTE/PMD fragment count for early page table mappings Aneesh Kumar K.V
2020-07-09 13:19 ` [PATCH v3 2/4] powerpc/mm/radix: Free PUD table when freeing pagetable Aneesh Kumar K.V
2020-07-09 13:19 ` Aneesh Kumar K.V [this message]
2020-07-09 13:19 ` [PATCH v3 4/4] powerpc/mm/radix: Create separate mappings for hot-plugged memory Aneesh Kumar K.V
2020-07-16 14:00 ` [PATCH v3 0/4] powerpc/mm/radix: Memory unplug fixes Nathan Lynch
2020-07-21 1:45 ` Michael Ellerman
2020-07-21 3:29 ` Bharata B Rao
2020-07-21 12:25 ` Michael Ellerman
2020-07-22 6:05 ` Bharata B Rao
2020-07-22 7:51 ` David Gibson
2020-07-24 11:52 ` Michael Ellerman
2020-07-24 12:17 ` Bharata B Rao
2020-07-25 7:37 ` David Gibson
2020-07-21 4:42 ` Aneesh Kumar K.V
2020-07-24 13:24 ` Michael Ellerman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200709131925.922266-4-aneesh.kumar@linux.ibm.com \
--to=aneesh.kumar@linux.ibm.com \
--cc=bharata@linux.ibm.com \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mpe@ellerman.id.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.