From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>,
Linus Torvalds <torvalds@linux-foundation.org>,
npiggin@gmail.com, kaleshsingh@google.com,
joel@joelfernandes.org, linuxppc-dev@lists.ozlabs.org
Subject: [PATCH v6 03/11] mm/mremap: Convert huge PUD move to separate helper
Date: Mon, 24 May 2021 14:31:06 +0530 [thread overview]
Message-ID: <20210524090114.63446-4-aneesh.kumar@linux.ibm.com> (raw)
In-Reply-To: <20210524090114.63446-1-aneesh.kumar@linux.ibm.com>
With TRANSPARENT_HUGEPAGE_PUD enabled the kernel can find huge PUD entries.
Add a helper to move huge PUD entries on mremap().
This will be used by a later patch to optimize mremap of PUD_SIZE aligned
level 4 PTE mapped address.
This also makes sure we support mremap on huge PUD entries even with
CONFIG_HAVE_MOVE_PUD disabled.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
mm/mremap.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 73 insertions(+), 7 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c
index ec8f840399ed..1d6fadbd4820 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -324,10 +324,62 @@ static inline bool move_normal_pud(struct vm_area_struct *vma,
}
#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PUD
+static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
+ unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
+{
+ spinlock_t *old_ptl, *new_ptl;
+ struct mm_struct *mm = vma->vm_mm;
+ pud_t pud;
+
+ /*
+ * The destination pud shouldn't be established, free_pgtables()
+ * should have released it.
+ */
+ if (WARN_ON_ONCE(!pud_none(*new_pud)))
+ return false;
+
+ /*
+ * We don't have to worry about the ordering of src and dst
+ * ptlocks because exclusive mmap_lock prevents deadlock.
+ */
+ old_ptl = pud_lock(vma->vm_mm, old_pud);
+ new_ptl = pud_lockptr(mm, new_pud);
+ if (new_ptl != old_ptl)
+ spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+ /* Clear the pud */
+ pud = *old_pud;
+ pud_clear(old_pud);
+
+ VM_BUG_ON(!pud_none(*new_pud));
+
+ /* Set the new pud */
+ /* mark soft_dirty when we add pud level soft dirty support */
+ set_pud_at(mm, new_addr, new_pud, pud);
+ flush_pud_tlb_range(vma, old_addr, old_addr + HPAGE_PUD_SIZE);
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
+ spin_unlock(old_ptl);
+
+ return true;
+}
+#else
+static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
+ unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
+{
+ WARN_ON_ONCE(1);
+ return false;
+
+}
+#endif
+
enum pgt_entry {
NORMAL_PMD,
HPAGE_PMD,
NORMAL_PUD,
+ HPAGE_PUD,
};
/*
@@ -347,6 +399,7 @@ static __always_inline unsigned long get_extent(enum pgt_entry entry,
mask = PMD_MASK;
size = PMD_SIZE;
break;
+ case HPAGE_PUD:
case NORMAL_PUD:
mask = PUD_MASK;
size = PUD_SIZE;
@@ -395,6 +448,11 @@ static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
move_huge_pmd(vma, old_addr, new_addr, old_entry,
new_entry);
break;
+ case HPAGE_PUD:
+ moved = move_huge_pud(vma, old_addr, new_addr, old_entry,
+ new_entry);
+ break;
+
default:
WARN_ON_ONCE(1);
break;
@@ -414,6 +472,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long extent, old_end;
struct mmu_notifier_range range;
pmd_t *old_pmd, *new_pmd;
+ pud_t *old_pud, *new_pud;
old_end = old_addr + len;
flush_cache_range(vma, old_addr, old_end);
@@ -429,15 +488,22 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
* PUD level if possible.
*/
extent = get_extent(NORMAL_PUD, old_addr, old_end, new_addr);
- if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
- pud_t *old_pud, *new_pud;
- old_pud = get_old_pud(vma->vm_mm, old_addr);
- if (!old_pud)
+ old_pud = get_old_pud(vma->vm_mm, old_addr);
+ if (!old_pud)
+ continue;
+ new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
+ if (!new_pud)
+ break;
+ if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) {
+ if (extent == HPAGE_PUD_SIZE) {
+ move_pgt_entry(HPAGE_PUD, vma, old_addr, new_addr,
+ old_pud, new_pud, need_rmap_locks);
+ /* We ignore and continue on error? */
continue;
- new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
- if (!new_pud)
- break;
+ }
+ } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
+
if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
old_pud, new_pud, need_rmap_locks))
continue;
--
2.31.1
WARNING: multiple messages have this Message-ID (diff)
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org
Cc: mpe@ellerman.id.au, linuxppc-dev@lists.ozlabs.org,
kaleshsingh@google.com, npiggin@gmail.com,
joel@joelfernandes.org,
Christophe Leroy <christophe.leroy@csgroup.eu>,
Linus Torvalds <torvalds@linux-foundation.org>,
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Subject: [PATCH v6 03/11] mm/mremap: Convert huge PUD move to separate helper
Date: Mon, 24 May 2021 14:31:06 +0530 [thread overview]
Message-ID: <20210524090114.63446-4-aneesh.kumar@linux.ibm.com> (raw)
In-Reply-To: <20210524090114.63446-1-aneesh.kumar@linux.ibm.com>
With TRANSPARENT_HUGEPAGE_PUD enabled the kernel can find huge PUD entries.
Add a helper to move huge PUD entries on mremap().
This will be used by a later patch to optimize mremap of PUD_SIZE aligned
level 4 PTE mapped address.
This also makes sure we support mremap on huge PUD entries even with
CONFIG_HAVE_MOVE_PUD disabled.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
mm/mremap.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 73 insertions(+), 7 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c
index ec8f840399ed..1d6fadbd4820 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -324,10 +324,62 @@ static inline bool move_normal_pud(struct vm_area_struct *vma,
}
#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PUD
+static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
+ unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
+{
+ spinlock_t *old_ptl, *new_ptl;
+ struct mm_struct *mm = vma->vm_mm;
+ pud_t pud;
+
+ /*
+ * The destination pud shouldn't be established, free_pgtables()
+ * should have released it.
+ */
+ if (WARN_ON_ONCE(!pud_none(*new_pud)))
+ return false;
+
+ /*
+ * We don't have to worry about the ordering of src and dst
+ * ptlocks because exclusive mmap_lock prevents deadlock.
+ */
+ old_ptl = pud_lock(vma->vm_mm, old_pud);
+ new_ptl = pud_lockptr(mm, new_pud);
+ if (new_ptl != old_ptl)
+ spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+ /* Clear the pud */
+ pud = *old_pud;
+ pud_clear(old_pud);
+
+ VM_BUG_ON(!pud_none(*new_pud));
+
+ /* Set the new pud */
+ /* mark soft_dirty when we add pud level soft dirty support */
+ set_pud_at(mm, new_addr, new_pud, pud);
+ flush_pud_tlb_range(vma, old_addr, old_addr + HPAGE_PUD_SIZE);
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
+ spin_unlock(old_ptl);
+
+ return true;
+}
+#else
+static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
+ unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
+{
+ WARN_ON_ONCE(1);
+ return false;
+
+}
+#endif
+
enum pgt_entry {
NORMAL_PMD,
HPAGE_PMD,
NORMAL_PUD,
+ HPAGE_PUD,
};
/*
@@ -347,6 +399,7 @@ static __always_inline unsigned long get_extent(enum pgt_entry entry,
mask = PMD_MASK;
size = PMD_SIZE;
break;
+ case HPAGE_PUD:
case NORMAL_PUD:
mask = PUD_MASK;
size = PUD_SIZE;
@@ -395,6 +448,11 @@ static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
move_huge_pmd(vma, old_addr, new_addr, old_entry,
new_entry);
break;
+ case HPAGE_PUD:
+ moved = move_huge_pud(vma, old_addr, new_addr, old_entry,
+ new_entry);
+ break;
+
default:
WARN_ON_ONCE(1);
break;
@@ -414,6 +472,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long extent, old_end;
struct mmu_notifier_range range;
pmd_t *old_pmd, *new_pmd;
+ pud_t *old_pud, *new_pud;
old_end = old_addr + len;
flush_cache_range(vma, old_addr, old_end);
@@ -429,15 +488,22 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
* PUD level if possible.
*/
extent = get_extent(NORMAL_PUD, old_addr, old_end, new_addr);
- if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
- pud_t *old_pud, *new_pud;
- old_pud = get_old_pud(vma->vm_mm, old_addr);
- if (!old_pud)
+ old_pud = get_old_pud(vma->vm_mm, old_addr);
+ if (!old_pud)
+ continue;
+ new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
+ if (!new_pud)
+ break;
+ if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) {
+ if (extent == HPAGE_PUD_SIZE) {
+ move_pgt_entry(HPAGE_PUD, vma, old_addr, new_addr,
+ old_pud, new_pud, need_rmap_locks);
+ /* We ignore and continue on error? */
continue;
- new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
- if (!new_pud)
- break;
+ }
+ } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
+
if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
old_pud, new_pud, need_rmap_locks))
continue;
--
2.31.1
next prev parent reply other threads:[~2021-05-24 9:02 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-24 9:01 [PATCH v6 00/11] Speedup mremap on ppc64 Aneesh Kumar K.V
2021-05-24 9:01 ` Aneesh Kumar K.V
2021-05-24 9:01 ` [PATCH v6 01/11] selftest/mremap_test: Update the test to handle pagesize other than 4K Aneesh Kumar K.V
2021-05-24 9:01 ` Aneesh Kumar K.V
2021-05-24 9:01 ` [PATCH v6 02/11] selftest/mremap_test: Avoid crash with static build Aneesh Kumar K.V
2021-05-24 9:01 ` Aneesh Kumar K.V
2021-05-24 9:01 ` Aneesh Kumar K.V [this message]
2021-05-24 9:01 ` [PATCH v6 03/11] mm/mremap: Convert huge PUD move to separate helper Aneesh Kumar K.V
2021-05-24 9:01 ` [PATCH v6 04/11] mm/mremap: Use pmd/pud_poplulate to update page table entries Aneesh Kumar K.V
2021-05-24 9:01 ` Aneesh Kumar K.V
2021-05-24 9:01 ` [PATCH v6 05/11] powerpc/mm/book3s64: Fix possible build error Aneesh Kumar K.V
2021-05-24 9:01 ` Aneesh Kumar K.V
2021-05-24 9:01 ` [PATCH v6 06/11] powerpc/mm/book3s64: Update tlb flush routines to take a page walk cache flush argument Aneesh Kumar K.V
2021-05-24 9:01 ` Aneesh Kumar K.V
2021-05-24 9:01 ` [PATCH v6 07/11] mm/mremap: Use range flush that does TLB and page walk cache flush Aneesh Kumar K.V
2021-05-24 9:01 ` Aneesh Kumar K.V
2021-05-24 17:02 ` Linus Torvalds
2021-05-24 17:02 ` Linus Torvalds
2021-05-25 13:27 ` Aneesh Kumar K.V
2021-05-25 13:27 ` Aneesh Kumar K.V
2021-05-25 17:08 ` Linus Torvalds
2021-05-25 17:08 ` Linus Torvalds
2021-05-24 9:01 ` [PATCH v6 08/11] mm/mremap: properly flush the TLB on mremap Aneesh Kumar K.V
2021-05-24 9:01 ` Aneesh Kumar K.V
2021-05-24 9:01 ` [PATCH v6 09/11] mm/mremap: Fix race between mremap and pageout Aneesh Kumar K.V
2021-05-24 9:01 ` Aneesh Kumar K.V
2021-05-24 13:38 ` [PATCH v6 updated 9/11] " Aneesh Kumar K.V
2021-05-24 13:38 ` Aneesh Kumar K.V
2021-05-24 9:01 ` [PATCH v6 10/11] mm/mremap: Allow arch runtime override Aneesh Kumar K.V
2021-05-24 9:01 ` Aneesh Kumar K.V
2021-05-24 9:01 ` [PATCH v6 11/11] powerpc/mm: Enable HAVE_MOVE_PMD support Aneesh Kumar K.V
2021-05-24 9:01 ` Aneesh Kumar K.V
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210524090114.63446-4-aneesh.kumar@linux.ibm.com \
--to=aneesh.kumar@linux.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=joel@joelfernandes.org \
--cc=kaleshsingh@google.com \
--cc=linux-mm@kvack.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=npiggin@gmail.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.