From: James Houghton <jthoughton@google.com>
To: Mike Kravetz <mike.kravetz@oracle.com>,
Muchun Song <songmuchun@bytedance.com>,
Peter Xu <peterx@redhat.com>
Cc: David Hildenbrand <david@redhat.com>,
David Rientjes <rientjes@google.com>,
Axel Rasmussen <axelrasmussen@google.com>,
Mina Almasry <almasrymina@google.com>,
Jue Wang <juew@google.com>,
Manish Mishra <manish.mishra@nutanix.com>,
"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
James Houghton <jthoughton@google.com>
Subject: [RFC PATCH 19/26] hugetlb: add HGM support for copy_hugetlb_page_range
Date: Fri, 24 Jun 2022 17:36:49 +0000 [thread overview]
Message-ID: <20220624173656.2033256-20-jthoughton@google.com> (raw)
In-Reply-To: <20220624173656.2033256-1-jthoughton@google.com>
This allows fork() to work with high-granularity mappings. The page
table structure is copied such that partially mapped regions will remain
partially mapped in the same way for the new process.
Signed-off-by: James Houghton <jthoughton@google.com>
---
mm/hugetlb.c | 74 +++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 59 insertions(+), 15 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index aadfcee947cf..0ec2f231524e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4851,7 +4851,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *src_vma)
{
pte_t *src_pte, *dst_pte, entry, dst_entry;
- struct page *ptepage;
+ struct hugetlb_pte src_hpte, dst_hpte;
+ struct page *ptepage, *hpage;
unsigned long addr;
bool cow = is_cow_mapping(src_vma->vm_flags);
struct hstate *h = hstate_vma(src_vma);
@@ -4878,17 +4879,44 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
i_mmap_lock_read(mapping);
}
- for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
+ addr = src_vma->vm_start;
+ while (addr < src_vma->vm_end) {
spinlock_t *src_ptl, *dst_ptl;
+ unsigned long hpte_sz;
src_pte = huge_pte_offset(src, addr, sz);
- if (!src_pte)
+ if (!src_pte) {
+ addr += sz;
continue;
+ }
dst_pte = huge_pte_alloc(dst, dst_vma, addr, sz);
if (!dst_pte) {
ret = -ENOMEM;
break;
}
+ hugetlb_pte_populate(&src_hpte, src_pte, huge_page_shift(h));
+ hugetlb_pte_populate(&dst_hpte, dst_pte, huge_page_shift(h));
+
+ if (hugetlb_hgm_enabled(src_vma)) {
+ BUG_ON(!hugetlb_hgm_enabled(dst_vma));
+ ret = hugetlb_walk_to(src, &src_hpte, addr,
+ PAGE_SIZE, /*stop_at_none=*/true);
+ if (ret)
+ break;
+ ret = huge_pte_alloc_high_granularity(
+ &dst_hpte, dst, dst_vma, addr,
+ hugetlb_pte_shift(&src_hpte),
+ HUGETLB_SPLIT_NONE,
+ /*write_locked=*/false);
+ if (ret)
+ break;
+
+ src_pte = src_hpte.ptep;
+ dst_pte = dst_hpte.ptep;
+ }
+
+ hpte_sz = hugetlb_pte_size(&src_hpte);
+
/*
* If the pagetables are shared don't copy or take references.
* dst_pte == src_pte is the common case of src/dest sharing.
@@ -4899,16 +4927,19 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
* after taking the lock below.
*/
dst_entry = huge_ptep_get(dst_pte);
- if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
+ if ((dst_pte == src_pte) || !hugetlb_pte_none(&dst_hpte)) {
+ addr += hugetlb_pte_size(&src_hpte);
continue;
+ }
- dst_ptl = huge_pte_lock(h, dst, dst_pte);
- src_ptl = huge_pte_lockptr(huge_page_shift(h), src, src_pte);
+ dst_ptl = hugetlb_pte_lock(dst, &dst_hpte);
+ src_ptl = hugetlb_pte_lockptr(src, &src_hpte);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
entry = huge_ptep_get(src_pte);
dst_entry = huge_ptep_get(dst_pte);
again:
- if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
+ if (hugetlb_pte_none(&src_hpte) ||
+ !hugetlb_pte_none(&dst_hpte)) {
/*
* Skip if src entry none. Also, skip in the
* unlikely case dst entry !none as this implies
@@ -4931,11 +4962,12 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
if (userfaultfd_wp(src_vma) && uffd_wp)
entry = huge_pte_mkuffd_wp(entry);
set_huge_swap_pte_at(src, addr, src_pte,
- entry, sz);
+ entry, hpte_sz);
}
if (!userfaultfd_wp(dst_vma) && uffd_wp)
entry = huge_pte_clear_uffd_wp(entry);
- set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
+ set_huge_swap_pte_at(dst, addr, dst_pte, entry,
+ hpte_sz);
} else if (unlikely(is_pte_marker(entry))) {
/*
* We copy the pte marker only if the dst vma has
@@ -4946,7 +4978,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
} else {
entry = huge_ptep_get(src_pte);
ptepage = pte_page(entry);
- get_page(ptepage);
+ hpage = compound_head(ptepage);
+ get_page(hpage);
/*
* Failing to duplicate the anon rmap is a rare case
@@ -4959,9 +4992,16 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
* sleep during the process.
*/
if (!PageAnon(ptepage)) {
- page_dup_file_rmap(ptepage, true);
+ /* Only dup_rmap once for a page */
+ if (IS_ALIGNED(addr, sz))
+ page_dup_file_rmap(hpage, true);
} else if (page_try_dup_anon_rmap(ptepage, true,
src_vma)) {
+ if (hugetlb_hgm_enabled(src_vma)) {
+ ret = -EINVAL;
+ break;
+ }
+ BUG_ON(!IS_ALIGNED(addr, hugetlb_pte_size(&src_hpte)));
pte_t src_pte_old = entry;
struct page *new;
@@ -4970,13 +5010,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
/* Do not use reserve as it's private owned */
new = alloc_huge_page(dst_vma, addr, 1);
if (IS_ERR(new)) {
- put_page(ptepage);
+ put_page(hpage);
ret = PTR_ERR(new);
break;
}
- copy_user_huge_page(new, ptepage, addr, dst_vma,
+ copy_user_huge_page(new, hpage, addr, dst_vma,
npages);
- put_page(ptepage);
+ put_page(hpage);
/* Install the new huge page if src pte stable */
dst_ptl = huge_pte_lock(h, dst, dst_pte);
@@ -4994,6 +5034,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
hugetlb_install_page(dst_vma, dst_pte, addr, new);
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
+ addr += hugetlb_pte_size(&src_hpte);
continue;
}
@@ -5010,10 +5051,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
}
set_huge_pte_at(dst, addr, dst_pte, entry);
- hugetlb_count_add(npages, dst);
+ hugetlb_count_add(
+ hugetlb_pte_size(&dst_hpte) / PAGE_SIZE,
+ dst);
}
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
+ addr += hugetlb_pte_size(&src_hpte);
}
if (cow) {
--
2.37.0.rc0.161.g10f37bed90-goog
next prev parent reply other threads:[~2022-06-24 17:37 UTC|newest]
Thread overview: 125+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-06-24 17:36 [RFC PATCH 00/26] hugetlb: Introduce HugeTLB high-granularity mapping James Houghton
2022-06-24 17:36 ` [RFC PATCH 01/26] hugetlb: make hstate accessor functions const James Houghton
2022-06-24 18:43 ` Mina Almasry
[not found] ` <e55f90f5-ba14-5d6e-8f8f-abf731b9095e@nutanix.com>
2022-06-27 12:09 ` manish.mishra
2022-06-28 17:08 ` James Houghton
2022-06-29 6:18 ` Muchun Song
2022-06-24 17:36 ` [RFC PATCH 02/26] hugetlb: sort hstates in hugetlb_init_hstates James Houghton
2022-06-24 18:51 ` Mina Almasry
2022-06-27 12:08 ` manish.mishra
2022-06-28 15:35 ` James Houghton
2022-06-27 18:42 ` Mike Kravetz
2022-06-28 15:40 ` James Houghton
2022-06-29 6:39 ` Muchun Song
2022-06-29 21:06 ` Mike Kravetz
2022-06-29 21:13 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 03/26] hugetlb: add make_huge_pte_with_shift James Houghton
2022-06-24 19:01 ` Mina Almasry
2022-06-27 12:13 ` manish.mishra
2022-06-24 17:36 ` [RFC PATCH 04/26] hugetlb: make huge_pte_lockptr take an explicit shift argument James Houghton
2022-06-27 12:26 ` manish.mishra
2022-06-27 20:51 ` Mike Kravetz
2022-06-28 15:29 ` James Houghton
2022-06-29 6:09 ` Muchun Song
2022-06-29 21:03 ` Mike Kravetz
2022-06-29 21:39 ` James Houghton
2022-06-29 22:24 ` Mike Kravetz
2022-06-30 9:35 ` Muchun Song
2022-06-30 16:23 ` James Houghton
2022-06-30 17:40 ` Mike Kravetz
2022-07-01 3:32 ` Muchun Song
2022-06-24 17:36 ` [RFC PATCH 05/26] hugetlb: add CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING James Houghton
2022-06-27 12:28 ` manish.mishra
2022-06-28 20:03 ` Mina Almasry
2022-06-24 17:36 ` [RFC PATCH 06/26] mm: make free_p?d_range functions public James Houghton
2022-06-27 12:31 ` manish.mishra
2022-06-28 20:35 ` Mike Kravetz
2022-07-12 20:52 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 07/26] hugetlb: add hugetlb_pte to track HugeTLB page table entries James Houghton
2022-06-27 12:47 ` manish.mishra
2022-06-29 16:28 ` James Houghton
2022-06-28 20:25 ` Mina Almasry
2022-06-29 16:42 ` James Houghton
2022-06-28 20:44 ` Mike Kravetz
2022-06-29 16:24 ` James Houghton
2022-07-11 23:32 ` Mike Kravetz
2022-07-12 9:42 ` Dr. David Alan Gilbert
2022-07-12 17:51 ` Mike Kravetz
2022-07-15 16:35 ` Peter Xu
2022-07-15 21:52 ` Axel Rasmussen
2022-07-15 23:03 ` Peter Xu
2022-09-08 17:38 ` Peter Xu
2022-09-08 17:54 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 08/26] hugetlb: add hugetlb_free_range to free PT structures James Houghton
2022-06-27 12:52 ` manish.mishra
2022-06-28 20:27 ` Mina Almasry
2022-06-24 17:36 ` [RFC PATCH 09/26] hugetlb: add hugetlb_hgm_enabled James Houghton
2022-06-27 12:55 ` manish.mishra
2022-06-28 20:33 ` Mina Almasry
2022-09-08 18:07 ` Peter Xu
2022-09-08 18:13 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 10/26] hugetlb: add for_each_hgm_shift James Houghton
2022-06-27 13:01 ` manish.mishra
2022-06-28 21:58 ` Mina Almasry
2022-07-07 21:39 ` Mike Kravetz
2022-07-08 15:52 ` James Houghton
2022-07-09 21:55 ` Mina Almasry
2022-06-24 17:36 ` [RFC PATCH 11/26] hugetlb: add hugetlb_walk_to to do PT walks James Houghton
2022-06-27 13:07 ` manish.mishra
2022-07-07 23:03 ` Mike Kravetz
2022-09-08 18:20 ` Peter Xu
2022-06-24 17:36 ` [RFC PATCH 12/26] hugetlb: add HugeTLB splitting functionality James Houghton
2022-06-27 13:50 ` manish.mishra
2022-06-29 16:10 ` James Houghton
2022-06-29 14:33 ` manish.mishra
2022-06-29 16:20 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 13/26] hugetlb: add huge_pte_alloc_high_granularity James Houghton
2022-06-29 14:11 ` manish.mishra
2022-06-24 17:36 ` [RFC PATCH 14/26] hugetlb: add HGM support for hugetlb_fault and hugetlb_no_page James Houghton
2022-06-29 14:40 ` manish.mishra
2022-06-29 15:56 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 15/26] hugetlb: make unmapping compatible with high-granularity mappings James Houghton
2022-07-19 10:19 ` manish.mishra
2022-07-19 15:58 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 16/26] hugetlb: make hugetlb_change_protection compatible with HGM James Houghton
2022-06-24 17:36 ` [RFC PATCH 17/26] hugetlb: update follow_hugetlb_page to support HGM James Houghton
2022-07-19 10:48 ` manish.mishra
2022-07-19 16:19 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 18/26] hugetlb: use struct hugetlb_pte for walk_hugetlb_range James Houghton
2022-06-24 17:36 ` James Houghton [this message]
2022-07-11 23:41 ` [RFC PATCH 19/26] hugetlb: add HGM support for copy_hugetlb_page_range Mike Kravetz
2022-07-12 17:19 ` James Houghton
2022-07-12 18:06 ` Mike Kravetz
2022-07-15 21:39 ` Axel Rasmussen
2022-06-24 17:36 ` [RFC PATCH 20/26] hugetlb: add support for high-granularity UFFDIO_CONTINUE James Houghton
2022-07-15 16:21 ` Peter Xu
2022-07-15 16:58 ` James Houghton
2022-07-15 17:20 ` Peter Xu
2022-07-20 20:58 ` James Houghton
2022-07-21 19:09 ` Peter Xu
2022-07-21 19:44 ` James Houghton
2022-07-21 19:53 ` Peter Xu
2022-06-24 17:36 ` [RFC PATCH 21/26] hugetlb: add hugetlb_collapse James Houghton
2022-06-24 17:36 ` [RFC PATCH 22/26] madvise: add uapi for HugeTLB HGM collapse: MADV_COLLAPSE James Houghton
2022-06-24 17:36 ` [RFC PATCH 23/26] userfaultfd: add UFFD_FEATURE_MINOR_HUGETLBFS_HGM James Houghton
2022-06-24 17:36 ` [RFC PATCH 24/26] arm64/hugetlb: add support for high-granularity mappings James Houghton
2022-06-24 17:36 ` [RFC PATCH 25/26] selftests: add HugeTLB HGM to userfaultfd selftest James Houghton
2022-06-24 17:36 ` [RFC PATCH 26/26] selftests: add HugeTLB HGM to KVM demand paging selftest James Houghton
2022-06-24 18:29 ` [RFC PATCH 00/26] hugetlb: Introduce HugeTLB high-granularity mapping Matthew Wilcox
2022-06-27 16:36 ` James Houghton
2022-06-27 17:56 ` Dr. David Alan Gilbert
2022-06-27 20:31 ` James Houghton
2022-06-28 0:04 ` Nadav Amit
2022-06-30 19:21 ` Peter Xu
2022-07-01 5:54 ` Nadav Amit
2022-06-28 8:20 ` Dr. David Alan Gilbert
2022-06-30 16:09 ` Peter Xu
2022-06-24 18:41 ` Mina Almasry
2022-06-27 16:27 ` James Houghton
2022-06-28 14:17 ` Muchun Song
2022-06-28 17:26 ` Mina Almasry
2022-06-28 17:56 ` Dr. David Alan Gilbert
2022-06-29 18:31 ` James Houghton
2022-06-29 20:39 ` Axel Rasmussen
2022-06-24 18:47 ` Matthew Wilcox
2022-06-27 16:48 ` James Houghton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220624173656.2033256-20-jthoughton@google.com \
--to=jthoughton@google.com \
--cc=almasrymina@google.com \
--cc=axelrasmussen@google.com \
--cc=david@redhat.com \
--cc=dgilbert@redhat.com \
--cc=juew@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=manish.mishra@nutanix.com \
--cc=mike.kravetz@oracle.com \
--cc=peterx@redhat.com \
--cc=rientjes@google.com \
--cc=songmuchun@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).