From: James Houghton <jthoughton@google.com>
To: Mike Kravetz <mike.kravetz@oracle.com>,
Muchun Song <songmuchun@bytedance.com>,
Peter Xu <peterx@redhat.com>
Cc: David Hildenbrand <david@redhat.com>,
David Rientjes <rientjes@google.com>,
Axel Rasmussen <axelrasmussen@google.com>,
Mina Almasry <almasrymina@google.com>,
Jue Wang <juew@google.com>,
Manish Mishra <manish.mishra@nutanix.com>,
"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
James Houghton <jthoughton@google.com>
Subject: [RFC PATCH 15/26] hugetlb: make unmapping compatible with high-granularity mappings
Date: Fri, 24 Jun 2022 17:36:45 +0000 [thread overview]
Message-ID: <20220624173656.2033256-16-jthoughton@google.com> (raw)
In-Reply-To: <20220624173656.2033256-1-jthoughton@google.com>
This teaches __unmap_hugepage_range to handle high-granularity
mappings. Its API is unchanged: start and end must still be
hugepage-aligned, but hugepages that have been mapped at high
granularity are now unmapped correctly.
Analogous to the mapcount rules introduced by hugetlb_no_page, we only
drop the mapcount when an entire hugepage is unmapped in one operation,
as is the case when a VMA is destroyed.
Eventually, functionality here can be expanded to allow users to call
MADV_DONTNEED on PAGE_SIZE-aligned sections of a hugepage, but that is
not done here.
Signed-off-by: James Houghton <jthoughton@google.com>
---
include/asm-generic/tlb.h | 6 +--
mm/hugetlb.c | 85 ++++++++++++++++++++++++++-------------
2 files changed, 59 insertions(+), 32 deletions(-)
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index ff3e82553a76..8daa3ae460d9 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -562,9 +562,9 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
__tlb_remove_tlb_entry(tlb, ptep, address); \
} while (0)
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
+#define tlb_remove_huge_tlb_entry(tlb, hpte, address) \
do { \
- unsigned long _sz = huge_page_size(h); \
+ unsigned long _sz = hugetlb_pte_size(&hpte); \
if (_sz >= P4D_SIZE) \
tlb_flush_p4d_range(tlb, address, _sz); \
else if (_sz >= PUD_SIZE) \
@@ -573,7 +573,7 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
tlb_flush_pmd_range(tlb, address, _sz); \
else \
tlb_flush_pte_range(tlb, address, _sz); \
- __tlb_remove_tlb_entry(tlb, ptep, address); \
+ __tlb_remove_tlb_entry(tlb, hpte.ptep, address);\
} while (0)
/**
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index da30621656b8..51fc1d3f122f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5120,24 +5120,20 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
{
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
- pte_t *ptep;
+ struct hugetlb_pte hpte;
pte_t pte;
spinlock_t *ptl;
- struct page *page;
+ struct page *hpage, *subpage;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
struct mmu_notifier_range range;
bool force_flush = false;
+ bool hgm_enabled = hugetlb_hgm_enabled(vma);
WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON(start & ~huge_page_mask(h));
BUG_ON(end & ~huge_page_mask(h));
- /*
- * This is a hugetlb vma, all the pte entries should point
- * to huge page.
- */
- tlb_change_page_size(tlb, sz);
tlb_start_vma(tlb, vma);
/*
@@ -5148,25 +5144,43 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
mmu_notifier_invalidate_range_start(&range);
address = start;
- for (; address < end; address += sz) {
- ptep = huge_pte_offset(mm, address, sz);
- if (!ptep)
+
+ while (address < end) {
+ pte_t *ptep = huge_pte_offset(mm, address, sz);
+
+ if (!ptep) {
+ address += sz;
continue;
+ }
+ hugetlb_pte_populate(&hpte, ptep, huge_page_shift(h));
+ if (hgm_enabled) {
+ int ret = huge_pte_alloc_high_granularity(
+ &hpte, mm, vma, address, PAGE_SHIFT,
+ HUGETLB_SPLIT_NEVER,
+ /*write_locked=*/true);
+ /*
+ * We will never split anything, so this should always
+ * succeed.
+ */
+ BUG_ON(ret);
+ }
- ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, vma, &address, ptep)) {
+ ptl = hugetlb_pte_lock(mm, &hpte);
+ if (!hgm_enabled && huge_pmd_unshare(
+ mm, vma, &address, hpte.ptep)) {
spin_unlock(ptl);
tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
force_flush = true;
- continue;
+ goto next_hpte;
}
- pte = huge_ptep_get(ptep);
- if (huge_pte_none(pte)) {
+ if (hugetlb_pte_none(&hpte)) {
spin_unlock(ptl);
- continue;
+ goto next_hpte;
}
+ pte = hugetlb_ptep_get(&hpte);
+
/*
* Migrating hugepage or HWPoisoned hugepage is already
* unmapped and its refcount is dropped, so just clear pte here.
@@ -5180,24 +5194,27 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
*/
if (pte_swp_uffd_wp_any(pte) &&
!(zap_flags & ZAP_FLAG_DROP_MARKER))
- set_huge_pte_at(mm, address, ptep,
+ set_huge_pte_at(mm, address, hpte.ptep,
make_pte_marker(PTE_MARKER_UFFD_WP));
else
- huge_pte_clear(mm, address, ptep, sz);
+ huge_pte_clear(mm, address, hpte.ptep,
+ hugetlb_pte_size(&hpte));
spin_unlock(ptl);
- continue;
+ goto next_hpte;
}
- page = pte_page(pte);
+ subpage = pte_page(pte);
+ BUG_ON(!subpage);
+ hpage = compound_head(subpage);
/*
* If a reference page is supplied, it is because a specific
* page is being unmapped, not a range. Ensure the page we
* are about to unmap is the actual page of interest.
*/
if (ref_page) {
- if (page != ref_page) {
+ if (hpage != ref_page) {
spin_unlock(ptl);
- continue;
+ goto next_hpte;
}
/*
* Mark the VMA as having unmapped its page so that
@@ -5207,25 +5224,35 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
set_vma_resv_flags(vma, HPAGE_RESV_UNMAPPED);
}
- pte = huge_ptep_get_and_clear(mm, address, ptep);
- tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
+ pte = huge_ptep_get_and_clear(mm, address, hpte.ptep);
+ tlb_change_page_size(tlb, hugetlb_pte_size(&hpte));
+ tlb_remove_huge_tlb_entry(tlb, hpte, address);
if (huge_pte_dirty(pte))
- set_page_dirty(page);
+ set_page_dirty(hpage);
/* Leave a uffd-wp pte marker if needed */
if (huge_pte_uffd_wp(pte) &&
!(zap_flags & ZAP_FLAG_DROP_MARKER))
- set_huge_pte_at(mm, address, ptep,
+ set_huge_pte_at(mm, address, hpte.ptep,
make_pte_marker(PTE_MARKER_UFFD_WP));
- hugetlb_count_sub(pages_per_huge_page(h), mm);
- page_remove_rmap(page, vma, true);
+
+ hugetlb_count_sub(hugetlb_pte_size(&hpte)/PAGE_SIZE, mm);
+
+ /*
+ * If we are unmapping the entire page, remove it from the
+ * rmap.
+ */
+ if (IS_ALIGNED(address, sz) && address + sz <= end)
+ page_remove_rmap(hpage, vma, true);
spin_unlock(ptl);
- tlb_remove_page_size(tlb, page, huge_page_size(h));
+ tlb_remove_page_size(tlb, subpage, hugetlb_pte_size(&hpte));
/*
* Bail out after unmapping reference page if supplied
*/
if (ref_page)
break;
+next_hpte:
+ address += hugetlb_pte_size(&hpte);
}
mmu_notifier_invalidate_range_end(&range);
tlb_end_vma(tlb, vma);
--
2.37.0.rc0.161.g10f37bed90-goog
next prev parent reply other threads:[~2022-06-24 17:37 UTC|newest]
Thread overview: 125+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-06-24 17:36 [RFC PATCH 00/26] hugetlb: Introduce HugeTLB high-granularity mapping James Houghton
2022-06-24 17:36 ` [RFC PATCH 01/26] hugetlb: make hstate accessor functions const James Houghton
2022-06-24 18:43 ` Mina Almasry
[not found] ` <e55f90f5-ba14-5d6e-8f8f-abf731b9095e@nutanix.com>
2022-06-27 12:09 ` manish.mishra
2022-06-28 17:08 ` James Houghton
2022-06-29 6:18 ` Muchun Song
2022-06-24 17:36 ` [RFC PATCH 02/26] hugetlb: sort hstates in hugetlb_init_hstates James Houghton
2022-06-24 18:51 ` Mina Almasry
2022-06-27 12:08 ` manish.mishra
2022-06-28 15:35 ` James Houghton
2022-06-27 18:42 ` Mike Kravetz
2022-06-28 15:40 ` James Houghton
2022-06-29 6:39 ` Muchun Song
2022-06-29 21:06 ` Mike Kravetz
2022-06-29 21:13 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 03/26] hugetlb: add make_huge_pte_with_shift James Houghton
2022-06-24 19:01 ` Mina Almasry
2022-06-27 12:13 ` manish.mishra
2022-06-24 17:36 ` [RFC PATCH 04/26] hugetlb: make huge_pte_lockptr take an explicit shift argument James Houghton
2022-06-27 12:26 ` manish.mishra
2022-06-27 20:51 ` Mike Kravetz
2022-06-28 15:29 ` James Houghton
2022-06-29 6:09 ` Muchun Song
2022-06-29 21:03 ` Mike Kravetz
2022-06-29 21:39 ` James Houghton
2022-06-29 22:24 ` Mike Kravetz
2022-06-30 9:35 ` Muchun Song
2022-06-30 16:23 ` James Houghton
2022-06-30 17:40 ` Mike Kravetz
2022-07-01 3:32 ` Muchun Song
2022-06-24 17:36 ` [RFC PATCH 05/26] hugetlb: add CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING James Houghton
2022-06-27 12:28 ` manish.mishra
2022-06-28 20:03 ` Mina Almasry
2022-06-24 17:36 ` [RFC PATCH 06/26] mm: make free_p?d_range functions public James Houghton
2022-06-27 12:31 ` manish.mishra
2022-06-28 20:35 ` Mike Kravetz
2022-07-12 20:52 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 07/26] hugetlb: add hugetlb_pte to track HugeTLB page table entries James Houghton
2022-06-27 12:47 ` manish.mishra
2022-06-29 16:28 ` James Houghton
2022-06-28 20:25 ` Mina Almasry
2022-06-29 16:42 ` James Houghton
2022-06-28 20:44 ` Mike Kravetz
2022-06-29 16:24 ` James Houghton
2022-07-11 23:32 ` Mike Kravetz
2022-07-12 9:42 ` Dr. David Alan Gilbert
2022-07-12 17:51 ` Mike Kravetz
2022-07-15 16:35 ` Peter Xu
2022-07-15 21:52 ` Axel Rasmussen
2022-07-15 23:03 ` Peter Xu
2022-09-08 17:38 ` Peter Xu
2022-09-08 17:54 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 08/26] hugetlb: add hugetlb_free_range to free PT structures James Houghton
2022-06-27 12:52 ` manish.mishra
2022-06-28 20:27 ` Mina Almasry
2022-06-24 17:36 ` [RFC PATCH 09/26] hugetlb: add hugetlb_hgm_enabled James Houghton
2022-06-27 12:55 ` manish.mishra
2022-06-28 20:33 ` Mina Almasry
2022-09-08 18:07 ` Peter Xu
2022-09-08 18:13 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 10/26] hugetlb: add for_each_hgm_shift James Houghton
2022-06-27 13:01 ` manish.mishra
2022-06-28 21:58 ` Mina Almasry
2022-07-07 21:39 ` Mike Kravetz
2022-07-08 15:52 ` James Houghton
2022-07-09 21:55 ` Mina Almasry
2022-06-24 17:36 ` [RFC PATCH 11/26] hugetlb: add hugetlb_walk_to to do PT walks James Houghton
2022-06-27 13:07 ` manish.mishra
2022-07-07 23:03 ` Mike Kravetz
2022-09-08 18:20 ` Peter Xu
2022-06-24 17:36 ` [RFC PATCH 12/26] hugetlb: add HugeTLB splitting functionality James Houghton
2022-06-27 13:50 ` manish.mishra
2022-06-29 16:10 ` James Houghton
2022-06-29 14:33 ` manish.mishra
2022-06-29 16:20 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 13/26] hugetlb: add huge_pte_alloc_high_granularity James Houghton
2022-06-29 14:11 ` manish.mishra
2022-06-24 17:36 ` [RFC PATCH 14/26] hugetlb: add HGM support for hugetlb_fault and hugetlb_no_page James Houghton
2022-06-29 14:40 ` manish.mishra
2022-06-29 15:56 ` James Houghton
2022-06-24 17:36 ` James Houghton [this message]
2022-07-19 10:19 ` [RFC PATCH 15/26] hugetlb: make unmapping compatible with high-granularity mappings manish.mishra
2022-07-19 15:58 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 16/26] hugetlb: make hugetlb_change_protection compatible with HGM James Houghton
2022-06-24 17:36 ` [RFC PATCH 17/26] hugetlb: update follow_hugetlb_page to support HGM James Houghton
2022-07-19 10:48 ` manish.mishra
2022-07-19 16:19 ` James Houghton
2022-06-24 17:36 ` [RFC PATCH 18/26] hugetlb: use struct hugetlb_pte for walk_hugetlb_range James Houghton
2022-06-24 17:36 ` [RFC PATCH 19/26] hugetlb: add HGM support for copy_hugetlb_page_range James Houghton
2022-07-11 23:41 ` Mike Kravetz
2022-07-12 17:19 ` James Houghton
2022-07-12 18:06 ` Mike Kravetz
2022-07-15 21:39 ` Axel Rasmussen
2022-06-24 17:36 ` [RFC PATCH 20/26] hugetlb: add support for high-granularity UFFDIO_CONTINUE James Houghton
2022-07-15 16:21 ` Peter Xu
2022-07-15 16:58 ` James Houghton
2022-07-15 17:20 ` Peter Xu
2022-07-20 20:58 ` James Houghton
2022-07-21 19:09 ` Peter Xu
2022-07-21 19:44 ` James Houghton
2022-07-21 19:53 ` Peter Xu
2022-06-24 17:36 ` [RFC PATCH 21/26] hugetlb: add hugetlb_collapse James Houghton
2022-06-24 17:36 ` [RFC PATCH 22/26] madvise: add uapi for HugeTLB HGM collapse: MADV_COLLAPSE James Houghton
2022-06-24 17:36 ` [RFC PATCH 23/26] userfaultfd: add UFFD_FEATURE_MINOR_HUGETLBFS_HGM James Houghton
2022-06-24 17:36 ` [RFC PATCH 24/26] arm64/hugetlb: add support for high-granularity mappings James Houghton
2022-06-24 17:36 ` [RFC PATCH 25/26] selftests: add HugeTLB HGM to userfaultfd selftest James Houghton
2022-06-24 17:36 ` [RFC PATCH 26/26] selftests: add HugeTLB HGM to KVM demand paging selftest James Houghton
2022-06-24 18:29 ` [RFC PATCH 00/26] hugetlb: Introduce HugeTLB high-granularity mapping Matthew Wilcox
2022-06-27 16:36 ` James Houghton
2022-06-27 17:56 ` Dr. David Alan Gilbert
2022-06-27 20:31 ` James Houghton
2022-06-28 0:04 ` Nadav Amit
2022-06-30 19:21 ` Peter Xu
2022-07-01 5:54 ` Nadav Amit
2022-06-28 8:20 ` Dr. David Alan Gilbert
2022-06-30 16:09 ` Peter Xu
2022-06-24 18:41 ` Mina Almasry
2022-06-27 16:27 ` James Houghton
2022-06-28 14:17 ` Muchun Song
2022-06-28 17:26 ` Mina Almasry
2022-06-28 17:56 ` Dr. David Alan Gilbert
2022-06-29 18:31 ` James Houghton
2022-06-29 20:39 ` Axel Rasmussen
2022-06-24 18:47 ` Matthew Wilcox
2022-06-27 16:48 ` James Houghton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220624173656.2033256-16-jthoughton@google.com \
--to=jthoughton@google.com \
--cc=almasrymina@google.com \
--cc=axelrasmussen@google.com \
--cc=david@redhat.com \
--cc=dgilbert@redhat.com \
--cc=juew@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=manish.mishra@nutanix.com \
--cc=mike.kravetz@oracle.com \
--cc=peterx@redhat.com \
--cc=rientjes@google.com \
--cc=songmuchun@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).