From: Dev Jain <dev.jain@arm.com>
To: akpm@linux-foundation.org, david@redhat.com, willy@infradead.org,
kirill.shutemov@linux.intel.com
Cc: ryan.roberts@arm.com, anshuman.khandual@arm.com,
catalin.marinas@arm.com, cl@gentwo.org, vbabka@suse.cz,
mhocko@suse.com, apopple@nvidia.com, dave.hansen@linux.intel.com,
will@kernel.org, baohua@kernel.org, jack@suse.cz,
srivatsa@csail.mit.edu, haowenchao22@gmail.com, hughd@google.com,
aneesh.kumar@kernel.org, yang@os.amperecomputing.com,
peterx@redhat.com, ioworker0@gmail.com,
wangkefeng.wang@huawei.com, ziy@nvidia.com, jglisse@google.com,
surenb@google.com, vishal.moola@gmail.com, zokeefe@google.com,
zhengqi.arch@bytedance.com, jhubbard@nvidia.com,
21cnbao@gmail.com, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, Dev Jain <dev.jain@arm.com>
Subject: [RFC PATCH 08/12] khugepaged: Abstract PMD-THP collapse
Date: Mon, 16 Dec 2024 22:21:01 +0530 [thread overview]
Message-ID: <20241216165105.56185-9-dev.jain@arm.com> (raw)
In-Reply-To: <20241216165105.56185-1-dev.jain@arm.com>
Abstract away taking the mmap_lock exclusively, copying page contents, and
setting the PMD, into vma_collapse_anon_folio_pmd().
Signed-off-by: Dev Jain <dev.jain@arm.com>
---
mm/khugepaged.c | 119 +++++++++++++++++++++++++++---------------------
1 file changed, 66 insertions(+), 53 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 078794aa3335..88beebef773e 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1111,58 +1111,17 @@ static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm,
return SCAN_SUCCEED;
}
-static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
- int referenced, int unmapped, int order,
- struct collapse_control *cc)
+static int vma_collapse_anon_folio_pmd(struct mm_struct *mm, unsigned long address,
+ struct vm_area_struct *vma, struct collapse_control *cc, pmd_t *pmd,
+ struct folio *folio)
{
+ struct mmu_notifier_range range;
+ spinlock_t *pmd_ptl, *pte_ptl;
LIST_HEAD(compound_pagelist);
- pmd_t *pmd, _pmd;
- pte_t *pte;
pgtable_t pgtable;
- struct folio *folio;
- spinlock_t *pmd_ptl, *pte_ptl;
- int result = SCAN_FAIL;
- struct vm_area_struct *vma;
- struct mmu_notifier_range range;
-
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-
- /*
- * Before allocating the hugepage, release the mmap_lock read lock.
- * The allocation can take potentially a long time if it involves
- * sync compaction, and we do not need to hold the mmap_lock during
- * that. We will recheck the vma after taking it again in write mode.
- */
- mmap_read_unlock(mm);
-
- result = alloc_charge_folio(&folio, mm, order, cc);
- if (result != SCAN_SUCCEED)
- goto out_nolock;
-
- mmap_read_lock(mm);
- result = hugepage_vma_revalidate(mm, address, true, &vma, order, cc);
- if (result != SCAN_SUCCEED) {
- mmap_read_unlock(mm);
- goto out_nolock;
- }
-
- result = find_pmd_or_thp_or_none(mm, address, &pmd);
- if (result != SCAN_SUCCEED) {
- mmap_read_unlock(mm);
- goto out_nolock;
- }
-
- if (unmapped) {
- /*
- * __collapse_huge_page_swapin will return with mmap_lock
- * released when it fails. So we jump out_nolock directly in
- * that case. Continuing to collapse causes inconsistency.
- */
- result = __collapse_huge_page_swapin(mm, vma, address, pmd,
- referenced, order);
- if (result != SCAN_SUCCEED)
- goto out_nolock;
- }
+ int result;
+ pmd_t _pmd;
+ pte_t *pte;
mmap_read_unlock(mm);
/*
@@ -1174,7 +1133,8 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
* mmap_lock.
*/
mmap_write_lock(mm);
- result = hugepage_vma_revalidate(mm, address, true, &vma, order, cc);
+
+ result = hugepage_vma_revalidate(mm, address, true, &vma, HPAGE_PMD_ORDER, cc);
if (result != SCAN_SUCCEED)
goto out_up_write;
/* check if the pmd is still valid */
@@ -1206,7 +1166,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
pte = pte_offset_map_lock(mm, &_pmd, address, &pte_ptl);
if (pte) {
result = __collapse_huge_page_isolate(vma, address, pte, cc,
- &compound_pagelist, order);
+ &compound_pagelist, HPAGE_PMD_ORDER);
spin_unlock(pte_ptl);
} else {
result = SCAN_PMD_NULL;
@@ -1262,11 +1222,64 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
deferred_split_folio(folio, false);
spin_unlock(pmd_ptl);
- folio = NULL;
-
result = SCAN_SUCCEED;
out_up_write:
mmap_write_unlock(mm);
+ return result;
+}
+
+static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
+ int referenced, int unmapped, int order,
+ struct collapse_control *cc)
+{
+ struct vm_area_struct *vma;
+ int result = SCAN_FAIL;
+ struct folio *folio;
+ pmd_t *pmd;
+
+ /*
+ * Before allocating the hugepage, release the mmap_lock read lock.
+ * The allocation can take potentially a long time if it involves
+ * sync compaction, and we do not need to hold the mmap_lock during
+ * that. We will recheck the vma after taking it again in write mode.
+ */
+ mmap_read_unlock(mm);
+
+ result = alloc_charge_folio(&folio, mm, order, cc);
+ if (result != SCAN_SUCCEED)
+ goto out_nolock;
+
+ mmap_read_lock(mm);
+ result = hugepage_vma_revalidate(mm, address, true, &vma, order, cc);
+ if (result != SCAN_SUCCEED) {
+ mmap_read_unlock(mm);
+ goto out_nolock;
+ }
+
+ result = find_pmd_or_thp_or_none(mm, address, &pmd);
+ if (result != SCAN_SUCCEED) {
+ mmap_read_unlock(mm);
+ goto out_nolock;
+ }
+
+ if (unmapped) {
+ /*
+ * __collapse_huge_page_swapin will return with mmap_lock
+ * released when it fails. So we jump out_nolock directly in
+ * that case. Continuing to collapse causes inconsistency.
+ */
+ result = __collapse_huge_page_swapin(mm, vma, address, pmd,
+ referenced, order);
+ if (result != SCAN_SUCCEED)
+ goto out_nolock;
+ }
+
+ if (order == HPAGE_PMD_ORDER)
+ result = vma_collapse_anon_folio_pmd(mm, address, vma, cc, pmd, folio);
+
+ if (result == SCAN_SUCCEED)
+ folio = NULL;
+
out_nolock:
if (folio)
folio_put(folio);
--
2.30.2
next prev parent reply other threads:[~2024-12-16 16:53 UTC|newest]
Thread overview: 74+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-16 16:50 [RFC PATCH 00/12] khugepaged: Asynchronous mTHP collapse Dev Jain
2024-12-16 16:50 ` [RFC PATCH 01/12] khugepaged: Rename hpage_collapse_scan_pmd() -> ptes() Dev Jain
2024-12-17 4:18 ` Matthew Wilcox
2024-12-17 5:52 ` Dev Jain
2024-12-17 6:43 ` Ryan Roberts
2024-12-17 18:11 ` Zi Yan
2024-12-17 19:12 ` Ryan Roberts
2024-12-16 16:50 ` [RFC PATCH 02/12] khugepaged: Generalize alloc_charge_folio() Dev Jain
2024-12-17 2:51 ` Baolin Wang
2024-12-17 6:08 ` Dev Jain
2024-12-17 4:17 ` Matthew Wilcox
2024-12-17 7:09 ` Ryan Roberts
2024-12-17 13:00 ` Zi Yan
2024-12-20 17:41 ` Christoph Lameter (Ampere)
2024-12-20 17:45 ` Ryan Roberts
2024-12-20 18:47 ` Christoph Lameter (Ampere)
2025-01-02 11:21 ` Ryan Roberts
2024-12-17 6:53 ` Ryan Roberts
2024-12-17 9:06 ` Dev Jain
2024-12-16 16:50 ` [RFC PATCH 03/12] khugepaged: Generalize hugepage_vma_revalidate() Dev Jain
2024-12-17 4:21 ` Matthew Wilcox
2024-12-17 16:58 ` Ryan Roberts
2024-12-16 16:50 ` [RFC PATCH 04/12] khugepaged: Generalize __collapse_huge_page_swapin() Dev Jain
2024-12-17 4:24 ` Matthew Wilcox
2024-12-16 16:50 ` [RFC PATCH 05/12] khugepaged: Generalize __collapse_huge_page_isolate() Dev Jain
2024-12-17 4:32 ` Matthew Wilcox
2024-12-17 6:41 ` Dev Jain
2024-12-17 17:14 ` Ryan Roberts
2024-12-17 17:09 ` Ryan Roberts
2024-12-16 16:50 ` [RFC PATCH 06/12] khugepaged: Generalize __collapse_huge_page_copy_failed() Dev Jain
2024-12-17 17:22 ` Ryan Roberts
2024-12-18 8:49 ` Dev Jain
2024-12-16 16:51 ` [RFC PATCH 07/12] khugepaged: Scan PTEs order-wise Dev Jain
2024-12-17 18:15 ` Ryan Roberts
2024-12-18 9:24 ` Dev Jain
2025-01-06 10:04 ` Usama Arif
2025-01-07 7:17 ` Dev Jain
2024-12-16 16:51 ` Dev Jain [this message]
2024-12-17 19:24 ` [RFC PATCH 08/12] khugepaged: Abstract PMD-THP collapse Ryan Roberts
2024-12-18 9:26 ` Dev Jain
2024-12-16 16:51 ` [RFC PATCH 09/12] khugepaged: Introduce vma_collapse_anon_folio() Dev Jain
2024-12-16 17:06 ` David Hildenbrand
2024-12-16 19:08 ` Yang Shi
2024-12-17 10:07 ` Dev Jain
2024-12-17 10:32 ` David Hildenbrand
2024-12-18 8:35 ` Dev Jain
2025-01-02 10:08 ` Dev Jain
2025-01-02 11:33 ` David Hildenbrand
2025-01-03 8:17 ` Dev Jain
2025-01-02 11:22 ` David Hildenbrand
2024-12-18 15:59 ` Dev Jain
2025-01-06 10:17 ` Usama Arif
2025-01-07 8:12 ` Dev Jain
2024-12-16 16:51 ` [RFC PATCH 10/12] khugepaged: Skip PTE range if a larger mTHP is already mapped Dev Jain
2024-12-18 7:36 ` Ryan Roberts
2024-12-18 9:34 ` Dev Jain
2024-12-19 3:40 ` John Hubbard
2024-12-19 3:51 ` Zi Yan
2024-12-19 7:59 ` Dev Jain
2024-12-19 8:07 ` Dev Jain
2024-12-20 11:57 ` Ryan Roberts
2024-12-16 16:51 ` [RFC PATCH 11/12] khugepaged: Enable sysfs to control order of collapse Dev Jain
2024-12-16 16:51 ` [RFC PATCH 12/12] selftests/mm: khugepaged: Enlighten for mTHP collapse Dev Jain
2024-12-18 9:03 ` Ryan Roberts
2024-12-18 9:50 ` Dev Jain
2024-12-20 11:05 ` Ryan Roberts
2024-12-30 7:09 ` Dev Jain
2024-12-30 16:36 ` Zi Yan
2025-01-02 11:43 ` Ryan Roberts
2025-01-03 10:10 ` Dev Jain
2025-01-03 10:11 ` Dev Jain
2024-12-16 17:31 ` [RFC PATCH 00/12] khugepaged: Asynchronous " Dev Jain
2025-01-02 21:58 ` Nico Pache
2025-01-03 7:04 ` Dev Jain
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241216165105.56185-9-dev.jain@arm.com \
--to=dev.jain@arm.com \
--cc=21cnbao@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=aneesh.kumar@kernel.org \
--cc=anshuman.khandual@arm.com \
--cc=apopple@nvidia.com \
--cc=baohua@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=cl@gentwo.org \
--cc=dave.hansen@linux.intel.com \
--cc=david@redhat.com \
--cc=haowenchao22@gmail.com \
--cc=hughd@google.com \
--cc=ioworker0@gmail.com \
--cc=jack@suse.cz \
--cc=jglisse@google.com \
--cc=jhubbard@nvidia.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.com \
--cc=peterx@redhat.com \
--cc=ryan.roberts@arm.com \
--cc=srivatsa@csail.mit.edu \
--cc=surenb@google.com \
--cc=vbabka@suse.cz \
--cc=vishal.moola@gmail.com \
--cc=wangkefeng.wang@huawei.com \
--cc=will@kernel.org \
--cc=willy@infradead.org \
--cc=yang@os.amperecomputing.com \
--cc=zhengqi.arch@bytedance.com \
--cc=ziy@nvidia.com \
--cc=zokeefe@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.