All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dev Jain <dev.jain@arm.com>
To: akpm@linux-foundation.org, david@redhat.com, willy@infradead.org,
	kirill.shutemov@linux.intel.com
Cc: npache@redhat.com, ryan.roberts@arm.com,
	anshuman.khandual@arm.com, catalin.marinas@arm.com,
	cl@gentwo.org, vbabka@suse.cz, mhocko@suse.com,
	apopple@nvidia.com, dave.hansen@linux.intel.com, will@kernel.org,
	baohua@kernel.org, jack@suse.cz, srivatsa@csail.mit.edu,
	haowenchao22@gmail.com, hughd@google.com,
	aneesh.kumar@kernel.org, yang@os.amperecomputing.com,
	peterx@redhat.com, ioworker0@gmail.com,
	wangkefeng.wang@huawei.com, ziy@nvidia.com, jglisse@google.com,
	surenb@google.com, vishal.moola@gmail.com, zokeefe@google.com,
	zhengqi.arch@bytedance.com, jhubbard@nvidia.com,
	21cnbao@gmail.com, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, Dev Jain <dev.jain@arm.com>
Subject: [PATCH v2 16/17] khugepaged: Implement strict policy for mTHP collapse
Date: Tue, 11 Feb 2025 16:43:25 +0530	[thread overview]
Message-ID: <20250211111326.14295-17-dev.jain@arm.com> (raw)
In-Reply-To: <20250211111326.14295-1-dev.jain@arm.com>

As noted in the discussion thread ending at [1], avoid the creep problem by
collapsing to mTHPs only if max_ptes_none is zero or HPAGE_PMD_NR - 1 (511
with 4K base pages). Along with this, make the mTHP collapse conditions
stricter: stop scaling max_ptes_shared and max_ptes_swap by the collapse
order, and consider an mTHP collapse only if there are no shared or swap
PTEs in the range.

[1] https://lore.kernel.org/all/8114d47b-b383-4d6e-ab65-a0e88b99c873@arm.com/

Signed-off-by: Dev Jain <dev.jain@arm.com>
---
 mm/khugepaged.c | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index d2bb008b95e7..b589f889bb5a 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -417,6 +417,17 @@ static inline int hpage_collapse_test_exit_or_disable(struct mm_struct *mm)
 
 static bool thp_enabled(void)
 {
+	bool anon_pmd_enabled = (test_bit(PMD_ORDER, &huge_anon_orders_always) ||
+				 test_bit(PMD_ORDER, &huge_anon_orders_madvise) ||
+			         (test_bit(PMD_ORDER, &huge_anon_orders_inherit) &&
+			         hugepage_global_enabled()));
+
+	/*
+	 * If PMD_ORDER is ineligible for collapse, check if mTHP collapse policy is obeyed;
+	 * see Documentation/admin-guide/mm/transhuge.rst
+	 */
+	bool anon_collapse_mthp = (khugepaged_max_ptes_none == 0 ||
+			      khugepaged_max_ptes_none == HPAGE_PMD_NR - 1);
 	/*
 	 * We cover the anon, shmem and the file-backed case here; file-backed
 	 * hugepages, when configured in, are determined by the global control.
@@ -427,8 +438,9 @@ static bool thp_enabled(void)
 	if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
 	    hugepage_global_enabled())
 		return true;
-	if (huge_anon_orders_always || huge_anon_orders_madvise ||
-	    (huge_anon_orders_inherit && hugepage_global_enabled()))
+	if ((huge_anon_orders_always || huge_anon_orders_madvise ||
+	    (huge_anon_orders_inherit && hugepage_global_enabled())) &&
+	    (anon_pmd_enabled || anon_collapse_mthp))
 		return true;
 	if (IS_ENABLED(CONFIG_SHMEM) && shmem_hpage_pmd_enabled())
 		return true;
@@ -578,13 +590,16 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 	pte_t *_pte;
 	int none_or_zero = 0, shared = 0, result = SCAN_FAIL, referenced = 0;
 	bool writable = false;
-	unsigned int max_ptes_shared = khugepaged_max_ptes_shared >> (HPAGE_PMD_ORDER - order);
+	unsigned int max_ptes_shared = khugepaged_max_ptes_shared;
 	unsigned int max_ptes_none = khugepaged_max_ptes_none >> (HPAGE_PMD_ORDER - order);
 	bool all_pfns_present = true;
 	bool all_pfns_contig = true;
 	bool first_pfn_aligned = true;
 	pte_t prev_pteval;
 
+	if (order != HPAGE_PMD_ORDER)
+		max_ptes_shared = 0;
+
 	for (_pte = pte; _pte < pte + (1UL << order);
 	     _pte++, address += PAGE_SIZE) {
 		pte_t pteval = ptep_get(_pte);
@@ -1453,11 +1468,16 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
 	order = highest_order(orders);
 	VM_BUG_ON(address & ((PAGE_SIZE << order) - 1));
 
+	max_ptes_none = khugepaged_max_ptes_none;
+	max_ptes_shared = khugepaged_max_ptes_shared;
+	max_ptes_swap = khugepaged_max_ptes_swap;
+
 scan_pte_range:
 
-	max_ptes_shared = khugepaged_max_ptes_shared >> (HPAGE_PMD_ORDER - order);
+	if (order != HPAGE_PMD_ORDER)
+		max_ptes_shared = max_ptes_swap = 0;
+
 	max_ptes_none = khugepaged_max_ptes_none >> (HPAGE_PMD_ORDER - order);
-	max_ptes_swap = khugepaged_max_ptes_swap >> (HPAGE_PMD_ORDER - order);
 	referenced = 0, shared = 0, none_or_zero = 0, unmapped = 0;
 	all_pfns_present = true, all_pfns_contig = true, first_pfn_aligned = true;
 
@@ -2651,6 +2671,11 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
 	int order;
 	bool is_file_vma;
 	int prev_progress = 0;
+	bool collapse_mthp = true;
+
+	/* Avoid the creep problem; see Documentation/admin-guide/mm/transhuge.rst */
+	if (khugepaged_max_ptes_none && khugepaged_max_ptes_none != HPAGE_PMD_NR - 1)
+		collapse_mthp = false;
 
 	VM_BUG_ON(!pages);
 	lockdep_assert_held(&khugepaged_mm_lock);
@@ -2710,6 +2735,8 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
 			/* select the highest possible order for the VMA */
 			order = highest_order(orders);
 			while (orders) {
+				if (order != HPAGE_PMD_ORDER && !collapse_mthp)
+					goto skip;
 				hend = round_down(vma->vm_end, PAGE_SIZE << order);
 				if (khugepaged_scan.address <= hend)
 					break;
-- 
2.30.2



  parent reply	other threads:[~2025-02-11 11:16 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-11 11:13 [PATCH v2 00/17] khugepaged: Asynchronous mTHP collapse Dev Jain
2025-02-11 11:13 ` [PATCH v2 01/17] khugepaged: Generalize alloc_charge_folio() Dev Jain
2025-02-11 11:13 ` [PATCH v2 02/17] khugepaged: Generalize hugepage_vma_revalidate() Dev Jain
2025-02-11 11:13 ` [PATCH v2 03/17] khugepaged: Generalize __collapse_huge_page_swapin() Dev Jain
2025-02-11 11:13 ` [PATCH v2 04/17] khugepaged: Generalize __collapse_huge_page_isolate() Dev Jain
2025-02-11 11:13 ` [PATCH v2 05/17] khugepaged: Generalize __collapse_huge_page_copy() Dev Jain
2025-02-11 11:13 ` [PATCH v2 06/17] khugepaged: Abstract PMD-THP collapse Dev Jain
2025-02-11 11:13 ` [PATCH v2 07/17] khugepaged: Scan PTEs order-wise Dev Jain
2025-02-11 11:13 ` [PATCH v2 08/17] khugepaged: Introduce vma_collapse_anon_folio() Dev Jain
2025-02-11 11:13 ` [PATCH v2 09/17] khugepaged: Define collapse policy if a larger folio is already mapped Dev Jain
2025-02-11 11:13 ` [PATCH v2 10/17] khugepaged: Exit early on fully-mapped aligned mTHP Dev Jain
2025-02-11 11:13 ` [PATCH v2 11/17] khugepaged: Enable sysfs to control order of collapse Dev Jain
2025-02-11 11:13 ` [PATCH v2 12/17] khugepaged: Enable variable-sized VMA collapse Dev Jain
2025-02-11 11:13 ` [PATCH v2 13/17] khugepaged: Lock all VMAs mapping the PTE table Dev Jain
2025-02-11 11:13 ` [PATCH v2 14/17] khugepaged: Reset scan address to correct alignment Dev Jain
2025-02-11 11:13 ` [PATCH v2 15/17] khugepaged: Delay cond_resched() Dev Jain
2025-02-11 11:13 ` Dev Jain [this message]
2025-02-11 11:13 ` [PATCH v2 17/17] Documentation: transhuge: Define khugepaged mTHP collapse policy Dev Jain
2025-02-11 23:23 ` [PATCH v2 00/17] khugepaged: Asynchronous mTHP collapse Andrew Morton
2025-02-12  4:18   ` Dev Jain
2025-02-15  1:47 ` Nico Pache
2025-02-15  7:36   ` Dev Jain

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250211111326.14295-17-dev.jain@arm.com \
    --to=dev.jain@arm.com \
    --cc=21cnbao@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=aneesh.kumar@kernel.org \
    --cc=anshuman.khandual@arm.com \
    --cc=apopple@nvidia.com \
    --cc=baohua@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=cl@gentwo.org \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=haowenchao22@gmail.com \
    --cc=hughd@google.com \
    --cc=ioworker0@gmail.com \
    --cc=jack@suse.cz \
    --cc=jglisse@google.com \
    --cc=jhubbard@nvidia.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=npache@redhat.com \
    --cc=peterx@redhat.com \
    --cc=ryan.roberts@arm.com \
    --cc=srivatsa@csail.mit.edu \
    --cc=surenb@google.com \
    --cc=vbabka@suse.cz \
    --cc=vishal.moola@gmail.com \
    --cc=wangkefeng.wang@huawei.com \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=yang@os.amperecomputing.com \
    --cc=zhengqi.arch@bytedance.com \
    --cc=ziy@nvidia.com \
    --cc=zokeefe@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.