All of lore.kernel.org
 help / color / mirror / Atom feed
From: "David Hildenbrand (Arm)" <david@kernel.org>
To: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Cc: Nico Pache <npache@redhat.com>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	aarcange@redhat.com, akpm@linux-foundation.org,
	anshuman.khandual@arm.com, apopple@nvidia.com, baohua@kernel.org,
	baolin.wang@linux.alibaba.com, byungchul@sk.com,
	catalin.marinas@arm.com, cl@gentwo.org, corbet@lwn.net,
	dave.hansen@linux.intel.com, dev.jain@arm.com, gourry@gourry.net,
	hannes@cmpxchg.org, hughd@google.com, jackmanb@google.com,
	jack@suse.cz, jannh@google.com, jglisse@google.com,
	joshua.hahnjy@gmail.com, kas@kernel.org, lance.yang@linux.dev,
	Liam.Howlett@oracle.com, lorenzo.stoakes@oracle.com,
	mathieu.desnoyers@efficios.com, matthew.brost@intel.com,
	mhiramat@kernel.org, mhocko@suse.com, peterx@redhat.com,
	pfalcato@suse.de, rakie.kim@sk.com, raquini@redhat.com,
	rdunlap@infradead.org, richard.weiyang@gmail.com,
	rientjes@google.com, rostedt@goodmis.org, rppt@kernel.org,
	ryan.roberts@arm.com, shivankg@amd.com, sunnanyong@huawei.com,
	surenb@google.com, thomas.hellstrom@linux.intel.com,
	tiwai@suse.de, usamaarif642@gmail.com, vbabka@suse.cz,
	vishal.moola@gmail.com, wangkefeng.wang@huawei.com,
	will@kernel.org, willy@infradead.org,
	yang@os.amperecomputing.com, ying.huang@linux.alibaba.com,
	ziy@nvidia.com, zokeefe@google.com
Subject: Re: [PATCH mm-unstable v4 5/5] mm/khugepaged: unify khugepaged and madv_collapse with collapse_single_pmd()
Date: Tue, 31 Mar 2026 23:03:43 +0200	[thread overview]
Message-ID: <9d5a4703-a15e-4be6-926b-af2d98db40c0@kernel.org> (raw)
In-Reply-To: <37132e09-9c03-4afc-bc73-4bca416c5343@kernel.org>

On 3/31/26 22:50, David Hildenbrand (Arm) wrote:
> On 3/31/26 22:06, Lorenzo Stoakes (Oracle) wrote:
>> On Tue, Mar 31, 2026 at 10:00:58PM +0200, David Hildenbrand (Arm) wrote:
>>>
>>> Right.
>>>
>>> The original code used
>>>
>>> 	bool mmap_locked = true;
>>>
>>> If the fix get squashed, maybe we should revert to that handling to
>>> minimize the churn?
>>
>> Well collapse_single_pmd() and all descendents would need to be updated to
>> invert the boolean, not sure it's worth it.
> 
> The code is confusing me.
> 
> In the old code, collapse_single_pmd() was called with "mmap_locked ==
> true" and set "mmap_locked=false".
> 
> Now we call it with "mmap_unlocked=false" and it sets "mmap_unlocked=true".
> 
> So far so good.
> 
> However, collapse_scan_pmd() consumed "mmap_locked", now it effectively
> consumes "mmap_unlocked".

Okay, I'm too confused for today and give up :)

FWIW, this is the effective change, and the effective changes to
mmap_unlocked and lock_dropped ... confuse me:


 mm/khugepaged.c | 146 +++++++++++++++++++++++++-----------------------
 1 file changed, 76 insertions(+), 70 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 26c38934e829..3ffff9f98b2f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1253,7 +1253,7 @@ static enum scan_result collapse_huge_page(struct mm_struct *mm, unsigned long a
 
 static enum scan_result collapse_scan_pmd(struct mm_struct *mm,
 		struct vm_area_struct *vma, unsigned long start_addr,
-		bool *mmap_locked, struct collapse_control *cc)
+		bool *lock_dropped, struct collapse_control *cc)
 {
 	pmd_t *pmd;
 	pte_t *pte, *_pte;
@@ -1428,7 +1428,7 @@ static enum scan_result collapse_scan_pmd(struct mm_struct *mm,
 		result = collapse_huge_page(mm, start_addr, referenced,
 					    unmapped, cc);
 		/* collapse_huge_page will return with the mmap_lock released */
-		*mmap_locked = false;
+		*lock_dropped = true;
 	}
 out:
 	trace_mm_khugepaged_scan_pmd(mm, folio, referenced,
@@ -2420,6 +2420,67 @@ static enum scan_result collapse_scan_file(struct mm_struct *mm,
 	return result;
 }
 
+/*
+ * Try to collapse a single PMD starting at a PMD aligned addr, and return
+ * the results.
+ */
+static enum scan_result collapse_single_pmd(unsigned long addr,
+		struct vm_area_struct *vma, bool *lock_dropped,
+		struct collapse_control *cc)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	bool triggered_wb = false;
+	enum scan_result result;
+	struct file *file;
+	pgoff_t pgoff;
+
+	mmap_assert_locked(mm);
+
+	if (vma_is_anonymous(vma)) {
+		result = collapse_scan_pmd(mm, vma, addr, lock_dropped, cc);
+		goto end;
+	}
+
+	file = get_file(vma->vm_file);
+	pgoff = linear_page_index(vma, addr);
+
+	mmap_read_unlock(mm);
+	*lock_dropped = true;
+retry:
+	result = collapse_scan_file(mm, addr, file, pgoff, cc);
+
+	/*
+	 * For MADV_COLLAPSE, when encountering dirty pages, try to writeback,
+	 * then retry the collapse one time.
+	 */
+	if (!cc->is_khugepaged && result == SCAN_PAGE_DIRTY_OR_WRITEBACK &&
+	    !triggered_wb && mapping_can_writeback(file->f_mapping)) {
+		const loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
+		const loff_t lend = lstart + HPAGE_PMD_SIZE - 1;
+
+		filemap_write_and_wait_range(file->f_mapping, lstart, lend);
+		triggered_wb = true;
+		goto retry;
+	}
+	fput(file);
+
+	if (result == SCAN_PTE_MAPPED_HUGEPAGE) {
+		mmap_read_lock(mm);
+		if (collapse_test_exit_or_disable(mm))
+			result = SCAN_ANY_PROCESS;
+		else
+			result = try_collapse_pte_mapped_thp(mm, addr,
+							     !cc->is_khugepaged);
+		if (result == SCAN_PMD_MAPPED)
+			result = SCAN_SUCCEED;
+		mmap_read_unlock(mm);
+	}
+end:
+	if (cc->is_khugepaged && result == SCAN_SUCCEED)
+		++khugepaged_pages_collapsed;
+	return result;
+}
+
 static void collapse_scan_mm_slot(unsigned int progress_max,
 		enum scan_result *result, struct collapse_control *cc)
 	__releases(&khugepaged_mm_lock)
@@ -2481,46 +2542,21 @@ static void collapse_scan_mm_slot(unsigned int progress_max,
 		VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
 
 		while (khugepaged_scan.address < hend) {
-			bool mmap_locked = true;
+			bool lock_dropped = false;
 
 			cond_resched();
 			if (unlikely(collapse_test_exit_or_disable(mm)))
 				goto breakouterloop;
 
-			VM_BUG_ON(khugepaged_scan.address < hstart ||
+			VM_WARN_ON_ONCE(khugepaged_scan.address < hstart ||
 				  khugepaged_scan.address + HPAGE_PMD_SIZE >
 				  hend);
-			if (!vma_is_anonymous(vma)) {
-				struct file *file = get_file(vma->vm_file);
-				pgoff_t pgoff = linear_page_index(vma,
-						khugepaged_scan.address);
-
-				mmap_read_unlock(mm);
-				mmap_locked = false;
-				*result = collapse_scan_file(mm,
-					khugepaged_scan.address, file, pgoff, cc);
-				fput(file);
-				if (*result == SCAN_PTE_MAPPED_HUGEPAGE) {
-					mmap_read_lock(mm);
-					if (collapse_test_exit_or_disable(mm))
-						goto breakouterloop;
-					*result = try_collapse_pte_mapped_thp(mm,
-						khugepaged_scan.address, false);
-					if (*result == SCAN_PMD_MAPPED)
-						*result = SCAN_SUCCEED;
-					mmap_read_unlock(mm);
-				}
-			} else {
-				*result = collapse_scan_pmd(mm, vma,
-					khugepaged_scan.address, &mmap_locked, cc);
-			}
-
-			if (*result == SCAN_SUCCEED)
-				++khugepaged_pages_collapsed;
 
+			*result = collapse_single_pmd(khugepaged_scan.address,
+						      vma, &lock_dropped, cc);
 			/* move to next address */
 			khugepaged_scan.address += HPAGE_PMD_SIZE;
-			if (!mmap_locked)
+			if (lock_dropped)
 				/*
 				 * We released mmap_lock so break loop.  Note
 				 * that we drop mmap_lock before all hugepage
@@ -2795,7 +2831,7 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
 	unsigned long hstart, hend, addr;
 	enum scan_result last_fail = SCAN_FAIL;
 	int thps = 0;
-	bool mmap_locked = true;
+	bool mmap_unlocked = false;
 
 	BUG_ON(vma->vm_start > start);
 	BUG_ON(vma->vm_end < end);
@@ -2817,13 +2853,12 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
 
 	for (addr = hstart; addr < hend; addr += HPAGE_PMD_SIZE) {
 		enum scan_result result = SCAN_FAIL;
-		bool triggered_wb = false;
 
-retry:
-		if (!mmap_locked) {
+		if (mmap_unlocked) {
 			cond_resched();
 			mmap_read_lock(mm);
-			mmap_locked = true;
+			mmap_unlocked = false;
+			*lock_dropped = true;
 			result = hugepage_vma_revalidate(mm, addr, false, &vma,
 							 cc);
 			if (result  != SCAN_SUCCEED) {
@@ -2833,45 +2868,14 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
 
 			hend = min(hend, vma->vm_end & HPAGE_PMD_MASK);
 		}
-		mmap_assert_locked(mm);
-		if (!vma_is_anonymous(vma)) {
-			struct file *file = get_file(vma->vm_file);
-			pgoff_t pgoff = linear_page_index(vma, addr);
 
-			mmap_read_unlock(mm);
-			mmap_locked = false;
-			*lock_dropped = true;
-			result = collapse_scan_file(mm, addr, file, pgoff, cc);
-
-			if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb &&
-			    mapping_can_writeback(file->f_mapping)) {
-				loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
-				loff_t lend = lstart + HPAGE_PMD_SIZE - 1;
+		result = collapse_single_pmd(addr, vma, &mmap_unlocked, cc);
 
-				filemap_write_and_wait_range(file->f_mapping, lstart, lend);
-				triggered_wb = true;
-				fput(file);
-				goto retry;
-			}
-			fput(file);
-		} else {
-			result = collapse_scan_pmd(mm, vma, addr, &mmap_locked, cc);
-		}
-		if (!mmap_locked)
-			*lock_dropped = true;
-
-handle_result:
 		switch (result) {
 		case SCAN_SUCCEED:
 		case SCAN_PMD_MAPPED:
 			++thps;
 			break;
-		case SCAN_PTE_MAPPED_HUGEPAGE:
-			BUG_ON(mmap_locked);
-			mmap_read_lock(mm);
-			result = try_collapse_pte_mapped_thp(mm, addr, true);
-			mmap_read_unlock(mm);
-			goto handle_result;
 		/* Whitelisted set of results where continuing OK */
 		case SCAN_NO_PTE_TABLE:
 		case SCAN_PTE_NON_PRESENT:
@@ -2894,8 +2898,10 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
 
 out_maybelock:
 	/* Caller expects us to hold mmap_lock on return */
-	if (!mmap_locked)
+	if (mmap_unlocked) {
+		*lock_dropped = false;
 		mmap_read_lock(mm);
+	}
 out_nolock:
 	mmap_assert_locked(mm);
 	mmdrop(mm);
-- 
2.43.0


-- 
Cheers,

David


  reply	other threads:[~2026-03-31 21:04 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-25 11:40 [PATCH mm-unstable v4 0/5] mm: khugepaged cleanups and mTHP prerequisites Nico Pache
2026-03-25 11:40 ` [PATCH mm-unstable v4 1/5] mm: consolidate anonymous folio PTE mapping into helpers Nico Pache
2026-03-25 11:40 ` [PATCH mm-unstable v4 2/5] mm: introduce is_pmd_order helper Nico Pache
2026-03-25 12:11   ` Lorenzo Stoakes (Oracle)
2026-03-25 14:45     ` Andrew Morton
2026-03-25 14:49       ` Lorenzo Stoakes (Oracle)
2026-03-25 16:05         ` Andrew Morton
2026-03-25 11:40 ` [PATCH mm-unstable v4 3/5] mm/khugepaged: define KHUGEPAGED_MAX_PTES_LIMIT as HPAGE_PMD_NR - 1 Nico Pache
2026-03-25 11:40 ` [PATCH mm-unstable v4 4/5] mm/khugepaged: rename hpage_collapse_* to collapse_* Nico Pache
2026-03-25 12:08   ` Lorenzo Stoakes (Oracle)
2026-03-25 11:40 ` [PATCH mm-unstable v4 5/5] mm/khugepaged: unify khugepaged and madv_collapse with collapse_single_pmd() Nico Pache
2026-03-31 14:01   ` Lorenzo Stoakes (Oracle)
2026-03-31 14:13     ` David Hildenbrand (Arm)
2026-03-31 14:15       ` Lorenzo Stoakes (Oracle)
2026-03-31 14:46         ` David Hildenbrand (Arm)
2026-03-31 20:00         ` David Hildenbrand (Arm)
2026-03-31 20:06           ` Lorenzo Stoakes (Oracle)
2026-03-31 20:50             ` David Hildenbrand (Arm)
2026-03-31 21:03               ` David Hildenbrand (Arm) [this message]
2026-03-31 21:09                 ` Nico Pache
2026-04-01  8:14                   ` Lorenzo Stoakes (Oracle)
2026-04-01 20:31                     ` Andrew Morton
2026-04-07  8:38                       ` Lorenzo Stoakes (Oracle)
2026-04-07 21:42                         ` Andrew Morton
2026-04-08  6:42                           ` Lorenzo Stoakes
2026-03-31 21:35           ` Andrew Morton
2026-03-31 21:49             ` Nico Pache
2026-04-01  7:05               ` David Hildenbrand (Arm)
2026-04-01  8:17                 ` Lorenzo Stoakes (Oracle)
2026-03-31 19:46       ` Nico Pache
2026-03-31 19:59         ` Lorenzo Stoakes (Oracle)
2026-03-31 16:29     ` Lance Yang
2026-03-31 19:59     ` Nico Pache
2026-03-25 11:44 ` [PATCH mm-unstable v4 0/5] mm: khugepaged cleanups and mTHP prerequisites Lorenzo Stoakes (Oracle)
2026-03-26  0:25 ` Andrew Morton
2026-03-26  4:44   ` Roman Gushchin
2026-03-26 16:48     ` Nico Pache
2026-03-26 17:35       ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9d5a4703-a15e-4be6-926b-af2d98db40c0@kernel.org \
    --to=david@kernel.org \
    --cc=Liam.Howlett@oracle.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=anshuman.khandual@arm.com \
    --cc=apopple@nvidia.com \
    --cc=baohua@kernel.org \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=byungchul@sk.com \
    --cc=catalin.marinas@arm.com \
    --cc=cl@gentwo.org \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=dev.jain@arm.com \
    --cc=gourry@gourry.net \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=jack@suse.cz \
    --cc=jackmanb@google.com \
    --cc=jannh@google.com \
    --cc=jglisse@google.com \
    --cc=joshua.hahnjy@gmail.com \
    --cc=kas@kernel.org \
    --cc=lance.yang@linux.dev \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=matthew.brost@intel.com \
    --cc=mhiramat@kernel.org \
    --cc=mhocko@suse.com \
    --cc=npache@redhat.com \
    --cc=peterx@redhat.com \
    --cc=pfalcato@suse.de \
    --cc=rakie.kim@sk.com \
    --cc=raquini@redhat.com \
    --cc=rdunlap@infradead.org \
    --cc=richard.weiyang@gmail.com \
    --cc=rientjes@google.com \
    --cc=rostedt@goodmis.org \
    --cc=rppt@kernel.org \
    --cc=ryan.roberts@arm.com \
    --cc=shivankg@amd.com \
    --cc=sunnanyong@huawei.com \
    --cc=surenb@google.com \
    --cc=thomas.hellstrom@linux.intel.com \
    --cc=tiwai@suse.de \
    --cc=usamaarif642@gmail.com \
    --cc=vbabka@suse.cz \
    --cc=vishal.moola@gmail.com \
    --cc=wangkefeng.wang@huawei.com \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=yang@os.amperecomputing.com \
    --cc=ying.huang@linux.alibaba.com \
    --cc=ziy@nvidia.com \
    --cc=zokeefe@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.