From: Usama Arif <usamaarif642@gmail.com>
To: Andrew Morton <akpm@linux-foundation.org>,
david@redhat.com, linux-mm@kvack.org
Cc: linux-fsdevel@vger.kernel.org, corbet@lwn.net, rppt@kernel.org,
surenb@google.com, mhocko@suse.com, hannes@cmpxchg.org,
baohua@kernel.org, shakeel.butt@linux.dev, riel@surriel.com,
ziy@nvidia.com, laoar.shao@gmail.com, dev.jain@arm.com,
baolin.wang@linux.alibaba.com, npache@redhat.com,
lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com,
ryan.roberts@arm.com, vbabka@suse.cz, jannh@google.com,
Arnd Bergmann <arnd@arndb.de>,
sj@kernel.org, linux-kernel@vger.kernel.org,
linux-doc@vger.kernel.org, kernel-team@meta.com,
Usama Arif <usamaarif642@gmail.com>
Subject: [PATCH v4 3/7] mm/huge_memory: respect MADV_COLLAPSE with PR_THP_DISABLE_EXCEPT_ADVISED
Date: Wed, 13 Aug 2025 14:55:38 +0100 [thread overview]
Message-ID: <20250813135642.1986480-4-usamaarif642@gmail.com> (raw)
In-Reply-To: <20250813135642.1986480-1-usamaarif642@gmail.com>
From: David Hildenbrand <david@redhat.com>
Let's allow for making MADV_COLLAPSE succeed on areas that neither have
VM_HUGEPAGE nor VM_NOHUGEPAGE when we have THP disabled
unless explicitly advised (PR_THP_DISABLE_EXCEPT_ADVISED).
MADV_COLLAPSE is a clear advice that we want to collapse.
Note that we still respect the VM_NOHUGEPAGE flag, just like
MADV_COLLAPSE always does. So consequently, MADV_COLLAPSE is now only
refused on VM_NOHUGEPAGE with PR_THP_DISABLE_EXCEPT_ADVISED,
including for shmem.
Co-developed-by: Usama Arif <usamaarif642@gmail.com>
Signed-off-by: Usama Arif <usamaarif642@gmail.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
include/linux/huge_mm.h | 8 +++++++-
include/uapi/linux/prctl.h | 2 +-
mm/huge_memory.c | 5 +++--
mm/memory.c | 6 ++++--
mm/shmem.c | 2 +-
5 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 92ea0b9771fae..1ac0d06fb3c1d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -329,7 +329,7 @@ struct thpsize {
* through madvise or prctl.
*/
static inline bool vma_thp_disabled(struct vm_area_struct *vma,
- vm_flags_t vm_flags)
+ vm_flags_t vm_flags, bool forced_collapse)
{
/* Are THPs disabled for this VMA? */
if (vm_flags & VM_NOHUGEPAGE)
@@ -343,6 +343,12 @@ static inline bool vma_thp_disabled(struct vm_area_struct *vma,
*/
if (vm_flags & VM_HUGEPAGE)
return false;
+ /*
+ * Forcing a collapse (e.g., madv_collapse), is a clear advice to
+ * use THPs.
+ */
+ if (forced_collapse)
+ return false;
return mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, vma->vm_mm);
}
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 150b6deebfb1e..51c4e8c82b1e9 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -185,7 +185,7 @@ struct prctl_mm_map {
#define PR_SET_THP_DISABLE 41
/*
* Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE /
- * VM_HUGEPAGE).
+ * VM_HUGEPAGE, MADV_COLLAPSE).
*/
# define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1)
#define PR_GET_THP_DISABLE 42
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9c716be949cbf..1eca2d543449c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -104,7 +104,8 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
{
const bool smaps = type == TVA_SMAPS;
const bool in_pf = type == TVA_PAGEFAULT;
- const bool enforce_sysfs = type != TVA_FORCED_COLLAPSE;
+ const bool forced_collapse = type == TVA_FORCED_COLLAPSE;
+ const bool enforce_sysfs = !forced_collapse;
unsigned long supported_orders;
/* Check the intersection of requested and supported orders. */
@@ -122,7 +123,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
if (!vma->vm_mm) /* vdso */
return 0;
- if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags))
+ if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags, forced_collapse))
return 0;
/* khugepaged doesn't collapse DAX vma, but page fault is fine. */
diff --git a/mm/memory.c b/mm/memory.c
index 7b1e8f137fa3f..e4f533655305a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5332,9 +5332,11 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *pa
* It is too late to allocate a small folio, we already have a large
* folio in the pagecache: especially s390 KVM cannot tolerate any
* PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any
- * PMD mappings if THPs are disabled.
+ * PMD mappings if THPs are disabled. As we already have a THP ...
+ * behave as if we are forcing a collapse.
*/
- if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags))
+ if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags,
+ /* forced_collapse=*/ true))
return ret;
if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))
diff --git a/mm/shmem.c b/mm/shmem.c
index e2c76a30802b6..d945de3a7f0e7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1817,7 +1817,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
vm_flags_t vm_flags = vma ? vma->vm_flags : 0;
unsigned int global_orders;
- if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags)))
+ if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force)))
return 0;
global_orders = shmem_huge_global_enabled(inode, index, write_end,
--
2.47.3
next prev parent reply other threads:[~2025-08-13 13:57 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-13 13:55 [PATCH v4 0/7] prctl: extend PR_SET_THP_DISABLE to only provide THPs when advised Usama Arif
2025-08-13 13:55 ` [PATCH v4 1/7] prctl: extend PR_SET_THP_DISABLE to optionally exclude VM_HUGEPAGE Usama Arif
2025-08-13 13:55 ` [PATCH v4 2/7] mm/huge_memory: convert "tva_flags" to "enum tva_type" Usama Arif
2025-08-14 3:07 ` Yafang Shao
2025-08-14 10:43 ` Usama Arif
2025-08-15 1:11 ` Andrew Morton
2025-08-15 9:29 ` Usama Arif
2025-08-14 14:59 ` Zi Yan
2025-08-13 13:55 ` Usama Arif [this message]
2025-08-14 15:14 ` [PATCH v4 3/7] mm/huge_memory: respect MADV_COLLAPSE with PR_THP_DISABLE_EXCEPT_ADVISED Zi Yan
2025-08-13 13:55 ` [PATCH v4 4/7] docs: transhuge: document process level THP controls Usama Arif
2025-08-13 14:30 ` Lorenzo Stoakes
2025-08-14 15:47 ` Zi Yan
2025-08-13 13:55 ` [PATCH v4 5/7] selftest/mm: Extract sz2ord function into vm_util.h Usama Arif
2025-08-13 14:31 ` Lorenzo Stoakes
2025-08-14 15:52 ` Zi Yan
2025-08-13 13:55 ` [PATCH v4 6/7] selftests: prctl: introduce tests for disabling THPs completely Usama Arif
2025-08-13 14:54 ` Lorenzo Stoakes
2025-08-13 13:55 ` [PATCH v4 7/7] selftests: prctl: introduce tests for disabling THPs except for madvise Usama Arif
2025-08-13 15:13 ` Lorenzo Stoakes
2025-08-13 16:24 ` David Hildenbrand
2025-08-13 18:52 ` Lorenzo Stoakes
2025-08-14 9:32 ` David Hildenbrand
2025-08-14 10:49 ` Lorenzo Stoakes
2025-08-14 11:45 ` Mark Brown
2025-08-14 12:00 ` David Hildenbrand
2025-08-14 12:09 ` Mark Brown
2025-08-14 12:59 ` David Hildenbrand
2025-08-14 13:08 ` Mark Brown
2025-08-14 15:02 ` Lorenzo Stoakes
2025-08-14 15:41 ` Usama Arif
2025-08-14 10:36 ` Usama Arif
2025-08-14 10:53 ` Lorenzo Stoakes
2025-08-14 11:51 ` Usama Arif
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250813135642.1986480-4-usamaarif642@gmail.com \
--to=usamaarif642@gmail.com \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=arnd@arndb.de \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=corbet@lwn.net \
--cc=david@redhat.com \
--cc=dev.jain@arm.com \
--cc=hannes@cmpxchg.org \
--cc=jannh@google.com \
--cc=kernel-team@meta.com \
--cc=laoar.shao@gmail.com \
--cc=linux-doc@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=mhocko@suse.com \
--cc=npache@redhat.com \
--cc=riel@surriel.com \
--cc=rppt@kernel.org \
--cc=ryan.roberts@arm.com \
--cc=shakeel.butt@linux.dev \
--cc=sj@kernel.org \
--cc=surenb@google.com \
--cc=vbabka@suse.cz \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).