public inbox for linux-mm@kvack.org
 help / color / mirror / Atom feed
From: Usama Arif <usamaarif642@gmail.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	david@redhat.com, linux-mm@kvack.org
Cc: hannes@cmpxchg.org, shakeel.butt@linux.dev, riel@surriel.com,
	ziy@nvidia.com, baolin.wang@linux.alibaba.com,
	lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com,
	npache@redhat.com, ryan.roberts@arm.com,
	linux-kernel@vger.kernel.org, kernel-team@meta.com,
	Usama Arif <usamaarif642@gmail.com>
Subject: [PATCH 1/1] prctl: allow overriding system THP policy to always per process
Date: Wed,  7 May 2025 15:00:34 +0100	[thread overview]
Message-ID: <20250507141132.2773275-2-usamaarif642@gmail.com> (raw)
In-Reply-To: <20250507141132.2773275-1-usamaarif642@gmail.com>

Allowing the global THP policy to be overridden per process lets workloads
that have been shown to benefit from hugepages use them, without regressing
workloads that wouldn't benefit. This allows both types of workloads
to be run/stacked on the same machine.

It also helps in rolling out hugepages in hyperscaler configurations
for workloads that benefit from them, where a single THP policy is likely
to be used across the entire fleet, and prctl will help override it.

Signed-off-by: Usama Arif <usamaarif642@gmail.com>
---
 include/linux/huge_mm.h                          |  3 ++-
 include/linux/mm_types.h                         |  7 ++-----
 include/uapi/linux/prctl.h                       |  3 +++
 kernel/sys.c                                     | 16 ++++++++++++++++
 tools/include/uapi/linux/prctl.h                 |  3 +++
 .../perf/trace/beauty/include/uapi/linux/prctl.h |  3 +++
 6 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 2f190c90192d..0587dc4b8e2d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -293,7 +293,8 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
 		if (vm_flags & VM_HUGEPAGE)
 			mask |= READ_ONCE(huge_anon_orders_madvise);
 		if (hugepage_global_always() ||
-		    ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled()))
+		    ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled()) ||
+		    test_bit(MMF_THP_ALWAYS, &vma->vm_mm->flags))
 			mask |= READ_ONCE(huge_anon_orders_inherit);
 
 		orders &= mask;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e76bade9ebb1..9bcd72b2b191 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1704,11 +1704,8 @@ enum {
 #define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
 #define MMF_VM_HUGEPAGE		17	/* set when mm is available for khugepaged */
 
-/*
- * This one-shot flag is dropped due to necessity of changing exe once again
- * on NFS restore
- */
-//#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
+/* override the inherited THP page sizes to "always" for the entire process */
+#define MMF_THP_ALWAYS		18
 
 #define MMF_HAS_UPROBES		19	/* has uprobes */
 #define MMF_RECALC_UPROBES	20	/* MMF_HAS_UPROBES can be wrong */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 15c18ef4eb11..22c526681562 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -364,4 +364,7 @@ struct prctl_mm_map {
 # define PR_TIMER_CREATE_RESTORE_IDS_ON		1
 # define PR_TIMER_CREATE_RESTORE_IDS_GET	2
 
+#define PR_SET_THP_ALWAYS	78
+#define PR_GET_THP_ALWAYS	79
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index c434968e9f5d..ee56b059ff1f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2658,6 +2658,22 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			clear_bit(MMF_DISABLE_THP, &me->mm->flags);
 		mmap_write_unlock(me->mm);
 		break;
+	case PR_GET_THP_ALWAYS:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = !!test_bit(MMF_THP_ALWAYS, &me->mm->flags);
+		break;
+	case PR_SET_THP_ALWAYS:
+		if (arg3 || arg4 || arg5)
+			return -EINVAL;
+		if (mmap_write_lock_killable(me->mm))
+			return -EINTR;
+		if (arg2)
+			set_bit(MMF_THP_ALWAYS, &me->mm->flags);
+		else
+			clear_bit(MMF_THP_ALWAYS, &me->mm->flags);
+		mmap_write_unlock(me->mm);
+		break;
 	case PR_MPX_ENABLE_MANAGEMENT:
 	case PR_MPX_DISABLE_MANAGEMENT:
 		/* No longer implemented: */
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 35791791a879..f5f6cff42b3f 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -328,4 +328,7 @@ struct prctl_mm_map {
 # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC	0x10 /* Clear the aspect on exec */
 # define PR_PPC_DEXCR_CTRL_MASK		0x1f
 
+#define PR_SET_THP_ALWAYS	78
+#define PR_GET_THP_ALWAYS	79
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 15c18ef4eb11..680996d56faf 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -364,4 +364,7 @@ struct prctl_mm_map {
 # define PR_TIMER_CREATE_RESTORE_IDS_ON		1
 # define PR_TIMER_CREATE_RESTORE_IDS_GET	2
 
+#define PR_SET_THP_ALWAYS	78
+#define PR_GET_THP_ALWAYS	79
+
 #endif /* _LINUX_PRCTL_H */
-- 
2.47.1



  reply	other threads:[~2025-05-07 14:11 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-07 14:00 [PATCH 0/1] prctl: allow overriding system THP policy to always Usama Arif
2025-05-07 14:00 ` Usama Arif [this message]
2025-05-07 15:02   ` [PATCH 1/1] prctl: allow overriding system THP policy to always per process Usama Arif
2025-05-07 20:14   ` Zi Yan
2025-05-08 10:53     ` Usama Arif
2025-05-08 20:29       ` Zi Yan
2025-05-07 14:57 ` [PATCH 0/1] prctl: allow overriding system THP policy to always Zi Yan
2025-05-07 15:12   ` Usama Arif
2025-05-07 15:57     ` Zi Yan
2025-05-07 16:09       ` Usama Arif
2025-05-08  5:41         ` Yafang Shao
2025-05-08 16:04           ` Usama Arif
2025-05-09  2:15             ` Yafang Shao
2025-05-09  5:13               ` Johannes Weiner
2025-05-09  9:24                 ` Yafang Shao
2025-05-09  9:30                   ` David Hildenbrand
2025-05-09  9:43                     ` Yafang Shao
2025-05-09 16:46                       ` Johannes Weiner
2025-05-09 22:42                         ` David Hildenbrand
2025-05-09 23:34                           ` Zi Yan
2025-05-11  8:15                             ` David Hildenbrand
2025-05-11 14:08                               ` Usama Arif
2025-05-13 11:43                                 ` Yafang Shao
2025-05-13 12:04                                 ` David Hildenbrand
2025-05-11  2:08                         ` Yafang Shao
2025-05-08 11:06 ` David Hildenbrand
2025-05-08 16:35   ` Usama Arif
2025-05-08 17:39     ` David Hildenbrand
2025-05-08 18:05       ` Usama Arif

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250507141132.2773275-2-usamaarif642@gmail.com \
    --to=usamaarif642@gmail.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=npache@redhat.com \
    --cc=riel@surriel.com \
    --cc=ryan.roberts@arm.com \
    --cc=shakeel.butt@linux.dev \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox