public inbox for linux-arch@vger.kernel.org
 help / color / mirror / Atom feed
From: Luka Bai <lukafocus@icloud.com>
To: linux-mm@kvack.org
Cc: Jonathan Corbet <corbet@lwn.net>,
	 Shuah Khan <skhan@linuxfoundation.org>,
	 Andrew Morton <akpm@linux-foundation.org>,
	 David Hildenbrand <david@kernel.org>,
	Lorenzo Stoakes <ljs@kernel.org>,  Zi Yan <ziy@nvidia.com>,
	Baolin Wang <baolin.wang@linux.alibaba.com>,
	 "Liam R. Howlett" <liam@infradead.org>,
	Nico Pache <npache@redhat.com>,
	 Ryan Roberts <ryan.roberts@arm.com>, Dev Jain <dev.jain@arm.com>,
	 Barry Song <baohua@kernel.org>,
	Lance Yang <lance.yang@linux.dev>,
	 Vlastimil Babka <vbabka@kernel.org>,
	Mike Rapoport <rppt@kernel.org>,
	 Suren Baghdasaryan <surenb@google.com>,
	Michal Hocko <mhocko@suse.com>,  Jann Horn <jannh@google.com>,
	Arnd Bergmann <arnd@arndb.de>,  Kairui Song <kasong@tencent.com>,
	linux-kernel@vger.kernel.org,  linux-arch@vger.kernel.org,
	linux-doc@vger.kernel.org,  Luka Bai <lukabai@tencent.com>
Subject: [PATCH 1/5] mm: add basic madvise helpers and branch for THP setup
Date: Fri, 01 May 2026 13:55:42 +0800	[thread overview]
Message-ID: <20260501-thp_cow-v1-1-005377483738@tencent.com> (raw)
In-Reply-To: <20260501-thp_cow-v1-0-005377483738@tencent.com>

From: Luka Bai <lukabai@tencent.com>

Transparent huge pages (THP) now work properly with most of the mm
framework and are well integrated with the folio concept, which allows
memory to be reclaimed or allocated at a large order. However, THP
behavior is not very predictable. For example, a THP is easily split on
many paths, such as partial mapping, swap-out, or fork + COW (for child
processes).

In some cases, we may want the outcome to be deterministic: some
workloads expect a relatively "stable" THP, while others would rather
save memory than gain the performance benefits.

This patch adds some basic helpers and a branch in the madvise path so
that we can add madvise options for THP that control, at the VMA level,
what we do for the different operations that may split a THP, such as
COW or swap.

The configuration is passed in through the madvise behavior argument,
decoded, and the result is saved in vma->vm_flags for later use.

Currently the only operation in the list is COW. It decides whether
we want to use a huge page for the child process when it writes to a
shared anonymous PMD mapping, so that we can make sure the THP is not
split after the write. This patch only adds the basic setup helpers;
the real usage will be added in later patches.

Signed-off-by: Luka Bai <lukabai@tencent.com>
---
 include/linux/huge_mm.h                |  6 ++++++
 include/linux/mm.h                     | 19 +++++++++++++++++++
 include/uapi/asm-generic/mman-common.h |  9 +++++++++
 mm/madvise.c                           | 25 +++++++++++++++++++++++++
 4 files changed, 59 insertions(+)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 48496f09909b..a0ce8c0b81f5 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -6,6 +6,7 @@
 
 #include <linux/fs.h> /* only for vma_is_dax() */
 #include <linux/kobject.h>
+#include <uapi/asm-generic/mman-common.h>
 
 vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -363,6 +364,11 @@ static inline bool thp_disabled_by_hw(void)
 	return transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED);
 }
 
+static inline bool madv_thp_cow(int behavior)
+{
+	return behavior & MADV_THP_COW;
+}
+
 unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags);
 unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1d76da6e0791..8a800819cfa2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -391,6 +391,10 @@ enum {
 #else
 	DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN),
 #endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	DECLARE_VMA_BIT(THP_SETUP_1, 43),
+	DECLARE_VMA_BIT_ALIAS(THP_COW, THP_SETUP_1),
+#endif
 };
 #undef DECLARE_VMA_BIT
 #undef DECLARE_VMA_BIT_ALIAS
@@ -510,6 +514,9 @@ enum {
 #define VM_DROPPABLE		VM_NONE
 #define VMA_DROPPABLE		EMPTY_VMA_FLAGS
 #endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define VM_THP_COW	INIT_VM_FLAG(THP_COW)
+#endif
 
 /* Bits set in the VMA until the stack is in its final location */
 #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
@@ -4128,6 +4135,18 @@ extern int do_munmap(struct mm_struct *, unsigned long, size_t,
 		     struct list_head *uf);
 extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline bool madv_thp_behavior(int behavior)
+{
+	return behavior >= MADV_THP_SETUP_BASE && behavior < MADV_THP_SETUP_END;
+}
+#else
+static inline bool madv_thp_behavior(int behavior)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_MMU
 extern int __mm_populate(unsigned long addr, unsigned long len,
 			 int ignore_errors);
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index ef1c27fa3c57..1617ed374503 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -82,6 +82,15 @@
 #define MADV_GUARD_INSTALL 102		/* fatal signal on access to range */
 #define MADV_GUARD_REMOVE 103		/* unguard range */
 
+/* for THP setup */
+#define MADV_THP_SETUP_BASE 256
+enum {
+	MADV_THP_COW_BIT,
+	MADV_THP_SETUP_MAX_BIT,
+};
+#define MADV_THP_COW        (MADV_THP_SETUP_BASE + (1 << MADV_THP_COW_BIT))
+#define MADV_THP_SETUP_END	(MADV_THP_SETUP_BASE + (1 << MADV_THP_SETUP_MAX_BIT))
+
 /* compatibility flags */
 #define MAP_FILE	0
 
diff --git a/mm/madvise.c b/mm/madvise.c
index 69708e953cf5..5dbfc89682d7 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1331,6 +1331,25 @@ static bool can_madvise_modify(struct madvise_behavior *madv_behavior)
 }
 #endif
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static vm_flags_t madvise_thp_setup(struct madvise_behavior *madv_behavior)
+{
+	int thp_behavior = madv_behavior->behavior - MADV_THP_SETUP_BASE;
+	struct vm_area_struct *vma = madv_behavior->vma;
+	vm_flags_t new_flags = vma->vm_flags;
+
+	if (madv_thp_cow(thp_behavior))
+		new_flags |= VM_THP_COW;
+
+	return new_flags;
+}
+#else
+static vm_flags_t madvise_thp_setup(struct madvise_behavior *madv_behavior)
+{
+	return madv_behavior->vma->vm_flags;
+}
+#endif
+
 /*
  * Apply an madvise behavior to a region of a vma.  madvise_update_vma
  * will handle splitting a vm area into separate areas, each area with its own
@@ -1427,6 +1446,10 @@ static int madvise_vma_behavior(struct madvise_behavior *madv_behavior)
 		break;
 	}
 
+	/* Handle THP behaviors */
+	if (madv_thp_behavior(behavior))
+		new_flags = madvise_thp_setup(madv_behavior);
+
 	/* This is a write operation.*/
 	VM_WARN_ON_ONCE(madv_behavior->lock_mode != MADVISE_MMAP_WRITE_LOCK);
 
@@ -1555,6 +1578,8 @@ madvise_behavior_valid(int behavior)
 		return true;
 
 	default:
+		if (madv_thp_behavior(behavior))
+			return true;
 		return false;
 	}
 }

-- 
2.52.0


  reply	other threads:[~2026-05-01  5:56 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-01  5:55 [PATCH 0/5] mm: Support selecting doing direct COW for anonymous pmd entry Luka Bai
2026-05-01  5:55 ` Luka Bai [this message]
2026-05-01  5:55 ` [PATCH 2/5] mm: add pmd level THP COW parameter in sysfs Luka Bai
2026-05-01  5:55 ` [PATCH 3/5] mm: add pmd level THP COW judgement helpers Luka Bai
2026-05-01  5:55 ` [PATCH 4/5] mm: enable map_anon_folio_pmd_nopf to handle unshare Luka Bai
2026-05-01  5:55 ` [PATCH 5/5] mm: support choosing to do THP COW for anonymous pmd entry Luka Bai
2026-05-01  7:11   ` David Hildenbrand (Arm)
2026-05-01 15:01     ` Luka Bai
2026-05-01  7:07 ` [PATCH 0/5] mm: Support selecting doing direct " David Hildenbrand (Arm)
2026-05-01 16:16   ` Luka Bai
2026-05-01 18:30     ` David Hildenbrand (Arm)
2026-05-02  5:06       ` Luka Bai
2026-05-03  7:03 ` [syzbot ci] " syzbot ci

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260501-thp_cow-v1-1-005377483738@tencent.com \
    --to=lukafocus@icloud.com \
    --cc=akpm@linux-foundation.org \
    --cc=arnd@arndb.de \
    --cc=baohua@kernel.org \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=corbet@lwn.net \
    --cc=david@kernel.org \
    --cc=dev.jain@arm.com \
    --cc=jannh@google.com \
    --cc=kasong@tencent.com \
    --cc=lance.yang@linux.dev \
    --cc=liam@infradead.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=lukabai@tencent.com \
    --cc=mhocko@suse.com \
    --cc=npache@redhat.com \
    --cc=rppt@kernel.org \
    --cc=ryan.roberts@arm.com \
    --cc=skhan@linuxfoundation.org \
    --cc=surenb@google.com \
    --cc=vbabka@kernel.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox