Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Lance Yang <lance.yang@linux.dev>
To: luizcap@redhat.com, baolin.wang@linux.alibaba.com
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	david@kernel.org, ziy@nvidia.com, lance.yang@linux.dev,
	corbet@lwn.net, tsbogend@alpha.franken.de, maddy@linux.ibm.com,
	mpe@ellerman.id.au, agordeev@linux.ibm.com,
	gerald.schaefer@linux.ibm.com, hca@linux.ibm.com,
	gor@linux.ibm.com, x86@kernel.org, tglx@kernel.org,
	mingo@redhat.com, bp@alien8.de, ira.weiny@intel.com,
	hughd@google.com, dave.hansen@linux.intel.com, djbw@kernel.org,
	vishal.l.verma@intel.com, dave.jiang@intel.com,
	akpm@linux-foundation.org, yintirui@huawei.com, ljs@kernel.org
Subject: Re: [PATCH v5 14/14] mm: thp: always enable mTHP support
Date: Wed,  3 Jun 2026 14:47:18 +0800	[thread overview]
Message-ID: <20260603064718.81699-1-lance.yang@linux.dev> (raw)
In-Reply-To: <8781a9a0f115705ee11884ed3184b65a1ce39923.1780066530.git.luizcap@redhat.com>

Hi Luiz,

SHMEM_HUGE_FORCE still assumes PMD order in a few places.

Is that expected?

shmem_init() only sets the default inherit mask when PMD leaves are
available.

	if (!shmem_orders_configured && pgtable_has_pmd_leaves())
		huge_shmem_orders_inherit = BIT(HPAGE_PMD_ORDER);

But shmem_parse_huge() rejects "force" unless the mask is exactly PMD
order.

	if (huge == SHMEM_HUGE_FORCE &&
	    huge_shmem_orders_inherit != BIT(HPAGE_PMD_ORDER))
		return -EINVAL;

Even if "force" is selected, shmem_huge_global_enabled() still returns
only PMD order.

	if (shmem_huge_force || shmem_huge == SHMEM_HUGE_FORCE)
		return maybe_pmd_order;

and shmem_allowable_huge_orders() masks it out.

	if (!pgtable_has_pmd_leaves())
		disabled_orders = BIT(PMD_ORDER);

	if (!vma || !vma_is_anon_shmem(vma))
		return global_orders & ~disabled_orders;

For anon shmem, it can also return 0 for the same reason.

	if (shmem_huge == SHMEM_HUGE_FORCE)
		return READ_ONCE(huge_shmem_orders_inherit) & ~disabled_orders;

Should SHMEM_HUGE_FORCE use the available mTHP orders below PMD when
pgtable_has_pmd_leaves() is false?

Cheers, Lance

On Fri, May 29, 2026 at 10:55:32AM -0400, Luiz Capitulino wrote:
>If PMD-sized pages are not supported on an architecture (ie. the
>arch implements arch_has_pmd_leaves() and it returns false) then the
>current code disables all THP, including mTHP.
>
>This commit fixes this by allowing mTHP to be always enabled for all
>archs. When PMD-sized pages are not supported, its sysfs entry won't be
>created and their mapping will be disallowed at page-fault time.
>
>Similarly, this commit implements the following changes for shmem in
>shmem_allowable_huge_orders():
>
> - Drop the pgtable_has_pmd_leaves() check so that mTHP sizes are
>   considered
> - Filter out PMD and PUD orders from allowable orders when
>   PMD-sized pages are not supported by the CPU
>
>Signed-off-by: Luiz Capitulino <luizcap@redhat.com>
>---
> mm/huge_memory.c | 25 ++++++++++++++++++++-----
> mm/shmem.c       | 14 +++++++++-----
> 2 files changed, 29 insertions(+), 10 deletions(-)
>
>diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>index 32254febe097..059901a8c6cb 100644
>--- a/mm/huge_memory.c
>+++ b/mm/huge_memory.c
>@@ -126,6 +126,15 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
> 	else
> 		supported_orders = THP_ORDERS_ALL_FILE_DEFAULT;
> 
>+	if (!pgtable_has_pmd_leaves()) {
>+		/*
>+		 * If the CPU does not support PMD leaves, assume for
>+		 * now that it does not support PUD leaves and disable
>+		 * both folio orders.
>+		 */
>+		supported_orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
>+	}
>+
> 	orders &= supported_orders;
> 	if (!orders)
> 		return 0;
>@@ -133,7 +142,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
> 	if (!vma->vm_mm)		/* vdso */
> 		return 0;
> 
>-	if (!pgtable_has_pmd_leaves() || vma_thp_disabled(vma, vm_flags, forced_collapse))
>+	if (vma_thp_disabled(vma, vm_flags, forced_collapse))
> 		return 0;
> 
> 	/* khugepaged doesn't collapse DAX vma, but page fault is fine. */
>@@ -848,7 +857,7 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
> 	 * disable all other sizes. powerpc's PMD_ORDER isn't a compile-time
> 	 * constant so we have to do this here.
> 	 */
>-	if (!anon_orders_configured)
>+	if (!anon_orders_configured && pgtable_has_pmd_leaves())
> 		huge_anon_orders_inherit = BIT(PMD_ORDER);
> 
> 	*hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
>@@ -870,6 +879,15 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
> 	}
> 
> 	orders = THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DEFAULT;
>+	if (!pgtable_has_pmd_leaves()) {
>+		/*
>+		 * If the CPU does not support PMD leaves, assume for
>+		 * now that it does not support PUD leaves and disable
>+		 * both folio orders.
>+		 */
>+		orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
>+	}
>+
> 	order = highest_order(orders);
> 	while (orders) {
> 		thpsize = thpsize_create(order, *hugepage_kobj);
>@@ -969,9 +987,6 @@ static int __init hugepage_init(void)
> 	int err;
> 	struct kobject *hugepage_kobj;
> 
>-	if (!pgtable_has_pmd_leaves())
>-		return -EINVAL;
>-
> 	/*
> 	 * hugepages can't be allocated by the buddy allocator
> 	 */
>diff --git a/mm/shmem.c b/mm/shmem.c
>index 079e299ea789..c15dffd0eb41 100644
>--- a/mm/shmem.c
>+++ b/mm/shmem.c
>@@ -1844,16 +1844,19 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
> 	unsigned long mask = READ_ONCE(huge_shmem_orders_always);
> 	unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
> 	vm_flags_t vm_flags = vma ? vma->vm_flags : 0;
>-	unsigned int global_orders;
>+	unsigned int global_orders, disabled_orders = 0;
> 
>-	if (!pgtable_has_pmd_leaves() || (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force)))
>+	if (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force))
> 		return 0;
> 
>+	if (!pgtable_has_pmd_leaves())
>+		disabled_orders = BIT(PMD_ORDER);
>+
> 	global_orders = shmem_huge_global_enabled(inode, index, write_end,
> 						  shmem_huge_force, vma, vm_flags);
> 	/* Tmpfs huge pages allocation */
> 	if (!vma || !vma_is_anon_shmem(vma))
>-		return global_orders;
>+		return global_orders & ~disabled_orders;
> 
> 	/*
> 	 * Following the 'deny' semantics of the top level, force the huge
>@@ -1867,7 +1870,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
> 	 * means non-PMD sized THP can not override 'huge' mount option now.
> 	 */
> 	if (shmem_huge == SHMEM_HUGE_FORCE)
>-		return READ_ONCE(huge_shmem_orders_inherit);
>+		return READ_ONCE(huge_shmem_orders_inherit) & ~disabled_orders;
> 
> 	/* Allow mTHP that will be fully within i_size. */
> 	mask |= shmem_get_orders_within_size(inode, within_size_orders, index, 0);
>@@ -1878,6 +1881,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
> 	if (global_orders > 0)
> 		mask |= READ_ONCE(huge_shmem_orders_inherit);
> 
>+	mask &= ~disabled_orders;
> 	return THP_ORDERS_ALL_FILE_DEFAULT & mask;
> }
> 
>@@ -5461,7 +5465,7 @@ void __init shmem_init(void)
> 	 * Default to setting PMD-sized THP to inherit the global setting and
> 	 * disable all other multi-size THPs.
> 	 */
>-	if (!shmem_orders_configured)
>+	if (!shmem_orders_configured && pgtable_has_pmd_leaves())
> 		huge_shmem_orders_inherit = BIT(HPAGE_PMD_ORDER);
> #endif
> 	return;
>-- 
>2.54.0
>
>


  parent reply	other threads:[~2026-06-03  6:47 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-29 14:55 [PATCH v5 00/14] mm: thp: always enable mTHP support Luiz Capitulino
2026-05-29 14:55 ` [PATCH v5 01/14] docs: tmpfs: remove implementation detail reference Luiz Capitulino
2026-06-02 10:16   ` Dev Jain
2026-05-29 14:55 ` [PATCH v5 02/14] mm: shmem: shmem_getattr(): set blksize to highest supported THP order Luiz Capitulino
2026-06-02 15:13   ` Zi Yan
2026-05-29 14:55 ` [PATCH v5 03/14] mm: introduce pgtable_has_pmd_leaves() Luiz Capitulino
2026-05-29 14:55 ` [PATCH v5 04/14] drivers: dax: use pgtable_has_pmd_leaves() Luiz Capitulino
2026-05-29 14:55 ` [PATCH v5 05/14] drivers: nvdimm: " Luiz Capitulino
2026-05-29 14:55 ` [PATCH v5 06/14] mm: debug_vm_pgtable: " Luiz Capitulino
2026-05-29 14:55 ` [PATCH v5 07/14] mm: shmem: allow THP support determination at folio allocation time Luiz Capitulino
2026-06-01 12:09   ` Baolin Wang
2026-06-01 12:20     ` Luiz Capitulino
2026-06-02  2:55       ` Baolin Wang
2026-05-29 14:55 ` [PATCH v5 08/14] s390: move has_transparent_hugepage() out of THP guard Luiz Capitulino
2026-05-29 14:55 ` [PATCH v5 09/14] powerpc: " Luiz Capitulino
2026-05-29 14:55 ` [PATCH v5 10/14] mips: " Luiz Capitulino
2026-05-29 14:55 ` [PATCH v5 11/14] x86: " Luiz Capitulino
2026-05-29 14:55 ` [PATCH v5 12/14] treewide: introduce arch_has_pmd_leaves() Luiz Capitulino
2026-05-29 14:55 ` [PATCH v5 13/14] mm: replace thp_disabled_by_hw() with pgtable_has_pmd_leaves() Luiz Capitulino
2026-05-29 14:55 ` [PATCH v5 14/14] mm: thp: always enable mTHP support Luiz Capitulino
2026-06-01 12:02   ` Baolin Wang
2026-06-02  3:02     ` Baolin Wang
2026-06-03  6:47   ` Lance Yang [this message]
2026-06-03  7:23     ` Baolin Wang
2026-06-03  7:40       ` Lance Yang
2026-06-03  8:12   ` Lance Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260603064718.81699-1-lance.yang@linux.dev \
    --to=lance.yang@linux.dev \
    --cc=agordeev@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=bp@alien8.de \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=dave.jiang@intel.com \
    --cc=david@kernel.org \
    --cc=djbw@kernel.org \
    --cc=gerald.schaefer@linux.ibm.com \
    --cc=gor@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=hughd@google.com \
    --cc=ira.weiny@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=luizcap@redhat.com \
    --cc=maddy@linux.ibm.com \
    --cc=mingo@redhat.com \
    --cc=mpe@ellerman.id.au \
    --cc=tglx@kernel.org \
    --cc=tsbogend@alpha.franken.de \
    --cc=vishal.l.verma@intel.com \
    --cc=x86@kernel.org \
    --cc=yintirui@huawei.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox