From: Luiz Capitulino <luizcap@redhat.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
david@kernel.org, baolin.wang@linux.alibaba.com, ziy@nvidia.com,
lance.yang@linux.dev
Cc: corbet@lwn.net, tsbogend@alpha.franken.de, maddy@linux.ibm.com,
mpe@ellerman.id.au, agordeev@linux.ibm.com,
gerald.schaefer@linux.ibm.com, hca@linux.ibm.com,
gor@linux.ibm.com, x86@kernel.org, dave.hansen@linux.intel.com,
djbw@kernel.org, vishal.l.verma@intel.com, dave.jiang@intel.com,
akpm@linux-foundation.org, lorenzo.stoakes@oracle.com
Subject: (sashiko review) Re: [PATCH v4 9/9] mm: thp: always enable mTHP support
Date: Wed, 6 May 2026 14:34:46 -0400 [thread overview]
Message-ID: <a3acf91d-3207-4a32-a115-e0f037024095@redhat.com> (raw)
In-Reply-To: <f67da00a825da9097b5faf2f390ad344450b88be.1777663129.git.luizcap@redhat.com>
On 2026-05-01 15:18, Luiz Capitulino wrote:
> If PMD-sized pages are not supported on an architecture (ie. the
> arch implements arch_has_pmd_leaves() and it returns false) then the
> current code disables all THP, including mTHP.
>
> This commit fixes this by allowing mTHP to be always enabled for all
> archs. When PMD-sized pages are not supported, its sysfs entry won't be
> created and their mapping will be disallowed at page-fault time.
>
> Similarly, this commit implements the following changes for shmem in
> shmem_allowable_huge_orders():
>
> - Drop the pgtable_has_pmd_leaves() check so that mTHP sizes are
> considered
> - Filter out PMD and PUD orders from allowable orders when
> PMD-sized pages are not supported by the CPU
>
> Signed-off-by: Luiz Capitulino <luizcap@redhat.com>
> ---
> mm/huge_memory.c | 23 ++++++++++++++++++-----
> mm/shmem.c | 14 +++++++++-----
> 2 files changed, 27 insertions(+), 10 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 32254febe097..c1765c8e3dc6 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -126,6 +126,14 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
> else
> supported_orders = THP_ORDERS_ALL_FILE_DEFAULT;
>
> + if (!pgtable_has_pmd_leaves()) {
> + /*
> + * The CPU doesn't support PMD-sized pages, assume it
> + * doesn't support PUD-sized pages either.
> + */
> + supported_orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
> + }
> +
> orders &= supported_orders;
> if (!orders)
> return 0;
> @@ -133,7 +141,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
> if (!vma->vm_mm) /* vdso */
> return 0;
>
> - if (!pgtable_has_pmd_leaves() || vma_thp_disabled(vma, vm_flags, forced_collapse))
> + if (vma_thp_disabled(vma, vm_flags, forced_collapse))
> return 0;
>
> /* khugepaged doesn't collapse DAX vma, but page fault is fine. */
> @@ -848,7 +856,7 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
> * disable all other sizes. powerpc's PMD_ORDER isn't a compile-time
> * constant so we have to do this here.
> */
> - if (!anon_orders_configured)
> + if (!anon_orders_configured && pgtable_has_pmd_leaves())
> huge_anon_orders_inherit = BIT(PMD_ORDER);
>
> *hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
> @@ -870,6 +878,14 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
> }
>
> orders = THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DEFAULT;
> + if (!pgtable_has_pmd_leaves()) {
> + /*
> + * The CPU doesn't support PMD-sized pages, assume it
> + * doesn't support PUD-sized pages either.
> + */
> + orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
> + }
> +
> order = highest_order(orders);
> while (orders) {
> thpsize = thpsize_create(order, *hugepage_kobj);
> @@ -969,9 +985,6 @@ static int __init hugepage_init(void)
> int err;
> struct kobject *hugepage_kobj;
>
> - if (!pgtable_has_pmd_leaves())
> - return -EINVAL;
> -
> /*
> * hugepages can't be allocated by the buddy allocator
> */
> diff --git a/mm/shmem.c b/mm/shmem.c
> index a48f034830cd..23893c2bc2dd 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1840,16 +1840,19 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
> unsigned long mask = READ_ONCE(huge_shmem_orders_always);
> unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
> vm_flags_t vm_flags = vma ? vma->vm_flags : 0;
> - unsigned int global_orders;
> + unsigned int global_orders, filter_orders = 0;
>
> - if (!pgtable_has_pmd_leaves() || (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force)))
> + if (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force))
> return 0;
>
> + if (!pgtable_has_pmd_leaves())
> + filter_orders = BIT(PMD_ORDER) | BIT(PUD_ORDER);
> +
> global_orders = shmem_huge_global_enabled(inode, index, write_end,
> shmem_huge_force, vma, vm_flags);
> /* Tmpfs huge pages allocation */
> if (!vma || !vma_is_anon_shmem(vma))
> - return global_orders;
> + return global_orders & ~filter_orders;
"""
Could this lead to unintended truncation of the order masks?
Because filter_orders is declared as an unsigned int, assigning the result
of BIT() causes an implicit downcast from unsigned long.
"""
If I'm checking this right, the maximum bit number we can get to is 22
on arm64. So, we're safe. That being said, we should use a consistent
type for both filter_orders and global_orders. I'll change it.
NOTE: I'm skipping the other comment from Sashiko which is about the
same issue.
>
> /*
> * Following the 'deny' semantics of the top level, force the huge
> @@ -1863,7 +1866,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
> * means non-PMD sized THP can not override 'huge' mount option now.
> */
> if (shmem_huge == SHMEM_HUGE_FORCE)
> - return READ_ONCE(huge_shmem_orders_inherit);
> + return READ_ONCE(huge_shmem_orders_inherit) & ~filter_orders;
>
> /* Allow mTHP that will be fully within i_size. */
> mask |= shmem_get_orders_within_size(inode, within_size_orders, index, 0);
> @@ -1874,6 +1877,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
> if (global_orders > 0)
> mask |= READ_ONCE(huge_shmem_orders_inherit);
>
> + mask &= ~filter_orders;
> return THP_ORDERS_ALL_FILE_DEFAULT & mask;
> }
>
> @@ -5457,7 +5461,7 @@ void __init shmem_init(void)
> * Default to setting PMD-sized THP to inherit the global setting and
> * disable all other multi-size THPs.
> */
> - if (!shmem_orders_configured)
> + if (!shmem_orders_configured && pgtable_has_pmd_leaves())
> huge_shmem_orders_inherit = BIT(HPAGE_PMD_ORDER);
> #endif
> return;
next prev parent reply other threads:[~2026-05-06 18:34 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-01 19:18 [PATCH v4 0/9] mm: thp: always enable mTHP support Luiz Capitulino
2026-05-01 19:18 ` [PATCH v4 1/9] docs: tmpfs: remove implementation detail reference Luiz Capitulino
2026-05-01 19:18 ` [PATCH v4 2/9] mm: introduce pgtable_has_pmd_leaves() Luiz Capitulino
2026-05-06 17:50 ` (sashiko review) " Luiz Capitulino
2026-05-01 19:18 ` [PATCH v4 3/9] drivers: dax: use pgtable_has_pmd_leaves() Luiz Capitulino
2026-05-01 19:18 ` [PATCH v4 4/9] drivers: nvdimm: " Luiz Capitulino
2026-05-01 19:18 ` [PATCH v4 5/9] mm: debug_vm_pgtable: " Luiz Capitulino
2026-05-01 19:18 ` [PATCH v4 6/9] mm: shmem: drop has_transparent_hugepage() usage Luiz Capitulino
2026-05-06 18:12 ` (sashiko review) " Luiz Capitulino
2026-05-01 19:18 ` [PATCH v4 7/9] treewide: introduce arch_has_pmd_leaves() Luiz Capitulino
2026-05-06 18:22 ` (sashiko review) " Luiz Capitulino
2026-05-06 18:30 ` Luiz Capitulino
2026-05-01 19:18 ` [PATCH v4 8/9] mm: replace thp_disabled_by_hw() with pgtable_has_pmd_leaves() Luiz Capitulino
2026-05-01 19:18 ` [PATCH v4 9/9] mm: thp: always enable mTHP support Luiz Capitulino
2026-05-06 5:46 ` Baolin Wang
2026-05-06 18:34 ` Luiz Capitulino [this message]
2026-05-03 15:02 ` [PATCH v4 0/9] " Andrew Morton
2026-05-04 19:11 ` Luiz Capitulino
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a3acf91d-3207-4a32-a115-e0f037024095@redhat.com \
--to=luizcap@redhat.com \
--cc=agordeev@linux.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=corbet@lwn.net \
--cc=dave.hansen@linux.intel.com \
--cc=dave.jiang@intel.com \
--cc=david@kernel.org \
--cc=djbw@kernel.org \
--cc=gerald.schaefer@linux.ibm.com \
--cc=gor@linux.ibm.com \
--cc=hca@linux.ibm.com \
--cc=lance.yang@linux.dev \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=maddy@linux.ibm.com \
--cc=mpe@ellerman.id.au \
--cc=tsbogend@alpha.franken.de \
--cc=vishal.l.verma@intel.com \
--cc=x86@kernel.org \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox