From: Jane Chu <jane.chu@oracle.com>
To: akpm@linux-foundation.org, david@kernel.org,
muchun.song@linux.dev, osalvador@suse.de
Cc: lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com,
vbabka@kernel.org, rppt@kernel.org, surenb@google.com,
mhocko@suse.com, corbet@lwn.net, skhan@linuxfoundation.org,
hughd@google.com, baolin.wang@linux.alibaba.com,
peterx@redhat.com, linux-mm@kvack.org, linux-doc@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: [PATCH 3/6] hugetlb: make hugetlb_fault_mutex_hash() take PAGE_SIZE index
Date: Thu, 9 Apr 2026 17:41:54 -0600 [thread overview]
Message-ID: <20260409234158.837786-4-jane.chu@oracle.com> (raw)
In-Reply-To: <20260409234158.837786-1-jane.chu@oracle.com>
hugetlb_fault_mutex_hash() is used to serialize faults and page cache
operations on the same hugetlb file offset. The helper currently expects
its index argument in hugetlb page granularity, so callers have to
open-code conversions from the PAGE_SIZE-based indices commonly used
by the rest of the MM helpers.
Change hugetlb_fault_mutex_hash() to take a PAGE_SIZE-based index
instead, and perform the hugetlb-granularity conversion inside the helper.
Update all callers accordingly.
This makes the helper interface consistent with filemap_get_folio()
and linear_page_index(), while preserving the same lock selection for
a given hugetlb file offset.
Signed-off-by: Jane Chu <jane.chu@oracle.com>
---
fs/hugetlbfs/inode.c | 19 ++++++++++---------
mm/hugetlb.c | 28 +++++++++++++++++++---------
mm/memfd.c | 11 ++++++-----
mm/userfaultfd.c | 7 +++----
4 files changed, 38 insertions(+), 27 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cf79fb830377..e24e9bf54e14 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -575,7 +575,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
struct address_space *mapping = &inode->i_data;
const pgoff_t end = lend >> PAGE_SHIFT;
struct folio_batch fbatch;
- pgoff_t next, index;
+ pgoff_t next, idx;
int i, freed = 0;
bool truncate_op = (lend == LLONG_MAX);
@@ -586,15 +586,15 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
struct folio *folio = fbatch.folios[i];
u32 hash = 0;
- index = folio->index >> huge_page_order(h);
- hash = hugetlb_fault_mutex_hash(mapping, index);
+ hash = hugetlb_fault_mutex_hash(mapping, folio->index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/*
* Remove folio that was part of folio_batch.
*/
+ idx = folio->index >> huge_page_order(h);
remove_inode_single_folio(h, inode, mapping, folio,
- index, truncate_op);
+ idx, truncate_op);
freed++;
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -734,7 +734,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
struct mm_struct *mm = current->mm;
loff_t hpage_size = huge_page_size(h);
unsigned long hpage_shift = huge_page_shift(h);
- pgoff_t start, index, end;
+ pgoff_t start, end, idx, index;
int error;
u32 hash;
@@ -774,7 +774,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
vm_flags_init(&pseudo_vma, VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pseudo_vma.vm_file = file;
- for (index = start; index < end; index++) {
+ for (idx = start; idx < end; idx++) {
/*
* This is supposed to be the vaddr where the page is being
* faulted in, but we have no vaddr here.
@@ -794,14 +794,15 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
}
/* addr is the offset within the file (zero based) */
- addr = index * hpage_size;
+ addr = idx * hpage_size;
/* mutex taken here, fault path and hole punch */
+ index = idx << huge_page_order(h);
hash = hugetlb_fault_mutex_hash(mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
- folio = filemap_get_folio(mapping, index << huge_page_order(h));
+ folio = filemap_get_folio(mapping, index);
if (!IS_ERR(folio)) {
folio_put(folio);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -824,7 +825,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
}
folio_zero_user(folio, addr);
__folio_mark_uptodate(folio);
- error = hugetlb_add_to_page_cache(folio, mapping, index);
+ error = hugetlb_add_to_page_cache(folio, mapping, idx);
if (unlikely(error)) {
restore_reserve_on_error(h, &pseudo_vma, addr, folio);
folio_put(folio);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 38b39eaf46cc..9d5ae1f87850 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5515,7 +5515,7 @@ static vm_fault_t hugetlb_wp(struct vm_fault *vmf)
*/
if (cow_from_owner) {
struct address_space *mapping = vma->vm_file->f_mapping;
- pgoff_t idx;
+ pgoff_t index;
u32 hash;
folio_put(old_folio);
@@ -5528,8 +5528,9 @@ static vm_fault_t hugetlb_wp(struct vm_fault *vmf)
*
* Reacquire both after unmap operation.
*/
- idx = vma_hugecache_offset(h, vma, vmf->address);
- hash = hugetlb_fault_mutex_hash(mapping, idx);
+ index = linear_page_index(vma, vmf->address);
+ hash = hugetlb_fault_mutex_hash(mapping, index);
+
hugetlb_vma_unlock_read(vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -5664,6 +5665,10 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_fault *vmf,
unsigned long reason)
{
u32 hash;
+ pgoff_t index;
+
+ index = linear_page_index(vmf->vma, vmf->address);
+ hash = hugetlb_fault_mutex_hash(mapping, index);
/*
* vma_lock and hugetlb_fault_mutex must be dropped before handling
@@ -5671,7 +5676,6 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_fault *vmf,
* userfault, any vma operation should be careful from here.
*/
hugetlb_vma_unlock_read(vmf->vma);
- hash = hugetlb_fault_mutex_hash(mapping, vmf->pgoff);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
return handle_userfault(vmf, reason);
}
@@ -5696,7 +5700,8 @@ static bool hugetlb_pte_stable(struct hstate *h, struct mm_struct *mm, unsigned
static vm_fault_t hugetlb_no_page(struct address_space *mapping,
struct vm_fault *vmf)
{
- u32 hash = hugetlb_fault_mutex_hash(mapping, vmf->pgoff);
+ u32 hash;
+ pgoff_t index;
bool new_folio, new_anon_folio = false;
struct vm_area_struct *vma = vmf->vma;
struct mm_struct *mm = vma->vm_mm;
@@ -5707,6 +5712,8 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
unsigned long size;
pte_t new_pte;
+ index = vmf->pgoff << huge_page_order(h);
+ hash = hugetlb_fault_mutex_hash(mapping, index);
/*
* Currently, we are forced to kill the process in the event the
* original mapper has unmapped pages from the child due to a failed
@@ -5920,13 +5927,14 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
}
#ifdef CONFIG_SMP
-u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
+/* 'index' is expected to be in PAGE_SIZE granularity */
+u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t index)
{
unsigned long key[2];
u32 hash;
key[0] = (unsigned long) mapping;
- key[1] = idx;
+ key[1] = index >> huge_page_order(hstate_inode(mapping->host));
hash = jhash2((u32 *)&key, sizeof(key)/(sizeof(u32)), 0);
@@ -5937,7 +5945,7 @@ u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
* For uniprocessor systems we always use a single mutex, so just
* return 0 and avoid the hashing overhead.
*/
-u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
+u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t index)
{
return 0;
}
@@ -5952,6 +5960,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
struct hstate *h = hstate_vma(vma);
struct address_space *mapping;
bool need_wait_lock = false;
+ pgoff_t index;
struct vm_fault vmf = {
.vma = vma,
.address = address & huge_page_mask(h),
@@ -5972,8 +5981,9 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* get spurious allocation failures if two CPUs race to instantiate
* the same page in the page cache.
*/
+ index = linear_page_index(vma, vmf.address);
mapping = vma->vm_file->f_mapping;
- hash = hugetlb_fault_mutex_hash(mapping, vmf.pgoff);
+ hash = hugetlb_fault_mutex_hash(mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/*
diff --git a/mm/memfd.c b/mm/memfd.c
index fb425f4e315f..911ff8220d05 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -64,7 +64,7 @@ static void memfd_tag_pins(struct xa_state *xas)
* (memfd_pin_folios()) cannot find a folio in the page cache at a given
* index in the mapping.
*/
-struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
+struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t index)
{
#ifdef CONFIG_HUGETLB_PAGE
struct folio *folio;
@@ -79,12 +79,13 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
*/
struct inode *inode = file_inode(memfd);
struct hstate *h = hstate_file(memfd);
- int err = -ENOMEM;
long nr_resv;
+ pgoff_t idx;
+ int err = -ENOMEM;
gfp_mask = htlb_alloc_mask(h);
gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE);
- idx >>= huge_page_order(h);
+ idx = index >> huge_page_order(h);
nr_resv = hugetlb_reserve_pages(inode, idx, idx + 1, NULL, EMPTY_VMA_FLAGS);
if (nr_resv < 0)
@@ -116,7 +117,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
* races with concurrent allocations, as required by all other
* callers of hugetlb_add_to_page_cache().
*/
- hash = hugetlb_fault_mutex_hash(memfd->f_mapping, idx);
+ hash = hugetlb_fault_mutex_hash(memfd->f_mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
err = hugetlb_add_to_page_cache(folio,
@@ -140,7 +141,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
return ERR_PTR(err);
}
#endif
- return shmem_read_folio(memfd->f_mapping, idx);
+ return shmem_read_folio(memfd->f_mapping, index);
}
/*
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index c053aa4389b6..9482b25d3d84 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -504,7 +504,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
long copied;
struct folio *folio;
unsigned long vma_hpagesize;
- pgoff_t idx;
+ pgoff_t index;
u32 hash;
struct address_space *mapping;
@@ -573,10 +573,9 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
* in the case of shared pmds. fault mutex prevents
* races with other faulting threads.
*/
- idx = linear_page_index(dst_vma, dst_addr);
- idx >>= huge_page_order(hstate_vma(dst_vma));
+ index = linear_page_index(dst_vma, dst_addr);
mapping = dst_vma->vm_file->f_mapping;
- hash = hugetlb_fault_mutex_hash(mapping, idx);
+ hash = hugetlb_fault_mutex_hash(mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
hugetlb_vma_lock_read(dst_vma);
--
2.43.5
next prev parent reply other threads:[~2026-04-09 23:42 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-09 23:41 [PATCH 0/6] hugetlb: normalize exported interfaces to use base-page indices Jane Chu
2026-04-09 23:41 ` [PATCH 1/6] hugetlb: open-code hugetlb folio lookup index conversion Jane Chu
2026-04-09 23:41 ` [PATCH 2/6] hugetlb: remove the hugetlb_linear_page_index() helper Jane Chu
2026-04-09 23:41 ` Jane Chu [this message]
2026-04-09 23:41 ` [PATCH 4/6] hugetlb: drop vma_hugecache_offset() in favor of linear_page_index() Jane Chu
2026-04-09 23:41 ` [PATCH 5/6] hugetlb: make hugetlb_add_to_page_cache() use PAGE_SIZE-based index Jane Chu
2026-04-09 23:41 ` [PATCH 6/6] hugetlb: pass hugetlb reservation ranges in base-page indices Jane Chu
2026-04-10 6:45 ` [syzbot ci] Re: hugetlb: normalize exported interfaces to use " syzbot ci
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260409234158.837786-4-jane.chu@oracle.com \
--to=jane.chu@oracle.com \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=corbet@lwn.net \
--cc=david@kernel.org \
--cc=hughd@google.com \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=mhocko@suse.com \
--cc=muchun.song@linux.dev \
--cc=osalvador@suse.de \
--cc=peterx@redhat.com \
--cc=rppt@kernel.org \
--cc=skhan@linuxfoundation.org \
--cc=surenb@google.com \
--cc=vbabka@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox