[PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size.
@ 2025-10-21  6:35 Kiryl Shutsemau
  2025-10-21  6:35 ` [PATCH 2/2] mm/truncate: Unmap large folio on split failure Kiryl Shutsemau
  2025-10-21 12:08 ` [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size David Hildenbrand
  0 siblings, 2 replies; 12+ messages in thread
From: Kiryl Shutsemau @ 2025-10-21  6:35 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Hugh Dickins, Matthew Wilcox,
	Alexander Viro, Christian Brauner
  Cc: Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Rik van Riel, Harry Yoo,
	Johannes Weiner, Shakeel Butt, Baolin Wang, Darrick J. Wong,
	linux-mm, linux-fsdevel, linux-kernel, Kiryl Shutsemau

From: Kiryl Shutsemau <kas@kernel.org>

Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.

Recent changes attempted to fault in full folio where possible. They did
not respect i_size, which led to populating PTEs beyond i_size and
breaking SIGBUS semantics.

Darrick reported generic/749 breakage because of this.

However, the problem existed before the recent changes. With huge=always
tmpfs, any write to a file leads to PMD-size allocation. A subsequent
fault-in of the folio will install a PMD mapping regardless of i_size.

Fix filemap_map_pages() and finish_fault() to not install:
  - PTEs beyond i_size;
  - PMD mappings across i_size;

Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Fixes: 19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()")
Fixes: 357b92761d94 ("mm/filemap: map entire large folio faultaround")
Fixes: 800d8c63b2e9 ("shmem: add huge pages support")
Reported-by: "Darrick J. Wong" <djwong@kernel.org>
---
 mm/filemap.c | 18 ++++++++++--------
 mm/memory.c  | 12 ++++++++++--
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 13f0259d993c..0d251f6ab480 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3681,7 +3681,8 @@ static struct folio *next_uptodate_folio(struct xa_state *xas,
 static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
 			struct folio *folio, unsigned long start,
 			unsigned long addr, unsigned int nr_pages,
-			unsigned long *rss, unsigned short *mmap_miss)
+			unsigned long *rss, unsigned short *mmap_miss,
+			pgoff_t file_end)
 {
 	unsigned int ref_from_caller = 1;
 	vm_fault_t ret = 0;
@@ -3697,7 +3698,8 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
 	 */
 	addr0 = addr - start * PAGE_SIZE;
 	if (folio_within_vma(folio, vmf->vma) &&
-	    (addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) {
+	    (addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK) &&
+	    file_end >= folio_next_index(folio)) {
 		vmf->pte -= start;
 		page -= start;
 		addr = addr0;
@@ -3817,7 +3819,11 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 	if (!folio)
 		goto out;
 
-	if (filemap_map_pmd(vmf, folio, start_pgoff)) {
+	file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+	end_pgoff = min(end_pgoff, file_end);
+
+	if (file_end >= folio_next_index(folio) &&
+	    filemap_map_pmd(vmf, folio, start_pgoff)) {
 		ret = VM_FAULT_NOPAGE;
 		goto out;
 	}
@@ -3830,10 +3836,6 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		goto out;
 	}
 
-	file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
-	if (end_pgoff > file_end)
-		end_pgoff = file_end;
-
 	folio_type = mm_counter_file(folio);
 	do {
 		unsigned long end;
@@ -3850,7 +3852,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		else
 			ret |= filemap_map_folio_range(vmf, folio,
 					xas.xa_index - folio->index, addr,
-					nr_pages, &rss, &mmap_miss);
+					nr_pages, &rss, &mmap_miss, file_end);
 
 		folio_unlock(folio);
 	} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
diff --git a/mm/memory.c b/mm/memory.c
index 74b45e258323..dfa5b437c9d9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5480,6 +5480,7 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 	int type, nr_pages;
 	unsigned long addr;
 	bool needs_fallback = false;
+	pgoff_t file_end = -1UL;
 
 fallback:
 	addr = vmf->address;
@@ -5501,8 +5502,14 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 			return ret;
 	}
 
+	if (vma->vm_file) {
+		struct inode *inode = vma->vm_file->f_mapping->host;
+		file_end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+	}
+
 	if (pmd_none(*vmf->pmd)) {
-		if (folio_test_pmd_mappable(folio)) {
+		if (folio_test_pmd_mappable(folio) &&
+		    file_end >= folio_next_index(folio)) {
 			ret = do_set_pmd(vmf, folio, page);
 			if (ret != VM_FAULT_FALLBACK)
 				return ret;
@@ -5533,7 +5540,8 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 		if (unlikely(vma_off < idx ||
 			    vma_off + (nr_pages - idx) > vma_pages(vma) ||
 			    pte_off < idx ||
-			    pte_off + (nr_pages - idx)  > PTRS_PER_PTE)) {
+			    pte_off + (nr_pages - idx)  > PTRS_PER_PTE ||
+			    file_end < folio_next_index(folio))) {
 			nr_pages = 1;
 		} else {
 			/* Now we can set mappings for the whole large folio. */
-- 
2.50.1



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/2] mm/truncate: Unmap large folio on split failure
  2025-10-21  6:35 [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size Kiryl Shutsemau
@ 2025-10-21  6:35 ` Kiryl Shutsemau
  2025-10-21  9:44   ` David Hildenbrand
  2025-10-21 12:33   ` David Hildenbrand
  2025-10-21 12:08 ` [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size David Hildenbrand
  1 sibling, 2 replies; 12+ messages in thread
From: Kiryl Shutsemau @ 2025-10-21  6:35 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Hugh Dickins, Matthew Wilcox,
	Alexander Viro, Christian Brauner
  Cc: Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Rik van Riel, Harry Yoo,
	Johannes Weiner, Shakeel Butt, Baolin Wang, Darrick J. Wong,
	linux-mm, linux-fsdevel, linux-kernel, Kiryl Shutsemau

From: Kiryl Shutsemau <kas@kernel.org>

Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.

This behavior might not be respected on truncation.

During truncation, the kernel splits a large folio in order to reclaim
memory. As a side effect, it unmaps the folio and destroys PMD mappings
of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
are preserved.

However, if the split fails, PMD mappings are preserved and the user
will not receive SIGBUS on any accesses within the PMD.

Unmap the folio on split failure. It will lead to refault as PTEs and
preserve SIGBUS semantics.

Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
---
 mm/truncate.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/mm/truncate.c b/mm/truncate.c
index 91eb92a5ce4f..cdb698b5f7fa 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -177,6 +177,28 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
 	return 0;
 }
 
+static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at)
+{
+	enum ttu_flags ttu_flags =
+		TTU_RMAP_LOCKED |
+		TTU_SYNC |
+		TTU_BATCH_FLUSH |
+		TTU_SPLIT_HUGE_PMD |
+		TTU_IGNORE_MLOCK;
+	int ret;
+
+	ret = try_folio_split(folio, split_at, NULL);
+
+	/*
+	 * If the split fails, unmap the folio, so it will be refaulted
+	 * with PTEs to respect SIGBUS semantics.
+	 */
+	if (ret)
+		try_to_unmap(folio, ttu_flags);
+
+	return ret;
+}
+
 /*
  * Handle partial folios.  The folio may be entirely within the
  * range if a split has raced with us.  If not, we zero the part of the
@@ -224,7 +246,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
 		return true;
 
 	split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
-	if (!try_folio_split(folio, split_at, NULL)) {
+	if (!try_folio_split_or_unmap(folio, split_at)) {
 		/*
 		 * try to split at offset + length to make sure folios within
 		 * the range can be dropped, especially to avoid memory waste
@@ -249,12 +271,13 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
 			goto out;
 
 		/*
+		 * Split the folio.
+		 *
 		 * make sure folio2 is large and does not change its mapping.
-		 * Its split result does not matter here.
 		 */
 		if (folio_test_large(folio2) &&
 		    folio2->mapping == folio->mapping)
-			try_folio_split(folio2, split_at2, NULL);
+			try_folio_split_or_unmap(folio2, split_at2);
 
 		folio_unlock(folio2);
 out:
-- 
2.50.1



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] mm/truncate: Unmap large folio on split failure
  2025-10-21  6:35 ` [PATCH 2/2] mm/truncate: Unmap large folio on split failure Kiryl Shutsemau
@ 2025-10-21  9:44   ` David Hildenbrand
  2025-10-21  9:47     ` David Hildenbrand
  2025-10-21 12:33   ` David Hildenbrand
  1 sibling, 1 reply; 12+ messages in thread
From: David Hildenbrand @ 2025-10-21  9:44 UTC (permalink / raw)
  To: Kiryl Shutsemau, Andrew Morton, Hugh Dickins, Matthew Wilcox,
	Alexander Viro, Christian Brauner
  Cc: Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Rik van Riel, Harry Yoo,
	Johannes Weiner, Shakeel Butt, Baolin Wang, Darrick J. Wong,
	linux-mm, linux-fsdevel, linux-kernel, Kiryl Shutsemau

On 21.10.25 08:35, Kiryl Shutsemau wrote:
> From: Kiryl Shutsemau <kas@kernel.org>
> 
> Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
> supposed to generate SIGBUS.
> 
> This behavior might not be respected on truncation.
> 
> During truncation, the kernel splits a large folio in order to reclaim
> memory. As a side effect, it unmaps the folio and destroys PMD mappings
> of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
> are preserved.
> 
> However, if the split fails, PMD mappings are preserved and the user
> will not receive SIGBUS on any accesses within the PMD.
> 
> Unmap the folio on split failure. It will lead to refault as PTEs and
> preserve SIGBUS semantics.

Was the discussion on the old patch set already done? I can spot that 
you sent this series 20min after asking Christoph a question in reply to 
pushback.

-- 
Cheers

David / dhildenb



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] mm/truncate: Unmap large folio on split failure
  2025-10-21  9:44   ` David Hildenbrand
@ 2025-10-21  9:47     ` David Hildenbrand
  2025-10-21 11:31       ` Kiryl Shutsemau
  0 siblings, 1 reply; 12+ messages in thread
From: David Hildenbrand @ 2025-10-21  9:47 UTC (permalink / raw)
  To: Kiryl Shutsemau, Andrew Morton, Hugh Dickins, Matthew Wilcox,
	Alexander Viro, Christian Brauner
  Cc: Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Rik van Riel, Harry Yoo,
	Johannes Weiner, Shakeel Butt, Baolin Wang, Darrick J. Wong,
	linux-mm, linux-fsdevel, linux-kernel, Kiryl Shutsemau

On 21.10.25 11:44, David Hildenbrand wrote:
> On 21.10.25 08:35, Kiryl Shutsemau wrote:
>> From: Kiryl Shutsemau <kas@kernel.org>
>>
>> Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
>> supposed to generate SIGBUS.
>>
>> This behavior might not be respected on truncation.
>>
>> During truncation, the kernel splits a large folio in order to reclaim
>> memory. As a side effect, it unmaps the folio and destroys PMD mappings
>> of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
>> are preserved.
>>
>> However, if the split fails, PMD mappings are preserved and the user
>> will not receive SIGBUS on any accesses within the PMD.
>>
>> Unmap the folio on split failure. It will lead to refault as PTEs and
>> preserve SIGBUS semantics.
> 
> Was the discussion on the old patch set already done? I can spot that
> you send this series 20min after asking Christoph

^ Dave

Also, please send a proper patch series including cover letter that 
describes the changes since the last RFC.

-- 
Cheers

David / dhildenb



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] mm/truncate: Unmap large folio on split failure
  2025-10-21  9:47     ` David Hildenbrand
@ 2025-10-21 11:31       ` Kiryl Shutsemau
  2025-10-21 11:54         ` David Hildenbrand
  0 siblings, 1 reply; 12+ messages in thread
From: Kiryl Shutsemau @ 2025-10-21 11:31 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Andrew Morton, Hugh Dickins, Matthew Wilcox, Alexander Viro,
	Christian Brauner, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Johannes Weiner, Shakeel Butt,
	Baolin Wang, Darrick J. Wong, linux-mm, linux-fsdevel,
	linux-kernel

On Tue, Oct 21, 2025 at 11:47:11AM +0200, David Hildenbrand wrote:
> On 21.10.25 11:44, David Hildenbrand wrote:
> > On 21.10.25 08:35, Kiryl Shutsemau wrote:
> > > From: Kiryl Shutsemau <kas@kernel.org>
> > > 
> > > Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
> > > supposed to generate SIGBUS.
> > > 
> > > This behavior might not be respected on truncation.
> > > 
> > > During truncation, the kernel splits a large folio in order to reclaim
> > > memory. As a side effect, it unmaps the folio and destroys PMD mappings
> > > of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
> > > are preserved.
> > > 
> > > However, if the split fails, PMD mappings are preserved and the user
> > > will not receive SIGBUS on any accesses within the PMD.
> > > 
> > > Unmap the folio on split failure. It will lead to refault as PTEs and
> > > preserve SIGBUS semantics.
> > 
> > Was the discussion on the old patch set already done? I can spot that
> > you send this series 20min after asking Dave

Based on feedback from Dave and Christoph on this patchset as well as
comments from Matthew and Darrick on the report thread I see that my
idea to relax SIGBUS semantics for large folios will not fly :/

But if you want to weigh in...

> Also, please send a proper patch series including cover letter that
> describes the changes since the last RFC.

There is no change besides Signed-off-bys.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] mm/truncate: Unmap large folio on split failure
  2025-10-21 11:31       ` Kiryl Shutsemau
@ 2025-10-21 11:54         ` David Hildenbrand
  2025-10-21 12:25           ` Kiryl Shutsemau
  0 siblings, 1 reply; 12+ messages in thread
From: David Hildenbrand @ 2025-10-21 11:54 UTC (permalink / raw)
  To: Kiryl Shutsemau
  Cc: Andrew Morton, Hugh Dickins, Matthew Wilcox, Alexander Viro,
	Christian Brauner, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Johannes Weiner, Shakeel Butt,
	Baolin Wang, Darrick J. Wong, linux-mm, linux-fsdevel,
	linux-kernel

On 21.10.25 13:31, Kiryl Shutsemau wrote:
> On Tue, Oct 21, 2025 at 11:47:11AM +0200, David Hildenbrand wrote:
>> On 21.10.25 11:44, David Hildenbrand wrote:
>>> On 21.10.25 08:35, Kiryl Shutsemau wrote:
>>>> From: Kiryl Shutsemau <kas@kernel.org>
>>>>
>>>> Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
>>>> supposed to generate SIGBUS.
>>>>
>>>> This behavior might not be respected on truncation.
>>>>
>>>> During truncation, the kernel splits a large folio in order to reclaim
>>>> memory. As a side effect, it unmaps the folio and destroys PMD mappings
>>>> of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
>>>> are preserved.
>>>>
>>>> However, if the split fails, PMD mappings are preserved and the user
>>>> will not receive SIGBUS on any accesses within the PMD.
>>>>
>>>> Unmap the folio on split failure. It will lead to refault as PTEs and
>>>> preserve SIGBUS semantics.
>>>
>>> Was the discussion on the old patch set already done? I can spot that
>>> you send this series 20min after asking Dave
> 
> Based on feedback from Dave and Christoph on this patchset as well as
> comments form Matthew and Darrick ont the report thread I see that my
> idea to relax SIGBUS semantics for large folios will not fly :/

Then I was probably misreading the last email from you, likely the 
question you raised was independent of the progress of this series and 
more of general nature I assume.

> 
> But if you want to weigh in...

No, I think this makes sense. It's a regression that should be fixed.

> 
>> Also, please send a proper patch series including cover letter that
>> describes the changes since the last RFC.
> 
> There is no change besides Signed-off-bys.

Then point that out, please. It's common practice in MM to send cover 
letters for each new revision.

For example, Andrew will usually incorporate the cover letter into patch 
#1 when merging.

-- 
Cheers

David / dhildenb



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] mm/truncate: Unmap large folio on split failure
  2025-10-21 11:54         ` David Hildenbrand
@ 2025-10-21 12:25           ` Kiryl Shutsemau
  0 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau @ 2025-10-21 12:25 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Andrew Morton, Hugh Dickins, Matthew Wilcox, Alexander Viro,
	Christian Brauner, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Johannes Weiner, Shakeel Butt,
	Baolin Wang, Darrick J. Wong, linux-mm, linux-fsdevel,
	linux-kernel

On Tue, Oct 21, 2025 at 01:54:51PM +0200, David Hildenbrand wrote:
> On 21.10.25 13:31, Kiryl Shutsemau wrote:
> > On Tue, Oct 21, 2025 at 11:47:11AM +0200, David Hildenbrand wrote:
> > > On 21.10.25 11:44, David Hildenbrand wrote:
> > > > On 21.10.25 08:35, Kiryl Shutsemau wrote:
> > > > > From: Kiryl Shutsemau <kas@kernel.org>
> > > > > 
> > > > > Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
> > > > > supposed to generate SIGBUS.
> > > > > 
> > > > > This behavior might not be respected on truncation.
> > > > > 
> > > > > During truncation, the kernel splits a large folio in order to reclaim
> > > > > memory. As a side effect, it unmaps the folio and destroys PMD mappings
> > > > > of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
> > > > > are preserved.
> > > > > 
> > > > > However, if the split fails, PMD mappings are preserved and the user
> > > > > will not receive SIGBUS on any accesses within the PMD.
> > > > > 
> > > > > Unmap the folio on split failure. It will lead to refault as PTEs and
> > > > > preserve SIGBUS semantics.
> > > > 
> > > > Was the discussion on the old patch set already done? I can spot that
> > > > you send this series 20min after asking Dave
> > 
> > Based on feedback from Dave and Christoph on this patchset as well as
> > comments form Matthew and Darrick ont the report thread I see that my
> > idea to relax SIGBUS semantics for large folios will not fly :/
> 
> Then I was probably misreading the last email from you, likely the question
> you raised was independent of the progress of this series and more of
> general nature I assume.

Right.

> > 
> > But if you want to weigh in...
> 
> No, I think this makes sense. It's a regression that should be fixed.
> 
> > 
> > > Also, please send a proper patch series including cover letter that
> > > describes the changes since the last RFC.
> > 
> > There is no change besides Signed-off-bys.
> 
> Then point that out, please. It's common practice in MM to send cover
> letters for each new revision.
> 
> For example, Andrew will usually incorporate the cover letter into patch #1
> when merging.

Okay, will do.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] mm/truncate: Unmap large folio on split failure
  2025-10-21  6:35 ` [PATCH 2/2] mm/truncate: Unmap large folio on split failure Kiryl Shutsemau
  2025-10-21  9:44   ` David Hildenbrand
@ 2025-10-21 12:33   ` David Hildenbrand
  2025-10-21 12:58     ` Kiryl Shutsemau
  1 sibling, 1 reply; 12+ messages in thread
From: David Hildenbrand @ 2025-10-21 12:33 UTC (permalink / raw)
  To: Kiryl Shutsemau, Andrew Morton, Hugh Dickins, Matthew Wilcox,
	Alexander Viro, Christian Brauner
  Cc: Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Rik van Riel, Harry Yoo,
	Johannes Weiner, Shakeel Butt, Baolin Wang, Darrick J. Wong,
	linux-mm, linux-fsdevel, linux-kernel, Kiryl Shutsemau

On 21.10.25 08:35, Kiryl Shutsemau wrote:
> From: Kiryl Shutsemau <kas@kernel.org>
> 
> Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
> supposed to generate SIGBUS.
> 
> This behavior might not be respected on truncation.
> 
> During truncation, the kernel splits a large folio in order to reclaim
> memory. As a side effect, it unmaps the folio and destroys PMD mappings
> of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
> are preserved.
> 
> However, if the split fails, PMD mappings are preserved and the user
> will not receive SIGBUS on any accesses within the PMD.
> 
> Unmap the folio on split failure. It will lead to refault as PTEs and
> preserve SIGBUS semantics.
> 
> Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
> ---
>   mm/truncate.c | 29 ++++++++++++++++++++++++++---
>   1 file changed, 26 insertions(+), 3 deletions(-)
> 
> diff --git a/mm/truncate.c b/mm/truncate.c
> index 91eb92a5ce4f..cdb698b5f7fa 100644
> --- a/mm/truncate.c
> +++ b/mm/truncate.c
> @@ -177,6 +177,28 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
>   	return 0;
>   }
>   
> +static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at)
> +{
> +	enum ttu_flags ttu_flags =
> +		TTU_RMAP_LOCKED |
> +		TTU_SYNC |
> +		TTU_BATCH_FLUSH |

I recall that this flag interacts with try_to_unmap_flush() / 
try_to_unmap_flush_dirty().

See unmap_folio() as one example.

If so, aren't we missing such a call or is the flush implied already 
somehow?

> +		TTU_SPLIT_HUGE_PMD |
> +		TTU_IGNORE_MLOCK;
> +	int ret;
> +
> +	ret = try_folio_split(folio, split_at, NULL);
> +
> +	/*
> +	 * If the split fails, unmap the folio, so it will be refaulted
> +	 * with PTEs to respect SIGBUS semantics.
> +	 */
> +	if (ret)
> +		try_to_unmap(folio, ttu_flags);

Just wondering: do we want to check whether the folio is now actually 
completely unmapped through !folio_mapped() and try to handle if it 
isn't (maybe just warn? Don't know)

We usually check after try_to_unmap() whether we actually found all 
mappings (see unmap_poisoned_folio()). I recall some corner cases where 
unmapping could fail, but I don't remember whether that's specific to 
anonymous pages only.

> +
> +	return ret;
> +}
> +
>   /*
>    * Handle partial folios.  The folio may be entirely within the
>    * range if a split has raced with us.  If not, we zero the part of the
> @@ -224,7 +246,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
>   		return true;
>   
>   	split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
> -	if (!try_folio_split(folio, split_at, NULL)) {
> +	if (!try_folio_split_or_unmap(folio, split_at)) {
>   		/*
>   		 * try to split at offset + length to make sure folios within
>   		 * the range can be dropped, especially to avoid memory waste
> @@ -249,12 +271,13 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
>   			goto out;
>   
>   		/*
> +		 * Split the folio.

I'd drop that. It's not particularly helpful given that we call 
try_folio_split_or_unmap() and mention further above "try to split at 
offset".

Nothing else jumped at me!

-- 
Cheers

David / dhildenb



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] mm/truncate: Unmap large folio on split failure
  2025-10-21 12:33   ` David Hildenbrand
@ 2025-10-21 12:58     ` Kiryl Shutsemau
  0 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau @ 2025-10-21 12:58 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Andrew Morton, Hugh Dickins, Matthew Wilcox, Alexander Viro,
	Christian Brauner, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Johannes Weiner, Shakeel Butt,
	Baolin Wang, Darrick J. Wong, linux-mm, linux-fsdevel,
	linux-kernel

On Tue, Oct 21, 2025 at 02:33:39PM +0200, David Hildenbrand wrote:
> On 21.10.25 08:35, Kiryl Shutsemau wrote:
> > From: Kiryl Shutsemau <kas@kernel.org>
> > 
> > Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
> > supposed to generate SIGBUS.
> > 
> > This behavior might not be respected on truncation.
> > 
> > During truncation, the kernel splits a large folio in order to reclaim
> > memory. As a side effect, it unmaps the folio and destroys PMD mappings
> > of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
> > are preserved.
> > 
> > However, if the split fails, PMD mappings are preserved and the user
> > will not receive SIGBUS on any accesses within the PMD.
> > 
> > Unmap the folio on split failure. It will lead to refault as PTEs and
> > preserve SIGBUS semantics.
> > 
> > Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
> > ---
> >   mm/truncate.c | 29 ++++++++++++++++++++++++++---
> >   1 file changed, 26 insertions(+), 3 deletions(-)
> > 
> > diff --git a/mm/truncate.c b/mm/truncate.c
> > index 91eb92a5ce4f..cdb698b5f7fa 100644
> > --- a/mm/truncate.c
> > +++ b/mm/truncate.c
> > @@ -177,6 +177,28 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
> >   	return 0;
> >   }
> > +static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at)
> > +{
> > +	enum ttu_flags ttu_flags =
> > +		TTU_RMAP_LOCKED |
> > +		TTU_SYNC |
> > +		TTU_BATCH_FLUSH |
> 
> I recall that this flag interacts with try_to_unmap_flush() /
> try_to_unmap_flush_dirty().
> 
> See unmap_folio() as one example.
> 
> If so, aren't we missing such a call or is the flush implied already
> somehow?

My bad. TTU_RMAP_LOCKED also should not be there.

Will fix.

> > +		TTU_SPLIT_HUGE_PMD |
> > +		TTU_IGNORE_MLOCK;
> > +	int ret;
> > +
> > +	ret = try_folio_split(folio, split_at, NULL);
> > +
> > +	/*
> > +	 * If the split fails, unmap the folio, so it will be refaulted
> > +	 * with PTEs to respect SIGBUS semantics.
> > +	 */
> > +	if (ret)
> > +		try_to_unmap(folio, ttu_flags);
> 
> Just wondering: do we want to check whether the folio is now actually
> completely unmapped through !folio_mapped() and try to handle if it isn't
> (maybe just warn? Don't know)
> 
> We usually check after try_to_unmap() whether we actually found all mappings
> (see unmap_poisoned_folio()). I recall some corner cases where unmapping
> could fail, but I don't remember whether that's specific to anonymous pages
> only.

I will add WARN_ON(folio_mapped(folio)).

> 
> > +
> > +	return ret;
> > +}
> > +
> >   /*
> >    * Handle partial folios.  The folio may be entirely within the
> >    * range if a split has raced with us.  If not, we zero the part of the
> > @@ -224,7 +246,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
> >   		return true;
> >   	split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
> > -	if (!try_folio_split(folio, split_at, NULL)) {
> > +	if (!try_folio_split_or_unmap(folio, split_at)) {
> >   		/*
> >   		 * try to split at offset + length to make sure folios within
> >   		 * the range can be dropped, especially to avoid memory waste
> > @@ -249,12 +271,13 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
> >   			goto out;
> >   		/*
> > +		 * Split the folio.
> 
> I'd drop that. It's not particularly helpful given that we call
> try_folio_split_or_unmap() and mention further above "try to split at
> offset".

Okay.

> Nothing else jumped at me!

Thanks for the review!

-- 
  Kiryl Shutsemau / Kirill A. Shutemov


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size.
  2025-10-21  6:35 [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size Kiryl Shutsemau
  2025-10-21  6:35 ` [PATCH 2/2] mm/truncate: Unmap large folio on split failure Kiryl Shutsemau
@ 2025-10-21 12:08 ` David Hildenbrand
  2025-10-21 12:28   ` Kiryl Shutsemau
  1 sibling, 1 reply; 12+ messages in thread
From: David Hildenbrand @ 2025-10-21 12:08 UTC (permalink / raw)
  To: Kiryl Shutsemau, Andrew Morton, Hugh Dickins, Matthew Wilcox,
	Alexander Viro, Christian Brauner, Hugh Dickins
  Cc: Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Rik van Riel, Harry Yoo,
	Johannes Weiner, Shakeel Butt, Baolin Wang, Darrick J. Wong,
	linux-mm, linux-fsdevel, linux-kernel, Kiryl Shutsemau

On 21.10.25 08:35, Kiryl Shutsemau wrote:
> From: Kiryl Shutsemau <kas@kernel.org>

Subject: I'd drop the trailing "."

> 
> Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
> supposed to generate SIGBUS.
> 
> Recent changes attempted to fault in full folio where possible. They did
> not respect i_size, which led to populating PTEs beyond i_size and
> breaking SIGBUS semantics.
> 
> Darrick reported generic/749 breakage because of this.
> 
> However, the problem existed before the recent changes. With huge=always
> tmpfs, any write to a file leads to PMD-size allocation. Following the
> fault-in of the folio will install PMD mapping regardless of i_size.

Right, there are some legacy oddities with shmem in that area (e.g., 
"within_size" vs. "always" THP allocation control).

Let me CC Hugh: the behavior for shmem seems to date back to 2016.

> 
> Fix filemap_map_pages() and finish_fault() to not install:
>    - PTEs beyond i_size;
>    - PMD mappings across i_size;

Makes sense to me.


[...]

> +++ b/mm/memory.c
> @@ -5480,6 +5480,7 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
>   	int type, nr_pages;
>   	unsigned long addr;
>   	bool needs_fallback = false;
> +	pgoff_t file_end = -1UL;
>   
>   fallback:
>   	addr = vmf->address;
> @@ -5501,8 +5502,14 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
>   			return ret;
>   	}
>   
> +	if (vma->vm_file) {
> +		struct inode *inode = vma->vm_file->f_mapping->host;

empty line please

> +		file_end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
> +	}
> +
>   	if (pmd_none(*vmf->pmd)) {
> -		if (folio_test_pmd_mappable(folio)) {
> +		if (folio_test_pmd_mappable(folio) &&
> +		    file_end >= folio_next_index(folio)) {
>   			ret = do_set_pmd(vmf, folio, page);
>   			if (ret != VM_FAULT_FALLBACK)
>   				return ret;
> @@ -5533,7 +5540,8 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
>   		if (unlikely(vma_off < idx ||
>   			    vma_off + (nr_pages - idx) > vma_pages(vma) ||
>   			    pte_off < idx ||
> -			    pte_off + (nr_pages - idx)  > PTRS_PER_PTE)) {
> +			    pte_off + (nr_pages - idx)  > PTRS_PER_PTE ||

While at it you could fix the double space before the ">".

> +			    file_end < folio_next_index(folio))) {
>   			nr_pages = 1;
>   		} else {
>   			/* Now we can set mappings for the whole large folio. */

Nothing else jumped at me.

-- 
Cheers

David / dhildenb



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size.
  2025-10-21 12:08 ` [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size David Hildenbrand
@ 2025-10-21 12:28   ` Kiryl Shutsemau
  0 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau @ 2025-10-21 12:28 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Andrew Morton, Hugh Dickins, Matthew Wilcox, Alexander Viro,
	Christian Brauner, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Johannes Weiner, Shakeel Butt,
	Baolin Wang, Darrick J. Wong, linux-mm, linux-fsdevel,
	linux-kernel

On Tue, Oct 21, 2025 at 02:08:44PM +0200, David Hildenbrand wrote:
> On 21.10.25 08:35, Kiryl Shutsemau wrote:
> > From: Kiryl Shutsemau <kas@kernel.org>
> 
> Subject: I'd drop the trailing "."

Ack.

> > 
> > Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
> > supposed to generate SIGBUS.
> > 
> > Recent changes attempted to fault in full folio where possible. They did
> > not respect i_size, which led to populating PTEs beyond i_size and
> > breaking SIGBUS semantics.
> > 
> > Darrick reported generic/749 breakage because of this.
> > 
> > However, the problem existed before the recent changes. With huge=always
> > tmpfs, any write to a file leads to PMD-size allocation. Following the
> > fault-in of the folio will install PMD mapping regardless of i_size.
> 
> Right, there are some legacy oddities with shmem in that area (e.g.,
> "within_size" vs. "always" THP allocation control).
> 
> Let me CC Hugh: the behavior for shmem seems to date back to 2016.

Yes, it is my huge tmpfs implementation that introduced this.

And Hugh is on CC.

> > 
> > Fix filemap_map_pages() and finish_fault() to not install:
> >    - PTEs beyond i_size;
> >    - PMD mappings across i_size;
> 
> Makes sense to me.
> 
> 
> [...]
> 
> > +++ b/mm/memory.c
> > @@ -5480,6 +5480,7 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
> >   	int type, nr_pages;
> >   	unsigned long addr;
> >   	bool needs_fallback = false;
> > +	pgoff_t file_end = -1UL;
> >   fallback:
> >   	addr = vmf->address;
> > @@ -5501,8 +5502,14 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
> >   			return ret;
> >   	}
> > +	if (vma->vm_file) {
> > +		struct inode *inode = vma->vm_file->f_mapping->host;
> 
> empty line please

Ack.

> 
> > +		file_end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
> > +	}
> > +
> >   	if (pmd_none(*vmf->pmd)) {
> > -		if (folio_test_pmd_mappable(folio)) {
> > +		if (folio_test_pmd_mappable(folio) &&
> > +		    file_end >= folio_next_index(folio)) {
> >   			ret = do_set_pmd(vmf, folio, page);
> >   			if (ret != VM_FAULT_FALLBACK)
> >   				return ret;
> > @@ -5533,7 +5540,8 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
> >   		if (unlikely(vma_off < idx ||
> >   			    vma_off + (nr_pages - idx) > vma_pages(vma) ||
> >   			    pte_off < idx ||
> > -			    pte_off + (nr_pages - idx)  > PTRS_PER_PTE)) {
> > +			    pte_off + (nr_pages - idx)  > PTRS_PER_PTE ||
> 
> While at it you could fix the double space before the ">".

Okay.


> > +			    file_end < folio_next_index(folio))) {
> >   			nr_pages = 1;
> >   		} else {
> >   			/* Now we can set mappings for the whole large folio. */
> 
> Nothing else jumped at me.
> 
> -- 
> Cheers
> 
> David / dhildenb
> 

-- 
  Kiryl Shutsemau / Kirill A. Shutemov


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [RFC, PATCH 0/2] Large folios vs. SIGBUS semantics
@ 2025-10-20 16:30 Kiryl Shutsemau
  2025-10-20 16:30 ` [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size Kiryl Shutsemau
  0 siblings, 1 reply; 12+ messages in thread
From: Kiryl Shutsemau @ 2025-10-20 16:30 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Hugh Dickins, Matthew Wilcox,
	Alexander Viro, Christian Brauner
  Cc: Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Rik van Riel, Harry Yoo,
	Johannes Weiner, Shakeel Butt, Baolin Wang, Darrick J. Wong,
	linux-mm, linux-fsdevel, linux-kernel, Kiryl Shutsemau

From: Kiryl Shutsemau <kas@kernel.org>

I do NOT want the patches in this patchset to be applied. Instead, I
would like to discuss the semantics of large folios versus SIGBUS.

## Background

Accessing memory within a VMA, but beyond i_size rounded up to the next
page size, is supposed to generate SIGBUS.

This definition is simple if all pages are PAGE_SIZE in size, but with
large folios in the picture, it is no longer the case.

## Problem

Darrick reported[1] an xfstests regression in v6.18-rc1. generic/749
failed due to missing SIGBUS. This was caused by my recent changes that
try to fault in the whole folio where possible:

	19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()")
	357b92761d94 ("mm/filemap: map entire large folio faultaround")

These changes did not consider i_size when setting up PTEs, leading to
xfstest breakage.

However, the problem has been present in the kernel for a long time -
since huge tmpfs was introduced in 2016. The kernel happily maps
PMD-sized folios as PMD without checking i_size. And huge=always tmpfs
allocates PMD-size folios on any writes.

I considered this corner case when I implemented huge tmpfs, and my
conclusion was that no one in their right mind should rely on receiving
a SIGBUS signal when accessing beyond i_size. I cannot imagine how it
could be useful for the workload.

Generic/749 was introduced[2] last year with reference to POSIX, but no
real workloads were mentioned. It also acknowledged the tmpfs deviation
from the test case.

POSIX indeed says[3]:

	References within the address range starting at pa and
	continuing for len bytes to whole pages following the end of an
	object shall result in delivery of a SIGBUS signal.

Do we care about adhering strictly to this in the absence of real workloads
that rely on these semantics?

I think it is valuable to allow the kernel to map memory in larger chunks
-- whole folios -- to get TLB benefits (from both huge pages and TLB
coalescing). I value TLB hit rate over POSIX wording.

Any opinions?

See also discussion in the thread[1] with the report.

[1] https://lore.kernel.org/all/20251014175214.GW6188@frogsfrogsfrogs
[2] https://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git/commit/tests/generic/749?h=for-next&id=e4a6b119e5229599eac96235fb7e683b8a8bdc53
[3] https://pubs.opengroup.org/onlinepubs/9799919799/

Kiryl Shutsemau (2):
  mm/memory: Do not populate page table entries beyond i_size.
  mm/truncate: Unmap large folio on split failure

 mm/filemap.c  | 18 ++++++++++--------
 mm/memory.c   | 12 ++++++++++--
 mm/truncate.c | 29 ++++++++++++++++++++++++++---
 3 files changed, 46 insertions(+), 13 deletions(-)

-- 
2.50.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size.
  2025-10-20 16:30 [RFC, PATCH 0/2] Large folios vs. SIGBUS semantics Kiryl Shutsemau
@ 2025-10-20 16:30 ` Kiryl Shutsemau
  0 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau @ 2025-10-20 16:30 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Hugh Dickins, Matthew Wilcox,
	Alexander Viro, Christian Brauner
  Cc: Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Rik van Riel, Harry Yoo,
	Johannes Weiner, Shakeel Butt, Baolin Wang, Darrick J. Wong,
	linux-mm, linux-fsdevel, linux-kernel, Kiryl Shutsemau

From: Kiryl Shutsemau <kas@kernel.org>

Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.

Recent changes attempted to fault in full folio where possible. They did
not respect i_size, which led to populating PTEs beyond i_size and
breaking SIGBUS semantics.

Darrick reported generic/749 breakage because of this.

However, the problem existed before the recent changes. With huge=always
tmpfs, any write to a file leads to a PMD-size allocation. The subsequent
fault-in of the folio will install a PMD mapping regardless of i_size.

Fix filemap_map_pages() and finish_fault() to not install:
  - PTEs beyond i_size;
  - PMD mappings across i_size;

Not-yet-signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Fixes: 19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()")
Fixes: 357b92761d94 ("mm/filemap: map entire large folio faultaround")
Fixes: 800d8c63b2e9 ("shmem: add huge pages support")
Reported-by: "Darrick J. Wong" <djwong@kernel.org>
---
 mm/filemap.c | 18 ++++++++++--------
 mm/memory.c  | 12 ++++++++++--
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 13f0259d993c..0d251f6ab480 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3681,7 +3681,8 @@ static struct folio *next_uptodate_folio(struct xa_state *xas,
 static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
 			struct folio *folio, unsigned long start,
 			unsigned long addr, unsigned int nr_pages,
-			unsigned long *rss, unsigned short *mmap_miss)
+			unsigned long *rss, unsigned short *mmap_miss,
+			pgoff_t file_end)
 {
 	unsigned int ref_from_caller = 1;
 	vm_fault_t ret = 0;
@@ -3697,7 +3698,8 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
 	 */
 	addr0 = addr - start * PAGE_SIZE;
 	if (folio_within_vma(folio, vmf->vma) &&
-	    (addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) {
+	    (addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK) &&
+	    file_end >= folio_next_index(folio)) {
 		vmf->pte -= start;
 		page -= start;
 		addr = addr0;
@@ -3817,7 +3819,11 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 	if (!folio)
 		goto out;
 
-	if (filemap_map_pmd(vmf, folio, start_pgoff)) {
+	file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+	end_pgoff = min(end_pgoff, file_end);
+
+	if (file_end >= folio_next_index(folio) &&
+	    filemap_map_pmd(vmf, folio, start_pgoff)) {
 		ret = VM_FAULT_NOPAGE;
 		goto out;
 	}
@@ -3830,10 +3836,6 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		goto out;
 	}
 
-	file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
-	if (end_pgoff > file_end)
-		end_pgoff = file_end;
-
 	folio_type = mm_counter_file(folio);
 	do {
 		unsigned long end;
@@ -3850,7 +3852,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		else
 			ret |= filemap_map_folio_range(vmf, folio,
 					xas.xa_index - folio->index, addr,
-					nr_pages, &rss, &mmap_miss);
+					nr_pages, &rss, &mmap_miss, file_end);
 
 		folio_unlock(folio);
 	} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
diff --git a/mm/memory.c b/mm/memory.c
index 74b45e258323..dfa5b437c9d9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5480,6 +5480,7 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 	int type, nr_pages;
 	unsigned long addr;
 	bool needs_fallback = false;
+	pgoff_t file_end = -1UL;
 
 fallback:
 	addr = vmf->address;
@@ -5501,8 +5502,14 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 			return ret;
 	}
 
+	if (vma->vm_file) {
+		struct inode *inode = vma->vm_file->f_mapping->host;
+		file_end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+	}
+
 	if (pmd_none(*vmf->pmd)) {
-		if (folio_test_pmd_mappable(folio)) {
+		if (folio_test_pmd_mappable(folio) &&
+		    file_end >= folio_next_index(folio)) {
 			ret = do_set_pmd(vmf, folio, page);
 			if (ret != VM_FAULT_FALLBACK)
 				return ret;
@@ -5533,7 +5540,8 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 		if (unlikely(vma_off < idx ||
 			    vma_off + (nr_pages - idx) > vma_pages(vma) ||
 			    pte_off < idx ||
-			    pte_off + (nr_pages - idx)  > PTRS_PER_PTE)) {
+			    pte_off + (nr_pages - idx)  > PTRS_PER_PTE ||
+			    file_end < folio_next_index(folio))) {
 			nr_pages = 1;
 		} else {
 			/* Now we can set mappings for the whole large folio. */
-- 
2.50.1



^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2025-10-21 12:59 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-10-21  6:35 [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size Kiryl Shutsemau
2025-10-21  6:35 ` [PATCH 2/2] mm/truncate: Unmap large folio on split failure Kiryl Shutsemau
2025-10-21  9:44   ` David Hildenbrand
2025-10-21  9:47     ` David Hildenbrand
2025-10-21 11:31       ` Kiryl Shutsemau
2025-10-21 11:54         ` David Hildenbrand
2025-10-21 12:25           ` Kiryl Shutsemau
2025-10-21 12:33   ` David Hildenbrand
2025-10-21 12:58     ` Kiryl Shutsemau
2025-10-21 12:08 ` [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size David Hildenbrand
2025-10-21 12:28   ` Kiryl Shutsemau
  -- strict thread matches above, loose matches on Subject: below --
2025-10-20 16:30 [RFC, PATCH 0/2] Large folios vs. SIGBUS semantics Kiryl Shutsemau
2025-10-20 16:30 ` [PATCH 1/2] mm/memory: Do not populate page table entries beyond i_size Kiryl Shutsemau

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).