All of lore.kernel.org
 help / color / mirror / Atom feed
* + mm-khugepaged-maintain-page-cache-uptodate-flag.patch added to mm-unstable branch
@ 2023-03-07 22:58 Andrew Morton
  0 siblings, 0 replies; 2+ messages in thread
From: Andrew Morton @ 2023-03-07 22:58 UTC (permalink / raw)
  To: mm-commits, willy, shy828301, peterx, kirill, jiaqiyan, hughd,
	david, stevensd, akpm


The patch titled
     Subject: mm/khugepaged: maintain page cache uptodate flag
has been added to the -mm mm-unstable branch.  Its filename is
     mm-khugepaged-maintain-page-cache-uptodate-flag.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-khugepaged-maintain-page-cache-uptodate-flag.patch

This patch will later appear in the mm-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: David Stevens <stevensd@chromium.org>
Subject: mm/khugepaged: maintain page cache uptodate flag
Date: Tue, 7 Mar 2023 14:20:36 +0900

Make sure that collapse_file doesn't interfere with checking the uptodate
flag in the page cache by only inserting hpage into the page cache after
it has been updated and marked uptodate.  This is achieved by simply not
replacing present pages with hpage when iterating over the target range. 
The present pages are already locked, so replacing the with the locked
hpage before the collapse is finalized is unnecessary.

This fixes a race where folio_seek_hole_data would mistake hpage for an
fallocated but unwritten page.  This race is visible to userspace via data
temporarily disappearing from SEEK_DATA/SEEK_HOLE.

Link: https://lkml.kernel.org/r/20230307052036.1520708-4-stevensd@google.com
Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: David Stevens <stevensd@chromium.org>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jiaqi Yan <jiaqiyan@google.com>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/khugepaged.c |   50 +++++++++++-----------------------------------
 1 file changed, 12 insertions(+), 38 deletions(-)

--- a/mm/khugepaged.c~mm-khugepaged-maintain-page-cache-uptodate-flag
+++ a/mm/khugepaged.c
@@ -1930,12 +1930,6 @@ static int collapse_file(struct mm_struc
 		}
 	} while (1);
 
-	/*
-	 * At this point the hpage is locked and not up-to-date.
-	 * It's safe to insert it into the page cache, because nobody would
-	 * be able to map it or use it in another way until we unlock it.
-	 */
-
 	xas_set(&xas, start);
 	for (index = start; index < end; index++) {
 		page = xas_next(&xas);
@@ -2104,13 +2098,9 @@ static int collapse_file(struct mm_struc
 		}
 
 		/*
-		 * Add the page to the list to be able to undo the collapse if
-		 * something go wrong.
+		 * Accumulate the pages that are being collapsed.
 		 */
 		list_add_tail(&page->lru, &pagelist);
-
-		/* Finally, replace with the new page. */
-		xas_store(&xas, hpage);
 		continue;
 out_unlock:
 		unlock_page(page);
@@ -2149,8 +2139,7 @@ xa_unlocked:
 		goto rollback;
 
 	/*
-	 * Replacing old pages with new one has succeeded, now we
-	 * attempt to copy the contents.
+	 * The old pages are locked, so they won't change anymore.
 	 */
 	index = start;
 	list_for_each_entry(page, &pagelist, lru) {
@@ -2230,11 +2219,11 @@ immap_locked:
 		/* nr_none is always 0 for non-shmem. */
 		__mod_lruvec_page_state(hpage, NR_SHMEM, nr_none);
 	}
-	/* Join all the small entries into a single multi-index entry. */
-	xas_set_order(&xas, start, HPAGE_PMD_ORDER);
-	xas_store(&xas, hpage);
-	xas_unlock_irq(&xas);
 
+	/*
+	 * Mark hpage as uptodate before inserting it into the page cache so
+	 * that it isn't mistaken for an fallocated but unwritten page.
+	 */
 	folio = page_folio(hpage);
 	folio_mark_uptodate(folio);
 	folio_ref_add(folio, HPAGE_PMD_NR - 1);
@@ -2243,6 +2232,11 @@ immap_locked:
 		folio_mark_dirty(folio);
 	folio_add_lru(folio);
 
+	/* Join all the small entries into a single multi-index entry. */
+	xas_set_order(&xas, start, HPAGE_PMD_ORDER);
+	xas_store(&xas, hpage);
+	xas_unlock_irq(&xas);
+
 	/*
 	 * Remove pte page tables, so we can re-fault the page as huge.
 	 */
@@ -2267,36 +2261,18 @@ immap_locked:
 
 rollback:
 	/* Something went wrong: roll back page cache changes */
-	xas_lock_irq(&xas);
 	if (nr_none) {
 		mapping->nrpages -= nr_none;
 		shmem_uncharge(mapping->host, nr_none);
 	}
 
-	xas_set(&xas, start);
-	end = index;
-	for (index = start; index < end; index++) {
-		xas_next(&xas);
-		page = list_first_entry_or_null(&pagelist,
-				struct page, lru);
-		if (!page || xas.xa_index < page->index) {
-			nr_none--;
-			continue;
-		}
-
-		VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
-
+	list_for_each_entry_safe(page, tmp, &pagelist, lru) {
 		/* Unfreeze the page. */
 		list_del(&page->lru);
 		page_ref_unfreeze(page, 2);
-		xas_store(&xas, page);
-		xas_pause(&xas);
-		xas_unlock_irq(&xas);
 		unlock_page(page);
 		putback_lru_page(page);
-		xas_lock_irq(&xas);
 	}
-	VM_BUG_ON(nr_none);
 	/*
 	 * Undo the updates of filemap_nr_thps_inc for non-SHMEM file only.
 	 * This undo is not needed unless failure is due to SCAN_COPY_MC.
@@ -2304,8 +2280,6 @@ rollback:
 	if (!is_shmem && result == SCAN_COPY_MC)
 		filemap_nr_thps_dec(mapping);
 
-	xas_unlock_irq(&xas);
-
 	hpage->mapping = NULL;
 
 	unlock_page(hpage);
_

Patches currently in -mm which might be from stevensd@chromium.org are

mm-khugepaged-refactor-collapse_file-control-flow.patch
mm-khugepaged-skip-shmem-with-userfaultfd.patch
mm-khugepaged-maintain-page-cache-uptodate-flag.patch


^ permalink raw reply	[flat|nested] 2+ messages in thread
* + mm-khugepaged-maintain-page-cache-uptodate-flag.patch added to mm-unstable branch
@ 2023-04-04 19:48 Andrew Morton
  0 siblings, 0 replies; 2+ messages in thread
From: Andrew Morton @ 2023-04-04 19:48 UTC (permalink / raw)
  To: mm-commits, willy, shy828301, peterx, kirill, jiaqiyan, hughd,
	david, stevensd, akpm


The patch titled
     Subject: mm/khugepaged: maintain page cache uptodate flag
has been added to the -mm mm-unstable branch.  Its filename is
     mm-khugepaged-maintain-page-cache-uptodate-flag.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-khugepaged-maintain-page-cache-uptodate-flag.patch

This patch will later appear in the mm-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: David Stevens <stevensd@chromium.org>
Subject: mm/khugepaged: maintain page cache uptodate flag
Date: Tue, 4 Apr 2023 21:01:17 +0900

Make sure that collapse_file doesn't interfere with checking the uptodate
flag in the page cache by only inserting hpage into the page cache after
it has been updated and marked uptodate.  This is achieved by simply not
replacing present pages with hpage when iterating over the target range.

The present pages are already locked, so replacing them with the locked
hpage before the collapse is finalized is unnecessary.  However, it is
necessary to stop freezing the present pages after validating them, since
leaving long-term frozen pages in the page cache can lead to deadlocks. 
Simply checking the reference count is sufficient to ensure that there are
no long-term references hanging around that would the collapse would
break.  Similar to hpage, there is no reason that the present pages
actually need to be frozen in addition to being locked.

This fixes a race where folio_seek_hole_data would mistake hpage for an
fallocated but unwritten page.  This race is visible to userspace via data
temporarily disappearing from SEEK_DATA/SEEK_HOLE.  This also fixes a
similar race where pages could temporarily disappear from mincore.

Link: https://lkml.kernel.org/r/20230404120117.2562166-5-stevensd@google.com
Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: David Stevens <stevensd@chromium.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jiaqi Yan <jiaqiyan@google.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/khugepaged.c |   79 ++++++++++++++++------------------------------
 1 file changed, 29 insertions(+), 50 deletions(-)

--- a/mm/khugepaged.c~mm-khugepaged-maintain-page-cache-uptodate-flag
+++ a/mm/khugepaged.c
@@ -1855,17 +1855,18 @@ next:
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and lock a new huge page;
- *  - scan page cache replacing old pages with the new one
+ *  - scan page cache, locking old pages
  *    + swap/gup in pages if necessary;
- *    + keep old pages around in case rollback is required;
+ *  - copy data to new page
+ *  - handle shmem holes
+ *    + re-validate that holes weren't filled by someone else
+ *    + check for userfaultfd
  *  - finalize updates to the page cache;
  *  - if replacing succeeds:
- *    + copy data over;
- *    + free old pages;
  *    + unlock huge page;
+ *    + free old pages;
  *  - if replacing failed;
- *    + put all pages back and unfreeze them;
- *    + restore gaps in the page cache;
+ *    + unlock old pages
  *    + unlock and free huge page;
  */
 static int collapse_file(struct mm_struct *mm, unsigned long addr,
@@ -1913,12 +1914,6 @@ static int collapse_file(struct mm_struc
 		}
 	} while (1);
 
-	/*
-	 * At this point the hpage is locked and not up-to-date.
-	 * It's safe to insert it into the page cache, because nobody would
-	 * be able to map it or use it in another way until we unlock it.
-	 */
-
 	xas_set(&xas, start);
 	for (index = start; index < end; index++) {
 		page = xas_next(&xas);
@@ -2076,12 +2071,16 @@ static int collapse_file(struct mm_struc
 		VM_BUG_ON_PAGE(page != xas_load(&xas), page);
 
 		/*
-		 * The page is expected to have page_count() == 3:
+		 * We control three references to the page:
 		 *  - we hold a pin on it;
 		 *  - one reference from page cache;
 		 *  - one from isolate_lru_page;
+		 * If those are the only references, then any new usage of the
+		 * page will have to fetch it from the page cache. That requires
+		 * locking the page to handle truncate, so any new usage will be
+		 * blocked until we unlock page after collapse/during rollback.
 		 */
-		if (!page_ref_freeze(page, 3)) {
+		if (page_count(page) != 3) {
 			result = SCAN_PAGE_COUNT;
 			xas_unlock_irq(&xas);
 			putback_lru_page(page);
@@ -2089,13 +2088,9 @@ static int collapse_file(struct mm_struc
 		}
 
 		/*
-		 * Add the page to the list to be able to undo the collapse if
-		 * something go wrong.
+		 * Accumulate the pages that are being collapsed.
 		 */
 		list_add_tail(&page->lru, &pagelist);
-
-		/* Finally, replace with the new page. */
-		xas_store(&xas, hpage);
 		continue;
 out_unlock:
 		unlock_page(page);
@@ -2132,8 +2127,7 @@ xa_unlocked:
 		goto rollback;
 
 	/*
-	 * Replacing old pages with new one has succeeded, now we
-	 * attempt to copy the contents.
+	 * The old pages are locked, so they won't change anymore.
 	 */
 	index = start;
 	list_for_each_entry(page, &pagelist, lru) {
@@ -2222,11 +2216,11 @@ immap_locked:
 		/* nr_none is always 0 for non-shmem. */
 		__mod_lruvec_page_state(hpage, NR_SHMEM, nr_none);
 	}
-	/* Join all the small entries into a single multi-index entry. */
-	xas_set_order(&xas, start, HPAGE_PMD_ORDER);
-	xas_store(&xas, hpage);
-	xas_unlock_irq(&xas);
 
+	/*
+	 * Mark hpage as uptodate before inserting it into the page cache so
+	 * that it isn't mistaken for an fallocated but unwritten page.
+	 */
 	folio = page_folio(hpage);
 	folio_mark_uptodate(folio);
 	folio_ref_add(folio, HPAGE_PMD_NR - 1);
@@ -2235,6 +2229,11 @@ immap_locked:
 		folio_mark_dirty(folio);
 	folio_add_lru(folio);
 
+	/* Join all the small entries into a single multi-index entry. */
+	xas_set_order(&xas, start, HPAGE_PMD_ORDER);
+	xas_store(&xas, hpage);
+	xas_unlock_irq(&xas);
+
 	/*
 	 * Remove pte page tables, so we can re-fault the page as huge.
 	 */
@@ -2248,47 +2247,29 @@ immap_locked:
 	list_for_each_entry_safe(page, tmp, &pagelist, lru) {
 		list_del(&page->lru);
 		page->mapping = NULL;
-		page_ref_unfreeze(page, 1);
 		ClearPageActive(page);
 		ClearPageUnevictable(page);
 		unlock_page(page);
-		put_page(page);
+		folio_put_refs(page_folio(page), 3);
 	}
 
 	goto out;
 
 rollback:
 	/* Something went wrong: roll back page cache changes */
-	xas_lock_irq(&xas);
 	if (nr_none) {
+		xas_lock_irq(&xas);
 		mapping->nrpages -= nr_none;
 		shmem_uncharge(mapping->host, nr_none);
+		xas_unlock_irq(&xas);
 	}
 
-	xas_set(&xas, start);
-	end = index;
-	for (index = start; index < end; index++) {
-		xas_next(&xas);
-		page = list_first_entry_or_null(&pagelist,
-				struct page, lru);
-		if (!page || xas.xa_index < page->index) {
-			nr_none--;
-			continue;
-		}
-
-		VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
-
-		/* Unfreeze the page. */
+	list_for_each_entry_safe(page, tmp, &pagelist, lru) {
 		list_del(&page->lru);
-		page_ref_unfreeze(page, 2);
-		xas_store(&xas, page);
-		xas_pause(&xas);
-		xas_unlock_irq(&xas);
 		unlock_page(page);
 		putback_lru_page(page);
-		xas_lock_irq(&xas);
+		put_page(page);
 	}
-	VM_BUG_ON(nr_none);
 	/*
 	 * Undo the updates of filemap_nr_thps_inc for non-SHMEM
 	 * file only. This undo is not needed unless failure is
@@ -2303,8 +2284,6 @@ rollback:
 		smp_mb();
 	}
 
-	xas_unlock_irq(&xas);
-
 	hpage->mapping = NULL;
 
 	unlock_page(hpage);
_

Patches currently in -mm which might be from stevensd@chromium.org are

mm-khugepaged-drain-lru-after-swapping-in-shmem.patch
mm-khugepaged-refactor-collapse_file-control-flow.patch
mm-khugepaged-skip-shmem-with-userfaultfd.patch
mm-khugepaged-maintain-page-cache-uptodate-flag.patch


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-04-04 19:48 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-03-07 22:58 + mm-khugepaged-maintain-page-cache-uptodate-flag.patch added to mm-unstable branch Andrew Morton
  -- strict thread matches above, loose matches on Subject: below --
2023-04-04 19:48 Andrew Morton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.