[PATCH v10 04/10] mm/rmap: Split migration into its own function

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v10 04/10] mm/rmap: Split migration into its own function
  2021-06-07  7:58 [PATCH v10 00/10] Add support for SVM atomics in Nouveau Alistair Popple
@ 2021-06-07  7:58   ` Alistair Popple
  0 siblings, 0 replies; 3+ messages in thread
From: Alistair Popple @ 2021-06-07  7:58 UTC (permalink / raw)
  To: linux-mm, akpm
  Cc: rcampbell, linux-doc, nouveau, hughd, linux-kernel, dri-devel,
	hch, bskeggs, jgg, peterx, shakeelb, jhubbard, willy,
	Alistair Popple, Christoph Hellwig

Migration is currently implemented as a mode of operation for
try_to_unmap_one() generally specified by passing the TTU_MIGRATION flag
or in the case of splitting a huge anonymous page TTU_SPLIT_FREEZE.

However it does not have much in common with the rest of the unmap
functionality of try_to_unmap_one() and thus splitting it into a
separate function reduces the complexity of try_to_unmap_one() making it
more readable.

Several simplifications can also be made in try_to_migrate_one() based
on the following observations:

 - All users of TTU_MIGRATION also set TTU_IGNORE_MLOCK.
 - No users of TTU_MIGRATION ever set TTU_IGNORE_HWPOISON.
 - No users of TTU_MIGRATION ever set TTU_BATCH_FLUSH.

TTU_SPLIT_FREEZE is a special case of migration used when splitting an
anonymous page. This is most easily dealt with by calling the correct
function from unmap_page() in mm/huge_memory.c  - either
try_to_migrate() for PageAnon or try_to_unmap().

Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>

---

v5:
* Added comments about how PMD splitting works for migration vs.
  unmapping
* Tightened up the flag check in try_to_migrate() to be explicit about
  which TTU_XXX flags are supported.
---
 include/linux/rmap.h |   4 +-
 mm/huge_memory.c     |  15 +-
 mm/migrate.c         |   9 +-
 mm/rmap.c            | 358 ++++++++++++++++++++++++++++++++-----------
 4 files changed, 280 insertions(+), 106 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 38a746787c2f..0e25d829f742 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -86,8 +86,6 @@ struct anon_vma_chain {
 };
 
 enum ttu_flags {
-	TTU_MIGRATION		= 0x1,	/* migration mode */
-
 	TTU_SPLIT_HUGE_PMD	= 0x4,	/* split huge PMD if any */
 	TTU_IGNORE_MLOCK	= 0x8,	/* ignore mlock */
 	TTU_IGNORE_HWPOISON	= 0x20,	/* corrupted page is recoverable */
@@ -96,7 +94,6 @@ enum ttu_flags {
 					 * do a final flush if necessary */
 	TTU_RMAP_LOCKED		= 0x80,	/* do not grab rmap lock:
 					 * caller holds it */
-	TTU_SPLIT_FREEZE	= 0x100,		/* freeze pte under splitting thp */
 };
 
 #ifdef CONFIG_MMU
@@ -193,6 +190,7 @@ static inline void page_dup_rmap(struct page *page, bool compound)
 int page_referenced(struct page *, int is_locked,
 			struct mem_cgroup *memcg, unsigned long *vm_flags);
 
+bool try_to_migrate(struct page *page, enum ttu_flags flags);
 bool try_to_unmap(struct page *, enum ttu_flags flags);
 
 /* Avoid racy checks */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2ec6dab72217..6dddc75b89ee 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2345,16 +2345,21 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 
 static void unmap_page(struct page *page)
 {
-	enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
-		TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+	enum ttu_flags ttu_flags = TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
 	bool unmap_success;
 
 	VM_BUG_ON_PAGE(!PageHead(page), page);
 
 	if (PageAnon(page))
-		ttu_flags |= TTU_SPLIT_FREEZE;
-
-	unmap_success = try_to_unmap(page, ttu_flags);
+		unmap_success = try_to_migrate(page, ttu_flags);
+	else
+		/*
+		 * Don't install migration entries for file backed pages. This
+		 * helps handle cases when i_size is in the middle of the page
+		 * as there is no need to unmap pages beyond i_size manually.
+		 */
+		unmap_success = try_to_unmap(page, ttu_flags |
+						TTU_IGNORE_MLOCK);
 	VM_BUG_ON_PAGE(!unmap_success, page);
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 930de919b1f2..05740f816bc4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1103,7 +1103,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		/* Establish migration ptes */
 		VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
 				page);
-		try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK);
+		try_to_migrate(page, 0);
 		page_was_mapped = 1;
 	}
 
@@ -1305,7 +1305,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 
 	if (page_mapped(hpage)) {
 		bool mapping_locked = false;
-		enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK;
+		enum ttu_flags ttu = 0;
 
 		if (!PageAnon(hpage)) {
 			/*
@@ -1322,7 +1322,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 			ttu |= TTU_RMAP_LOCKED;
 		}
 
-		try_to_unmap(hpage, ttu);
+		try_to_migrate(hpage, ttu);
 		page_was_mapped = 1;
 
 		if (mapping_locked)
@@ -2712,7 +2712,6 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
  */
 static void migrate_vma_unmap(struct migrate_vma *migrate)
 {
-	int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK;
 	const unsigned long npages = migrate->npages;
 	const unsigned long start = migrate->start;
 	unsigned long addr, i, restore = 0;
@@ -2724,7 +2723,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
 			continue;
 
 		if (page_mapped(page)) {
-			try_to_unmap(page, flags);
+			try_to_migrate(page, 0);
 			if (page_mapped(page))
 				goto restore;
 		}
diff --git a/mm/rmap.c b/mm/rmap.c
index b6c50df08b3b..be0450d905cd 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1405,14 +1405,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	struct mmu_notifier_range range;
 	enum ttu_flags flags = (enum ttu_flags)(long)arg;
 
-	if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
-	    is_zone_device_page(page) && !is_device_private_page(page))
-		return true;
-
-	if (flags & TTU_SPLIT_HUGE_PMD) {
-		split_huge_pmd_address(vma, address,
-				flags & TTU_SPLIT_FREEZE, page);
-	}
+	if (flags & TTU_SPLIT_HUGE_PMD)
+		split_huge_pmd_address(vma, address, false, page);
 
 	/*
 	 * For THP, we have to assume the worse case ie pmd for invalidation.
@@ -1436,16 +1430,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	mmu_notifier_invalidate_range_start(&range);
 
 	while (page_vma_mapped_walk(&pvmw)) {
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
-		/* PMD-mapped THP migration entry */
-		if (!pvmw.pte && (flags & TTU_MIGRATION)) {
-			VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
-
-			set_pmd_migration_entry(&pvmw, page);
-			continue;
-		}
-#endif
-
 		/*
 		 * If the page is mlock()d, we cannot swap it out.
 		 * If it's recently referenced (perhaps page_referenced
@@ -1507,46 +1491,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			}
 		}
 
-		if (IS_ENABLED(CONFIG_MIGRATION) &&
-		    (flags & TTU_MIGRATION) &&
-		    is_zone_device_page(page)) {
-			swp_entry_t entry;
-			pte_t swp_pte;
-
-			pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
-
-			/*
-			 * Store the pfn of the page in a special migration
-			 * pte. do_swap_page() will wait until the migration
-			 * pte is removed and then restart fault handling.
-			 */
-			entry = make_readable_migration_entry(page_to_pfn(page));
-			swp_pte = swp_entry_to_pte(entry);
-
-			/*
-			 * pteval maps a zone device page and is therefore
-			 * a swap pte.
-			 */
-			if (pte_swp_soft_dirty(pteval))
-				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			if (pte_swp_uffd_wp(pteval))
-				swp_pte = pte_swp_mkuffd_wp(swp_pte);
-			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
-			/*
-			 * No need to invalidate here it will synchronize on
-			 * against the special swap migration pte.
-			 *
-			 * The assignment to subpage above was computed from a
-			 * swap PTE which results in an invalid pointer.
-			 * Since only PAGE_SIZE pages can currently be
-			 * migrated, just set it to page. This will need to be
-			 * changed when hugepage migrations to device private
-			 * memory are supported.
-			 */
-			subpage = page;
-			goto discard;
-		}
-
 		/* Nuke the page table entry. */
 		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
 		if (should_defer_flush(mm, flags)) {
@@ -1599,39 +1543,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			/* We have to invalidate as we cleared the pte */
 			mmu_notifier_invalidate_range(mm, address,
 						      address + PAGE_SIZE);
-		} else if (IS_ENABLED(CONFIG_MIGRATION) &&
-				(flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
-			swp_entry_t entry;
-			pte_t swp_pte;
-
-			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-
-			/*
-			 * Store the pfn of the page in a special migration
-			 * pte. do_swap_page() will wait until the migration
-			 * pte is removed and then restart fault handling.
-			 */
-			if (pte_write(pteval))
-				entry = make_writable_migration_entry(
-							page_to_pfn(subpage));
-			else
-				entry = make_readable_migration_entry(
-							page_to_pfn(subpage));
-			swp_pte = swp_entry_to_pte(entry);
-			if (pte_soft_dirty(pteval))
-				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			if (pte_uffd_wp(pteval))
-				swp_pte = pte_swp_mkuffd_wp(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
-			/*
-			 * No need to invalidate here it will synchronize on
-			 * against the special swap migration pte.
-			 */
 		} else if (PageAnon(page)) {
 			swp_entry_t entry = { .val = page_private(subpage) };
 			pte_t swp_pte;
@@ -1758,6 +1669,268 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
 		.anon_lock = page_lock_anon_vma_read,
 	};
 
+	if (flags & TTU_RMAP_LOCKED)
+		rmap_walk_locked(page, &rwc);
+	else
+		rmap_walk(page, &rwc);
+
+	return !page_mapcount(page) ? true : false;
+}
+
+/*
+ * @arg: enum ttu_flags will be passed to this argument.
+ *
+ * If TTU_SPLIT_HUGE_PMD is specified any PMD mappings will be split into PTEs
+ * containing migration entries. This and TTU_RMAP_LOCKED are the only supported
+ * flags.
+ */
+static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
+		     unsigned long address, void *arg)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct page_vma_mapped_walk pvmw = {
+		.page = page,
+		.vma = vma,
+		.address = address,
+	};
+	pte_t pteval;
+	struct page *subpage;
+	bool ret = true;
+	struct mmu_notifier_range range;
+	enum ttu_flags flags = (enum ttu_flags)(long)arg;
+
+	if (is_zone_device_page(page) && !is_device_private_page(page))
+		return true;
+
+	/*
+	 * unmap_page() in mm/huge_memory.c is the only user of migration with
+	 * TTU_SPLIT_HUGE_PMD and it wants to freeze.
+	 */
+	if (flags & TTU_SPLIT_HUGE_PMD)
+		split_huge_pmd_address(vma, address, true, page);
+
+	/*
+	 * For THP, we have to assume the worse case ie pmd for invalidation.
+	 * For hugetlb, it could be much worse if we need to do pud
+	 * invalidation in the case of pmd sharing.
+	 *
+	 * Note that the page can not be free in this function as call of
+	 * try_to_unmap() must hold a reference on the page.
+	 */
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+				address,
+				min(vma->vm_end, address + page_size(page)));
+	if (PageHuge(page)) {
+		/*
+		 * If sharing is possible, start and end will be adjusted
+		 * accordingly.
+		 */
+		adjust_range_if_pmd_sharing_possible(vma, &range.start,
+						     &range.end);
+	}
+	mmu_notifier_invalidate_range_start(&range);
+
+	while (page_vma_mapped_walk(&pvmw)) {
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+		/* PMD-mapped THP migration entry */
+		if (!pvmw.pte) {
+			VM_BUG_ON_PAGE(PageHuge(page) ||
+				       !PageTransCompound(page), page);
+
+			set_pmd_migration_entry(&pvmw, page);
+			continue;
+		}
+#endif
+
+		/* Unexpected PMD-mapped THP? */
+		VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+		address = pvmw.address;
+
+		if (PageHuge(page) && !PageAnon(page)) {
+			/*
+			 * To call huge_pmd_unshare, i_mmap_rwsem must be
+			 * held in write mode.  Caller needs to explicitly
+			 * do this outside rmap routines.
+			 */
+			VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
+			if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
+				/*
+				 * huge_pmd_unshare unmapped an entire PMD
+				 * page.  There is no way of knowing exactly
+				 * which PMDs may be cached for this mm, so
+				 * we must flush them all.  start/end were
+				 * already adjusted above to cover this range.
+				 */
+				flush_cache_range(vma, range.start, range.end);
+				flush_tlb_range(vma, range.start, range.end);
+				mmu_notifier_invalidate_range(mm, range.start,
+							      range.end);
+
+				/*
+				 * The ref count of the PMD page was dropped
+				 * which is part of the way map counting
+				 * is done for shared PMDs.  Return 'true'
+				 * here.  When there is no other sharing,
+				 * huge_pmd_unshare returns false and we will
+				 * unmap the actual page and drop map count
+				 * to zero.
+				 */
+				page_vma_mapped_walk_done(&pvmw);
+				break;
+			}
+		}
+
+		/* Nuke the page table entry. */
+		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+		pteval = ptep_clear_flush(vma, address, pvmw.pte);
+
+		/* Move the dirty bit to the page. Now the pte is gone. */
+		if (pte_dirty(pteval))
+			set_page_dirty(page);
+
+		/* Update high watermark before we lower rss */
+		update_hiwater_rss(mm);
+
+		if (is_zone_device_page(page)) {
+			swp_entry_t entry;
+			pte_t swp_pte;
+
+			/*
+			 * Store the pfn of the page in a special migration
+			 * pte. do_swap_page() will wait until the migration
+			 * pte is removed and then restart fault handling.
+			 */
+			entry = make_readable_migration_entry(
+							page_to_pfn(page));
+			swp_pte = swp_entry_to_pte(entry);
+
+			/*
+			 * pteval maps a zone device page and is therefore
+			 * a swap pte.
+			 */
+			if (pte_swp_soft_dirty(pteval))
+				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_swp_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
+			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
+			/*
+			 * No need to invalidate here it will synchronize on
+			 * against the special swap migration pte.
+			 *
+			 * The assignment to subpage above was computed from a
+			 * swap PTE which results in an invalid pointer.
+			 * Since only PAGE_SIZE pages can currently be
+			 * migrated, just set it to page. This will need to be
+			 * changed when hugepage migrations to device private
+			 * memory are supported.
+			 */
+			subpage = page;
+		} else if (PageHWPoison(page)) {
+			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+			if (PageHuge(page)) {
+				hugetlb_count_sub(compound_nr(page), mm);
+				set_huge_swap_pte_at(mm, address,
+						     pvmw.pte, pteval,
+						     vma_mmu_pagesize(vma));
+			} else {
+				dec_mm_counter(mm, mm_counter(page));
+				set_pte_at(mm, address, pvmw.pte, pteval);
+			}
+
+		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
+			/*
+			 * The guest indicated that the page content is of no
+			 * interest anymore. Simply discard the pte, vmscan
+			 * will take care of the rest.
+			 * A future reference will then fault in a new zero
+			 * page. When userfaultfd is active, we must not drop
+			 * this page though, as its main user (postcopy
+			 * migration) will not expect userfaults on already
+			 * copied pages.
+			 */
+			dec_mm_counter(mm, mm_counter(page));
+			/* We have to invalidate as we cleared the pte */
+			mmu_notifier_invalidate_range(mm, address,
+						      address + PAGE_SIZE);
+		} else {
+			swp_entry_t entry;
+			pte_t swp_pte;
+
+			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+				set_pte_at(mm, address, pvmw.pte, pteval);
+				ret = false;
+				page_vma_mapped_walk_done(&pvmw);
+				break;
+			}
+
+			/*
+			 * Store the pfn of the page in a special migration
+			 * pte. do_swap_page() will wait until the migration
+			 * pte is removed and then restart fault handling.
+			 */
+			if (pte_write(pteval))
+				entry = make_writable_migration_entry(
+							page_to_pfn(subpage));
+			else
+				entry = make_readable_migration_entry(
+							page_to_pfn(subpage));
+
+			swp_pte = swp_entry_to_pte(entry);
+			if (pte_soft_dirty(pteval))
+				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
+			set_pte_at(mm, address, pvmw.pte, swp_pte);
+			/*
+			 * No need to invalidate here it will synchronize on
+			 * against the special swap migration pte.
+			 */
+		}
+
+		/*
+		 * No need to call mmu_notifier_invalidate_range() it has be
+		 * done above for all cases requiring it to happen under page
+		 * table lock before mmu_notifier_invalidate_range_end()
+		 *
+		 * See Documentation/vm/mmu_notifier.rst
+		 */
+		page_remove_rmap(subpage, PageHuge(page));
+		put_page(page);
+	}
+
+	mmu_notifier_invalidate_range_end(&range);
+
+	return ret;
+}
+
+/**
+ * try_to_migrate - try to replace all page table mappings with swap entries
+ * @page: the page to replace page table entries for
+ * @flags: action and flags
+ *
+ * Tries to remove all the page table entries which are mapping this page and
+ * replace them with special swap entries. Caller must hold the page lock.
+ *
+ * If is successful, return true. Otherwise, false.
+ */
+bool try_to_migrate(struct page *page, enum ttu_flags flags)
+{
+	struct rmap_walk_control rwc = {
+		.rmap_one = try_to_migrate_one,
+		.arg = (void *)flags,
+		.done = page_not_mapped,
+		.anon_lock = page_lock_anon_vma_read,
+	};
+
+	/*
+	 * Migration always ignores mlock and only supports TTU_RMAP_LOCKED and
+	 * TTU_SPLIT_HUGE_PMD flags.
+	 */
+	if (WARN_ON_ONCE(flags & ~(TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD)))
+		return false;
+
 	/*
 	 * During exec, a temporary VMA is setup and later moved.
 	 * The VMA is moved under the anon_vma lock but not the
@@ -1766,8 +1939,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
 	 * locking requirements of exec(), migration skips
 	 * temporary VMAs until after exec() completes.
 	 */
-	if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
-	    && !PageKsm(page) && PageAnon(page))
+	if (!PageKsm(page) && PageAnon(page))
 		rwc.invalid_vma = invalid_migration_vma;
 
 	if (flags & TTU_RMAP_LOCKED)
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v10 04/10] mm/rmap: Split migration into its own function
@ 2021-06-07  7:58   ` Alistair Popple
  0 siblings, 0 replies; 3+ messages in thread
From: Alistair Popple @ 2021-06-07  7:58 UTC (permalink / raw)
  To: linux-mm, akpm
  Cc: rcampbell, willy, linux-doc, nouveau, Alistair Popple, hughd,
	linux-kernel, dri-devel, hch, peterx, shakeelb, bskeggs, jgg,
	jhubbard, Christoph Hellwig

Migration is currently implemented as a mode of operation for
try_to_unmap_one() generally specified by passing the TTU_MIGRATION flag
or in the case of splitting a huge anonymous page TTU_SPLIT_FREEZE.

However it does not have much in common with the rest of the unmap
functionality of try_to_unmap_one() and thus splitting it into a
separate function reduces the complexity of try_to_unmap_one() making it
more readable.

Several simplifications can also be made in try_to_migrate_one() based
on the following observations:

 - All users of TTU_MIGRATION also set TTU_IGNORE_MLOCK.
 - No users of TTU_MIGRATION ever set TTU_IGNORE_HWPOISON.
 - No users of TTU_MIGRATION ever set TTU_BATCH_FLUSH.

TTU_SPLIT_FREEZE is a special case of migration used when splitting an
anonymous page. This is most easily dealt with by calling the correct
function from unmap_page() in mm/huge_memory.c  - either
try_to_migrate() for PageAnon or try_to_unmap().

Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>

---

v5:
* Added comments about how PMD splitting works for migration vs.
  unmapping
* Tightened up the flag check in try_to_migrate() to be explicit about
  which TTU_XXX flags are supported.
---
 include/linux/rmap.h |   4 +-
 mm/huge_memory.c     |  15 +-
 mm/migrate.c         |   9 +-
 mm/rmap.c            | 358 ++++++++++++++++++++++++++++++++-----------
 4 files changed, 280 insertions(+), 106 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 38a746787c2f..0e25d829f742 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -86,8 +86,6 @@ struct anon_vma_chain {
 };
 
 enum ttu_flags {
-	TTU_MIGRATION		= 0x1,	/* migration mode */
-
 	TTU_SPLIT_HUGE_PMD	= 0x4,	/* split huge PMD if any */
 	TTU_IGNORE_MLOCK	= 0x8,	/* ignore mlock */
 	TTU_IGNORE_HWPOISON	= 0x20,	/* corrupted page is recoverable */
@@ -96,7 +94,6 @@ enum ttu_flags {
 					 * do a final flush if necessary */
 	TTU_RMAP_LOCKED		= 0x80,	/* do not grab rmap lock:
 					 * caller holds it */
-	TTU_SPLIT_FREEZE	= 0x100,		/* freeze pte under splitting thp */
 };
 
 #ifdef CONFIG_MMU
@@ -193,6 +190,7 @@ static inline void page_dup_rmap(struct page *page, bool compound)
 int page_referenced(struct page *, int is_locked,
 			struct mem_cgroup *memcg, unsigned long *vm_flags);
 
+bool try_to_migrate(struct page *page, enum ttu_flags flags);
 bool try_to_unmap(struct page *, enum ttu_flags flags);
 
 /* Avoid racy checks */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2ec6dab72217..6dddc75b89ee 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2345,16 +2345,21 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 
 static void unmap_page(struct page *page)
 {
-	enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
-		TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+	enum ttu_flags ttu_flags = TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
 	bool unmap_success;
 
 	VM_BUG_ON_PAGE(!PageHead(page), page);
 
 	if (PageAnon(page))
-		ttu_flags |= TTU_SPLIT_FREEZE;
-
-	unmap_success = try_to_unmap(page, ttu_flags);
+		unmap_success = try_to_migrate(page, ttu_flags);
+	else
+		/*
+		 * Don't install migration entries for file backed pages. This
+		 * helps handle cases when i_size is in the middle of the page
+		 * as there is no need to unmap pages beyond i_size manually.
+		 */
+		unmap_success = try_to_unmap(page, ttu_flags |
+						TTU_IGNORE_MLOCK);
 	VM_BUG_ON_PAGE(!unmap_success, page);
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 930de919b1f2..05740f816bc4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1103,7 +1103,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		/* Establish migration ptes */
 		VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
 				page);
-		try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK);
+		try_to_migrate(page, 0);
 		page_was_mapped = 1;
 	}
 
@@ -1305,7 +1305,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 
 	if (page_mapped(hpage)) {
 		bool mapping_locked = false;
-		enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK;
+		enum ttu_flags ttu = 0;
 
 		if (!PageAnon(hpage)) {
 			/*
@@ -1322,7 +1322,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 			ttu |= TTU_RMAP_LOCKED;
 		}
 
-		try_to_unmap(hpage, ttu);
+		try_to_migrate(hpage, ttu);
 		page_was_mapped = 1;
 
 		if (mapping_locked)
@@ -2712,7 +2712,6 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
  */
 static void migrate_vma_unmap(struct migrate_vma *migrate)
 {
-	int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK;
 	const unsigned long npages = migrate->npages;
 	const unsigned long start = migrate->start;
 	unsigned long addr, i, restore = 0;
@@ -2724,7 +2723,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
 			continue;
 
 		if (page_mapped(page)) {
-			try_to_unmap(page, flags);
+			try_to_migrate(page, 0);
 			if (page_mapped(page))
 				goto restore;
 		}
diff --git a/mm/rmap.c b/mm/rmap.c
index b6c50df08b3b..be0450d905cd 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1405,14 +1405,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	struct mmu_notifier_range range;
 	enum ttu_flags flags = (enum ttu_flags)(long)arg;
 
-	if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
-	    is_zone_device_page(page) && !is_device_private_page(page))
-		return true;
-
-	if (flags & TTU_SPLIT_HUGE_PMD) {
-		split_huge_pmd_address(vma, address,
-				flags & TTU_SPLIT_FREEZE, page);
-	}
+	if (flags & TTU_SPLIT_HUGE_PMD)
+		split_huge_pmd_address(vma, address, false, page);
 
 	/*
 	 * For THP, we have to assume the worse case ie pmd for invalidation.
@@ -1436,16 +1430,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	mmu_notifier_invalidate_range_start(&range);
 
 	while (page_vma_mapped_walk(&pvmw)) {
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
-		/* PMD-mapped THP migration entry */
-		if (!pvmw.pte && (flags & TTU_MIGRATION)) {
-			VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
-
-			set_pmd_migration_entry(&pvmw, page);
-			continue;
-		}
-#endif
-
 		/*
 		 * If the page is mlock()d, we cannot swap it out.
 		 * If it's recently referenced (perhaps page_referenced
@@ -1507,46 +1491,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			}
 		}
 
-		if (IS_ENABLED(CONFIG_MIGRATION) &&
-		    (flags & TTU_MIGRATION) &&
-		    is_zone_device_page(page)) {
-			swp_entry_t entry;
-			pte_t swp_pte;
-
-			pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
-
-			/*
-			 * Store the pfn of the page in a special migration
-			 * pte. do_swap_page() will wait until the migration
-			 * pte is removed and then restart fault handling.
-			 */
-			entry = make_readable_migration_entry(page_to_pfn(page));
-			swp_pte = swp_entry_to_pte(entry);
-
-			/*
-			 * pteval maps a zone device page and is therefore
-			 * a swap pte.
-			 */
-			if (pte_swp_soft_dirty(pteval))
-				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			if (pte_swp_uffd_wp(pteval))
-				swp_pte = pte_swp_mkuffd_wp(swp_pte);
-			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
-			/*
-			 * No need to invalidate here it will synchronize on
-			 * against the special swap migration pte.
-			 *
-			 * The assignment to subpage above was computed from a
-			 * swap PTE which results in an invalid pointer.
-			 * Since only PAGE_SIZE pages can currently be
-			 * migrated, just set it to page. This will need to be
-			 * changed when hugepage migrations to device private
-			 * memory are supported.
-			 */
-			subpage = page;
-			goto discard;
-		}
-
 		/* Nuke the page table entry. */
 		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
 		if (should_defer_flush(mm, flags)) {
@@ -1599,39 +1543,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			/* We have to invalidate as we cleared the pte */
 			mmu_notifier_invalidate_range(mm, address,
 						      address + PAGE_SIZE);
-		} else if (IS_ENABLED(CONFIG_MIGRATION) &&
-				(flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
-			swp_entry_t entry;
-			pte_t swp_pte;
-
-			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-
-			/*
-			 * Store the pfn of the page in a special migration
-			 * pte. do_swap_page() will wait until the migration
-			 * pte is removed and then restart fault handling.
-			 */
-			if (pte_write(pteval))
-				entry = make_writable_migration_entry(
-							page_to_pfn(subpage));
-			else
-				entry = make_readable_migration_entry(
-							page_to_pfn(subpage));
-			swp_pte = swp_entry_to_pte(entry);
-			if (pte_soft_dirty(pteval))
-				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			if (pte_uffd_wp(pteval))
-				swp_pte = pte_swp_mkuffd_wp(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
-			/*
-			 * No need to invalidate here it will synchronize on
-			 * against the special swap migration pte.
-			 */
 		} else if (PageAnon(page)) {
 			swp_entry_t entry = { .val = page_private(subpage) };
 			pte_t swp_pte;
@@ -1758,6 +1669,268 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
 		.anon_lock = page_lock_anon_vma_read,
 	};
 
+	if (flags & TTU_RMAP_LOCKED)
+		rmap_walk_locked(page, &rwc);
+	else
+		rmap_walk(page, &rwc);
+
+	return !page_mapcount(page) ? true : false;
+}
+
+/*
+ * @arg: enum ttu_flags will be passed to this argument.
+ *
+ * If TTU_SPLIT_HUGE_PMD is specified any PMD mappings will be split into PTEs
+ * containing migration entries. This and TTU_RMAP_LOCKED are the only supported
+ * flags.
+ */
+static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
+		     unsigned long address, void *arg)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct page_vma_mapped_walk pvmw = {
+		.page = page,
+		.vma = vma,
+		.address = address,
+	};
+	pte_t pteval;
+	struct page *subpage;
+	bool ret = true;
+	struct mmu_notifier_range range;
+	enum ttu_flags flags = (enum ttu_flags)(long)arg;
+
+	if (is_zone_device_page(page) && !is_device_private_page(page))
+		return true;
+
+	/*
+	 * unmap_page() in mm/huge_memory.c is the only user of migration with
+	 * TTU_SPLIT_HUGE_PMD and it wants to freeze.
+	 */
+	if (flags & TTU_SPLIT_HUGE_PMD)
+		split_huge_pmd_address(vma, address, true, page);
+
+	/*
+	 * For THP, we have to assume the worse case ie pmd for invalidation.
+	 * For hugetlb, it could be much worse if we need to do pud
+	 * invalidation in the case of pmd sharing.
+	 *
+	 * Note that the page can not be free in this function as call of
+	 * try_to_unmap() must hold a reference on the page.
+	 */
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+				address,
+				min(vma->vm_end, address + page_size(page)));
+	if (PageHuge(page)) {
+		/*
+		 * If sharing is possible, start and end will be adjusted
+		 * accordingly.
+		 */
+		adjust_range_if_pmd_sharing_possible(vma, &range.start,
+						     &range.end);
+	}
+	mmu_notifier_invalidate_range_start(&range);
+
+	while (page_vma_mapped_walk(&pvmw)) {
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+		/* PMD-mapped THP migration entry */
+		if (!pvmw.pte) {
+			VM_BUG_ON_PAGE(PageHuge(page) ||
+				       !PageTransCompound(page), page);
+
+			set_pmd_migration_entry(&pvmw, page);
+			continue;
+		}
+#endif
+
+		/* Unexpected PMD-mapped THP? */
+		VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+		address = pvmw.address;
+
+		if (PageHuge(page) && !PageAnon(page)) {
+			/*
+			 * To call huge_pmd_unshare, i_mmap_rwsem must be
+			 * held in write mode.  Caller needs to explicitly
+			 * do this outside rmap routines.
+			 */
+			VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
+			if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
+				/*
+				 * huge_pmd_unshare unmapped an entire PMD
+				 * page.  There is no way of knowing exactly
+				 * which PMDs may be cached for this mm, so
+				 * we must flush them all.  start/end were
+				 * already adjusted above to cover this range.
+				 */
+				flush_cache_range(vma, range.start, range.end);
+				flush_tlb_range(vma, range.start, range.end);
+				mmu_notifier_invalidate_range(mm, range.start,
+							      range.end);
+
+				/*
+				 * The ref count of the PMD page was dropped
+				 * which is part of the way map counting
+				 * is done for shared PMDs.  Return 'true'
+				 * here.  When there is no other sharing,
+				 * huge_pmd_unshare returns false and we will
+				 * unmap the actual page and drop map count
+				 * to zero.
+				 */
+				page_vma_mapped_walk_done(&pvmw);
+				break;
+			}
+		}
+
+		/* Nuke the page table entry. */
+		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+		pteval = ptep_clear_flush(vma, address, pvmw.pte);
+
+		/* Move the dirty bit to the page. Now the pte is gone. */
+		if (pte_dirty(pteval))
+			set_page_dirty(page);
+
+		/* Update high watermark before we lower rss */
+		update_hiwater_rss(mm);
+
+		if (is_zone_device_page(page)) {
+			swp_entry_t entry;
+			pte_t swp_pte;
+
+			/*
+			 * Store the pfn of the page in a special migration
+			 * pte. do_swap_page() will wait until the migration
+			 * pte is removed and then restart fault handling.
+			 */
+			entry = make_readable_migration_entry(
+							page_to_pfn(page));
+			swp_pte = swp_entry_to_pte(entry);
+
+			/*
+			 * pteval maps a zone device page and is therefore
+			 * a swap pte.
+			 */
+			if (pte_swp_soft_dirty(pteval))
+				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_swp_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
+			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
+			/*
+			 * No need to invalidate here it will synchronize on
+			 * against the special swap migration pte.
+			 *
+			 * The assignment to subpage above was computed from a
+			 * swap PTE which results in an invalid pointer.
+			 * Since only PAGE_SIZE pages can currently be
+			 * migrated, just set it to page. This will need to be
+			 * changed when hugepage migrations to device private
+			 * memory are supported.
+			 */
+			subpage = page;
+		} else if (PageHWPoison(page)) {
+			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+			if (PageHuge(page)) {
+				hugetlb_count_sub(compound_nr(page), mm);
+				set_huge_swap_pte_at(mm, address,
+						     pvmw.pte, pteval,
+						     vma_mmu_pagesize(vma));
+			} else {
+				dec_mm_counter(mm, mm_counter(page));
+				set_pte_at(mm, address, pvmw.pte, pteval);
+			}
+
+		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
+			/*
+			 * The guest indicated that the page content is of no
+			 * interest anymore. Simply discard the pte, vmscan
+			 * will take care of the rest.
+			 * A future reference will then fault in a new zero
+			 * page. When userfaultfd is active, we must not drop
+			 * this page though, as its main user (postcopy
+			 * migration) will not expect userfaults on already
+			 * copied pages.
+			 */
+			dec_mm_counter(mm, mm_counter(page));
+			/* We have to invalidate as we cleared the pte */
+			mmu_notifier_invalidate_range(mm, address,
+						      address + PAGE_SIZE);
+		} else {
+			swp_entry_t entry;
+			pte_t swp_pte;
+
+			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+				set_pte_at(mm, address, pvmw.pte, pteval);
+				ret = false;
+				page_vma_mapped_walk_done(&pvmw);
+				break;
+			}
+
+			/*
+			 * Store the pfn of the page in a special migration
+			 * pte. do_swap_page() will wait until the migration
+			 * pte is removed and then restart fault handling.
+			 */
+			if (pte_write(pteval))
+				entry = make_writable_migration_entry(
+							page_to_pfn(subpage));
+			else
+				entry = make_readable_migration_entry(
+							page_to_pfn(subpage));
+
+			swp_pte = swp_entry_to_pte(entry);
+			if (pte_soft_dirty(pteval))
+				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
+			set_pte_at(mm, address, pvmw.pte, swp_pte);
+			/*
+			 * No need to invalidate here it will synchronize on
+			 * against the special swap migration pte.
+			 */
+		}
+
+		/*
+		 * No need to call mmu_notifier_invalidate_range() it has be
+		 * done above for all cases requiring it to happen under page
+		 * table lock before mmu_notifier_invalidate_range_end()
+		 *
+		 * See Documentation/vm/mmu_notifier.rst
+		 */
+		page_remove_rmap(subpage, PageHuge(page));
+		put_page(page);
+	}
+
+	mmu_notifier_invalidate_range_end(&range);
+
+	return ret;
+}
+
+/**
+ * try_to_migrate - try to replace all page table mappings with swap entries
+ * @page: the page to replace page table entries for
+ * @flags: action and flags
+ *
+ * Tries to remove all the page table entries which are mapping this page and
+ * replace them with special swap entries. Caller must hold the page lock.
+ *
+ * If is successful, return true. Otherwise, false.
+ */
+bool try_to_migrate(struct page *page, enum ttu_flags flags)
+{
+	struct rmap_walk_control rwc = {
+		.rmap_one = try_to_migrate_one,
+		.arg = (void *)flags,
+		.done = page_not_mapped,
+		.anon_lock = page_lock_anon_vma_read,
+	};
+
+	/*
+	 * Migration always ignores mlock and only supports TTU_RMAP_LOCKED and
+	 * TTU_SPLIT_HUGE_PMD flags.
+	 */
+	if (WARN_ON_ONCE(flags & ~(TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD)))
+		return false;
+
 	/*
 	 * During exec, a temporary VMA is setup and later moved.
 	 * The VMA is moved under the anon_vma lock but not the
@@ -1766,8 +1939,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
 	 * locking requirements of exec(), migration skips
 	 * temporary VMAs until after exec() completes.
 	 */
-	if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
-	    && !PageKsm(page) && PageAnon(page))
+	if (!PageKsm(page) && PageAnon(page))
 		rwc.invalid_vma = invalid_migration_vma;
 
 	if (flags & TTU_RMAP_LOCKED)
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v10 04/10] mm/rmap: Split migration into its own function
@ 2021-06-09 17:43 kernel test robot
  0 siblings, 0 replies; 3+ messages in thread
From: kernel test robot @ 2021-06-09 17:43 UTC (permalink / raw)
  To: kbuild

[-- Attachment #1: Type: text/plain, Size: 16246 bytes --]

CC: kbuild-all(a)lists.01.org
In-Reply-To: <20210607075855.5084-5-apopple@nvidia.com>
References: <20210607075855.5084-5-apopple@nvidia.com>
TO: Alistair Popple <apopple@nvidia.com>
TO: linux-mm(a)kvack.org
TO: akpm(a)linux-foundation.org
CC: rcampbell(a)nvidia.com
CC: linux-doc(a)vger.kernel.org
CC: nouveau(a)lists.freedesktop.org
CC: hughd(a)google.com
CC: linux-kernel(a)vger.kernel.org
CC: dri-devel(a)lists.freedesktop.org
CC: hch(a)infradead.org
CC: bskeggs(a)redhat.com

Hi Alistair,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on s390/features]
[also build test WARNING on kselftest/next linus/master v5.13-rc5]
[cannot apply to hnaz-linux-mm/master next-20210609]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Alistair-Popple/Add-support-for-SVM-atomics-in-Nouveau/20210607-160056
base:   https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git features
:::::: branch date: 2 days ago
:::::: commit date: 2 days ago
config: parisc-randconfig-s032-20210607 (attached as .config)
compiler: hppa64-linux-gcc (GCC) 9.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-341-g8af24329-dirty
        # https://github.com/0day-ci/linux/commit/80e54e5e679c95e425608a45721ca6cc30ae25f4
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Alistair-Popple/Add-support-for-SVM-atomics-in-Nouveau/20210607-160056
        git checkout 80e54e5e679c95e425608a45721ca6cc30ae25f4
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' W=1 ARCH=parisc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
   mm/rmap.c: note: in included file (through include/linux/ksm.h):
   include/linux/rmap.h:217:28: sparse: sparse: context imbalance in 'page_referenced_one' - unexpected unlock
   mm/rmap.c:970:25: sparse: sparse: context imbalance in 'page_mkclean_one' - different lock contexts for basic block
   include/linux/rmap.h:217:28: sparse: sparse: context imbalance in 'try_to_unmap_one' - unexpected unlock
>> mm/rmap.c:1899:17: sparse: sparse: context imbalance in 'try_to_migrate_one' - different lock contexts for basic block
   include/linux/rmap.h:217:28: sparse: sparse: context imbalance in 'page_mlock_one' - unexpected unlock

vim +/try_to_migrate_one +1899 mm/rmap.c

80e54e5e679c95 Alistair Popple 2021-06-07  1744  
80e54e5e679c95 Alistair Popple 2021-06-07  1745  		/* Unexpected PMD-mapped THP? */
80e54e5e679c95 Alistair Popple 2021-06-07  1746  		VM_BUG_ON_PAGE(!pvmw.pte, page);
80e54e5e679c95 Alistair Popple 2021-06-07  1747  
80e54e5e679c95 Alistair Popple 2021-06-07  1748  		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
80e54e5e679c95 Alistair Popple 2021-06-07  1749  		address = pvmw.address;
80e54e5e679c95 Alistair Popple 2021-06-07  1750  
80e54e5e679c95 Alistair Popple 2021-06-07  1751  		if (PageHuge(page) && !PageAnon(page)) {
80e54e5e679c95 Alistair Popple 2021-06-07  1752  			/*
80e54e5e679c95 Alistair Popple 2021-06-07  1753  			 * To call huge_pmd_unshare, i_mmap_rwsem must be
80e54e5e679c95 Alistair Popple 2021-06-07  1754  			 * held in write mode.  Caller needs to explicitly
80e54e5e679c95 Alistair Popple 2021-06-07  1755  			 * do this outside rmap routines.
80e54e5e679c95 Alistair Popple 2021-06-07  1756  			 */
80e54e5e679c95 Alistair Popple 2021-06-07  1757  			VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
80e54e5e679c95 Alistair Popple 2021-06-07  1758  			if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
80e54e5e679c95 Alistair Popple 2021-06-07  1759  				/*
80e54e5e679c95 Alistair Popple 2021-06-07  1760  				 * huge_pmd_unshare unmapped an entire PMD
80e54e5e679c95 Alistair Popple 2021-06-07  1761  				 * page.  There is no way of knowing exactly
80e54e5e679c95 Alistair Popple 2021-06-07  1762  				 * which PMDs may be cached for this mm, so
80e54e5e679c95 Alistair Popple 2021-06-07  1763  				 * we must flush them all.  start/end were
80e54e5e679c95 Alistair Popple 2021-06-07  1764  				 * already adjusted above to cover this range.
80e54e5e679c95 Alistair Popple 2021-06-07  1765  				 */
80e54e5e679c95 Alistair Popple 2021-06-07  1766  				flush_cache_range(vma, range.start, range.end);
80e54e5e679c95 Alistair Popple 2021-06-07  1767  				flush_tlb_range(vma, range.start, range.end);
80e54e5e679c95 Alistair Popple 2021-06-07  1768  				mmu_notifier_invalidate_range(mm, range.start,
80e54e5e679c95 Alistair Popple 2021-06-07  1769  							      range.end);
80e54e5e679c95 Alistair Popple 2021-06-07  1770  
80e54e5e679c95 Alistair Popple 2021-06-07  1771  				/*
80e54e5e679c95 Alistair Popple 2021-06-07  1772  				 * The ref count of the PMD page was dropped
80e54e5e679c95 Alistair Popple 2021-06-07  1773  				 * which is part of the way map counting
80e54e5e679c95 Alistair Popple 2021-06-07  1774  				 * is done for shared PMDs.  Return 'true'
80e54e5e679c95 Alistair Popple 2021-06-07  1775  				 * here.  When there is no other sharing,
80e54e5e679c95 Alistair Popple 2021-06-07  1776  				 * huge_pmd_unshare returns false and we will
80e54e5e679c95 Alistair Popple 2021-06-07  1777  				 * unmap the actual page and drop map count
80e54e5e679c95 Alistair Popple 2021-06-07  1778  				 * to zero.
80e54e5e679c95 Alistair Popple 2021-06-07  1779  				 */
80e54e5e679c95 Alistair Popple 2021-06-07  1780  				page_vma_mapped_walk_done(&pvmw);
80e54e5e679c95 Alistair Popple 2021-06-07  1781  				break;
80e54e5e679c95 Alistair Popple 2021-06-07  1782  			}
80e54e5e679c95 Alistair Popple 2021-06-07  1783  		}
80e54e5e679c95 Alistair Popple 2021-06-07  1784  
80e54e5e679c95 Alistair Popple 2021-06-07  1785  		/* Nuke the page table entry. */
80e54e5e679c95 Alistair Popple 2021-06-07  1786  		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
80e54e5e679c95 Alistair Popple 2021-06-07  1787  		pteval = ptep_clear_flush(vma, address, pvmw.pte);
80e54e5e679c95 Alistair Popple 2021-06-07  1788  
80e54e5e679c95 Alistair Popple 2021-06-07  1789  		/* Move the dirty bit to the page. Now the pte is gone. */
80e54e5e679c95 Alistair Popple 2021-06-07  1790  		if (pte_dirty(pteval))
80e54e5e679c95 Alistair Popple 2021-06-07  1791  			set_page_dirty(page);
80e54e5e679c95 Alistair Popple 2021-06-07  1792  
80e54e5e679c95 Alistair Popple 2021-06-07  1793  		/* Update high watermark before we lower rss */
80e54e5e679c95 Alistair Popple 2021-06-07  1794  		update_hiwater_rss(mm);
80e54e5e679c95 Alistair Popple 2021-06-07  1795  
80e54e5e679c95 Alistair Popple 2021-06-07  1796  		if (is_zone_device_page(page)) {
80e54e5e679c95 Alistair Popple 2021-06-07  1797  			swp_entry_t entry;
80e54e5e679c95 Alistair Popple 2021-06-07  1798  			pte_t swp_pte;
80e54e5e679c95 Alistair Popple 2021-06-07  1799  
80e54e5e679c95 Alistair Popple 2021-06-07  1800  			/*
80e54e5e679c95 Alistair Popple 2021-06-07  1801  			 * Store the pfn of the page in a special migration
80e54e5e679c95 Alistair Popple 2021-06-07  1802  			 * pte. do_swap_page() will wait until the migration
80e54e5e679c95 Alistair Popple 2021-06-07  1803  			 * pte is removed and then restart fault handling.
80e54e5e679c95 Alistair Popple 2021-06-07  1804  			 */
80e54e5e679c95 Alistair Popple 2021-06-07  1805  			entry = make_readable_migration_entry(
80e54e5e679c95 Alistair Popple 2021-06-07  1806  							page_to_pfn(page));
80e54e5e679c95 Alistair Popple 2021-06-07  1807  			swp_pte = swp_entry_to_pte(entry);
80e54e5e679c95 Alistair Popple 2021-06-07  1808  
80e54e5e679c95 Alistair Popple 2021-06-07  1809  			/*
80e54e5e679c95 Alistair Popple 2021-06-07  1810  			 * pteval maps a zone device page and is therefore
80e54e5e679c95 Alistair Popple 2021-06-07  1811  			 * a swap pte.
80e54e5e679c95 Alistair Popple 2021-06-07  1812  			 */
80e54e5e679c95 Alistair Popple 2021-06-07  1813  			if (pte_swp_soft_dirty(pteval))
80e54e5e679c95 Alistair Popple 2021-06-07  1814  				swp_pte = pte_swp_mksoft_dirty(swp_pte);
80e54e5e679c95 Alistair Popple 2021-06-07  1815  			if (pte_swp_uffd_wp(pteval))
80e54e5e679c95 Alistair Popple 2021-06-07  1816  				swp_pte = pte_swp_mkuffd_wp(swp_pte);
80e54e5e679c95 Alistair Popple 2021-06-07  1817  			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
80e54e5e679c95 Alistair Popple 2021-06-07  1818  			/*
80e54e5e679c95 Alistair Popple 2021-06-07  1819  			 * No need to invalidate here it will synchronize on
80e54e5e679c95 Alistair Popple 2021-06-07  1820  			 * against the special swap migration pte.
80e54e5e679c95 Alistair Popple 2021-06-07  1821  			 *
80e54e5e679c95 Alistair Popple 2021-06-07  1822  			 * The assignment to subpage above was computed from a
80e54e5e679c95 Alistair Popple 2021-06-07  1823  			 * swap PTE which results in an invalid pointer.
80e54e5e679c95 Alistair Popple 2021-06-07  1824  			 * Since only PAGE_SIZE pages can currently be
80e54e5e679c95 Alistair Popple 2021-06-07  1825  			 * migrated, just set it to page. This will need to be
80e54e5e679c95 Alistair Popple 2021-06-07  1826  			 * changed when hugepage migrations to device private
80e54e5e679c95 Alistair Popple 2021-06-07  1827  			 * memory are supported.
80e54e5e679c95 Alistair Popple 2021-06-07  1828  			 */
80e54e5e679c95 Alistair Popple 2021-06-07  1829  			subpage = page;
80e54e5e679c95 Alistair Popple 2021-06-07  1830  		} else if (PageHWPoison(page)) {
80e54e5e679c95 Alistair Popple 2021-06-07  1831  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
80e54e5e679c95 Alistair Popple 2021-06-07  1832  			if (PageHuge(page)) {
80e54e5e679c95 Alistair Popple 2021-06-07  1833  				hugetlb_count_sub(compound_nr(page), mm);
80e54e5e679c95 Alistair Popple 2021-06-07  1834  				set_huge_swap_pte_at(mm, address,
80e54e5e679c95 Alistair Popple 2021-06-07  1835  						     pvmw.pte, pteval,
80e54e5e679c95 Alistair Popple 2021-06-07  1836  						     vma_mmu_pagesize(vma));
80e54e5e679c95 Alistair Popple 2021-06-07  1837  			} else {
80e54e5e679c95 Alistair Popple 2021-06-07  1838  				dec_mm_counter(mm, mm_counter(page));
80e54e5e679c95 Alistair Popple 2021-06-07  1839  				set_pte_at(mm, address, pvmw.pte, pteval);
80e54e5e679c95 Alistair Popple 2021-06-07  1840  			}
80e54e5e679c95 Alistair Popple 2021-06-07  1841  
80e54e5e679c95 Alistair Popple 2021-06-07  1842  		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
80e54e5e679c95 Alistair Popple 2021-06-07  1843  			/*
80e54e5e679c95 Alistair Popple 2021-06-07  1844  			 * The guest indicated that the page content is of no
80e54e5e679c95 Alistair Popple 2021-06-07  1845  			 * interest anymore. Simply discard the pte, vmscan
80e54e5e679c95 Alistair Popple 2021-06-07  1846  			 * will take care of the rest.
80e54e5e679c95 Alistair Popple 2021-06-07  1847  			 * A future reference will then fault in a new zero
80e54e5e679c95 Alistair Popple 2021-06-07  1848  			 * page. When userfaultfd is active, we must not drop
80e54e5e679c95 Alistair Popple 2021-06-07  1849  			 * this page though, as its main user (postcopy
80e54e5e679c95 Alistair Popple 2021-06-07  1850  			 * migration) will not expect userfaults on already
80e54e5e679c95 Alistair Popple 2021-06-07  1851  			 * copied pages.
80e54e5e679c95 Alistair Popple 2021-06-07  1852  			 */
80e54e5e679c95 Alistair Popple 2021-06-07  1853  			dec_mm_counter(mm, mm_counter(page));
80e54e5e679c95 Alistair Popple 2021-06-07  1854  			/* We have to invalidate as we cleared the pte */
80e54e5e679c95 Alistair Popple 2021-06-07  1855  			mmu_notifier_invalidate_range(mm, address,
80e54e5e679c95 Alistair Popple 2021-06-07  1856  						      address + PAGE_SIZE);
80e54e5e679c95 Alistair Popple 2021-06-07  1857  		} else {
80e54e5e679c95 Alistair Popple 2021-06-07  1858  			swp_entry_t entry;
80e54e5e679c95 Alistair Popple 2021-06-07  1859  			pte_t swp_pte;
80e54e5e679c95 Alistair Popple 2021-06-07  1860  
80e54e5e679c95 Alistair Popple 2021-06-07  1861  			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
80e54e5e679c95 Alistair Popple 2021-06-07  1862  				set_pte_at(mm, address, pvmw.pte, pteval);
80e54e5e679c95 Alistair Popple 2021-06-07  1863  				ret = false;
80e54e5e679c95 Alistair Popple 2021-06-07  1864  				page_vma_mapped_walk_done(&pvmw);
80e54e5e679c95 Alistair Popple 2021-06-07  1865  				break;
80e54e5e679c95 Alistair Popple 2021-06-07  1866  			}
80e54e5e679c95 Alistair Popple 2021-06-07  1867  
80e54e5e679c95 Alistair Popple 2021-06-07  1868  			/*
80e54e5e679c95 Alistair Popple 2021-06-07  1869  			 * Store the pfn of the page in a special migration
80e54e5e679c95 Alistair Popple 2021-06-07  1870  			 * pte. do_swap_page() will wait until the migration
80e54e5e679c95 Alistair Popple 2021-06-07  1871  			 * pte is removed and then restart fault handling.
80e54e5e679c95 Alistair Popple 2021-06-07  1872  			 */
80e54e5e679c95 Alistair Popple 2021-06-07  1873  			if (pte_write(pteval))
80e54e5e679c95 Alistair Popple 2021-06-07  1874  				entry = make_writable_migration_entry(
80e54e5e679c95 Alistair Popple 2021-06-07  1875  							page_to_pfn(subpage));
80e54e5e679c95 Alistair Popple 2021-06-07  1876  			else
80e54e5e679c95 Alistair Popple 2021-06-07  1877  				entry = make_readable_migration_entry(
80e54e5e679c95 Alistair Popple 2021-06-07  1878  							page_to_pfn(subpage));
80e54e5e679c95 Alistair Popple 2021-06-07  1879  
80e54e5e679c95 Alistair Popple 2021-06-07  1880  			swp_pte = swp_entry_to_pte(entry);
80e54e5e679c95 Alistair Popple 2021-06-07  1881  			if (pte_soft_dirty(pteval))
80e54e5e679c95 Alistair Popple 2021-06-07  1882  				swp_pte = pte_swp_mksoft_dirty(swp_pte);
80e54e5e679c95 Alistair Popple 2021-06-07  1883  			if (pte_uffd_wp(pteval))
80e54e5e679c95 Alistair Popple 2021-06-07  1884  				swp_pte = pte_swp_mkuffd_wp(swp_pte);
80e54e5e679c95 Alistair Popple 2021-06-07  1885  			set_pte_at(mm, address, pvmw.pte, swp_pte);
80e54e5e679c95 Alistair Popple 2021-06-07  1886  			/*
80e54e5e679c95 Alistair Popple 2021-06-07  1887  			 * No need to invalidate here it will synchronize on
80e54e5e679c95 Alistair Popple 2021-06-07  1888  			 * against the special swap migration pte.
80e54e5e679c95 Alistair Popple 2021-06-07  1889  			 */
80e54e5e679c95 Alistair Popple 2021-06-07  1890  		}
80e54e5e679c95 Alistair Popple 2021-06-07  1891  
80e54e5e679c95 Alistair Popple 2021-06-07  1892  		/*
80e54e5e679c95 Alistair Popple 2021-06-07  1893  		 * No need to call mmu_notifier_invalidate_range() it has be
80e54e5e679c95 Alistair Popple 2021-06-07  1894  		 * done above for all cases requiring it to happen under page
80e54e5e679c95 Alistair Popple 2021-06-07  1895  		 * table lock before mmu_notifier_invalidate_range_end()
80e54e5e679c95 Alistair Popple 2021-06-07  1896  		 *
80e54e5e679c95 Alistair Popple 2021-06-07  1897  		 * See Documentation/vm/mmu_notifier.rst
80e54e5e679c95 Alistair Popple 2021-06-07  1898  		 */
80e54e5e679c95 Alistair Popple 2021-06-07 @1899  		page_remove_rmap(subpage, PageHuge(page));
80e54e5e679c95 Alistair Popple 2021-06-07  1900  		put_page(page);
80e54e5e679c95 Alistair Popple 2021-06-07  1901  	}
80e54e5e679c95 Alistair Popple 2021-06-07  1902  
80e54e5e679c95 Alistair Popple 2021-06-07  1903  	mmu_notifier_invalidate_range_end(&range);
80e54e5e679c95 Alistair Popple 2021-06-07  1904  
80e54e5e679c95 Alistair Popple 2021-06-07  1905  	return ret;
80e54e5e679c95 Alistair Popple 2021-06-07  1906  }
80e54e5e679c95 Alistair Popple 2021-06-07  1907  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 37498 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-06-09 17:43 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-06-09 17:43 [PATCH v10 04/10] mm/rmap: Split migration into its own function kernel test robot
  -- strict thread matches above, loose matches on Subject: below --
2021-06-07  7:58 [PATCH v10 00/10] Add support for SVM atomics in Nouveau Alistair Popple
2021-06-07  7:58 ` [PATCH v10 04/10] mm/rmap: Split migration into its own function Alistair Popple
2021-06-07  7:58   ` Alistair Popple

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.