* [PATCH v10 04/10] mm/rmap: Split migration into its own function
2021-06-07 7:58 [PATCH v10 00/10] Add support for SVM atomics in Nouveau Alistair Popple
@ 2021-06-07 7:58 ` Alistair Popple
0 siblings, 0 replies; 3+ messages in thread
From: Alistair Popple @ 2021-06-07 7:58 UTC (permalink / raw)
To: linux-mm, akpm
Cc: rcampbell, linux-doc, nouveau, hughd, linux-kernel, dri-devel,
hch, bskeggs, jgg, peterx, shakeelb, jhubbard, willy,
Alistair Popple, Christoph Hellwig
Migration is currently implemented as a mode of operation for
try_to_unmap_one(), generally specified by passing the TTU_MIGRATION flag
or, in the case of splitting a huge anonymous page, TTU_SPLIT_FREEZE.
However, it does not have much in common with the rest of the unmap
functionality of try_to_unmap_one(), so splitting it into a separate
function reduces the complexity of try_to_unmap_one(), making it more
readable.
Several simplifications can also be made in try_to_migrate_one() based
on the following observations:
- All users of TTU_MIGRATION also set TTU_IGNORE_MLOCK.
- No users of TTU_MIGRATION ever set TTU_IGNORE_HWPOISON.
- No users of TTU_MIGRATION ever set TTU_BATCH_FLUSH.
TTU_SPLIT_FREEZE is a special case of migration used when splitting an
anonymous page. This is most easily dealt with by calling the correct
function from unmap_page() in mm/huge_memory.c - either
try_to_migrate() for PageAnon or try_to_unmap().
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
---
v5:
* Added comments about how PMD splitting works for migration vs.
unmapping
* Tightened up the flag check in try_to_migrate() to be explicit about
which TTU_XXX flags are supported.
---
include/linux/rmap.h | 4 +-
mm/huge_memory.c | 15 +-
mm/migrate.c | 9 +-
mm/rmap.c | 358 ++++++++++++++++++++++++++++++++-----------
4 files changed, 280 insertions(+), 106 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 38a746787c2f..0e25d829f742 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -86,8 +86,6 @@ struct anon_vma_chain {
};
enum ttu_flags {
- TTU_MIGRATION = 0x1, /* migration mode */
-
TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */
TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */
TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */
@@ -96,7 +94,6 @@ enum ttu_flags {
* do a final flush if necessary */
TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock:
* caller holds it */
- TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
};
#ifdef CONFIG_MMU
@@ -193,6 +190,7 @@ static inline void page_dup_rmap(struct page *page, bool compound)
int page_referenced(struct page *, int is_locked,
struct mem_cgroup *memcg, unsigned long *vm_flags);
+bool try_to_migrate(struct page *page, enum ttu_flags flags);
bool try_to_unmap(struct page *, enum ttu_flags flags);
/* Avoid racy checks */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2ec6dab72217..6dddc75b89ee 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2345,16 +2345,21 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
static void unmap_page(struct page *page)
{
- enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
- TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+ enum ttu_flags ttu_flags = TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
bool unmap_success;
VM_BUG_ON_PAGE(!PageHead(page), page);
if (PageAnon(page))
- ttu_flags |= TTU_SPLIT_FREEZE;
-
- unmap_success = try_to_unmap(page, ttu_flags);
+ unmap_success = try_to_migrate(page, ttu_flags);
+ else
+ /*
+ * Don't install migration entries for file backed pages. This
+ * helps handle cases when i_size is in the middle of the page
+ * as there is no need to unmap pages beyond i_size manually.
+ */
+ unmap_success = try_to_unmap(page, ttu_flags |
+ TTU_IGNORE_MLOCK);
VM_BUG_ON_PAGE(!unmap_success, page);
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 930de919b1f2..05740f816bc4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1103,7 +1103,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
/* Establish migration ptes */
VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
page);
- try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK);
+ try_to_migrate(page, 0);
page_was_mapped = 1;
}
@@ -1305,7 +1305,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
if (page_mapped(hpage)) {
bool mapping_locked = false;
- enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK;
+ enum ttu_flags ttu = 0;
if (!PageAnon(hpage)) {
/*
@@ -1322,7 +1322,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
ttu |= TTU_RMAP_LOCKED;
}
- try_to_unmap(hpage, ttu);
+ try_to_migrate(hpage, ttu);
page_was_mapped = 1;
if (mapping_locked)
@@ -2712,7 +2712,6 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
*/
static void migrate_vma_unmap(struct migrate_vma *migrate)
{
- int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK;
const unsigned long npages = migrate->npages;
const unsigned long start = migrate->start;
unsigned long addr, i, restore = 0;
@@ -2724,7 +2723,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
continue;
if (page_mapped(page)) {
- try_to_unmap(page, flags);
+ try_to_migrate(page, 0);
if (page_mapped(page))
goto restore;
}
diff --git a/mm/rmap.c b/mm/rmap.c
index b6c50df08b3b..be0450d905cd 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1405,14 +1405,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
- if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
- is_zone_device_page(page) && !is_device_private_page(page))
- return true;
-
- if (flags & TTU_SPLIT_HUGE_PMD) {
- split_huge_pmd_address(vma, address,
- flags & TTU_SPLIT_FREEZE, page);
- }
+ if (flags & TTU_SPLIT_HUGE_PMD)
+ split_huge_pmd_address(vma, address, false, page);
/*
* For THP, we have to assume the worse case ie pmd for invalidation.
@@ -1436,16 +1430,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
- /* PMD-mapped THP migration entry */
- if (!pvmw.pte && (flags & TTU_MIGRATION)) {
- VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
-
- set_pmd_migration_entry(&pvmw, page);
- continue;
- }
-#endif
-
/*
* If the page is mlock()d, we cannot swap it out.
* If it's recently referenced (perhaps page_referenced
@@ -1507,46 +1491,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
}
}
- if (IS_ENABLED(CONFIG_MIGRATION) &&
- (flags & TTU_MIGRATION) &&
- is_zone_device_page(page)) {
- swp_entry_t entry;
- pte_t swp_pte;
-
- pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
-
- /*
- * Store the pfn of the page in a special migration
- * pte. do_swap_page() will wait until the migration
- * pte is removed and then restart fault handling.
- */
- entry = make_readable_migration_entry(page_to_pfn(page));
- swp_pte = swp_entry_to_pte(entry);
-
- /*
- * pteval maps a zone device page and is therefore
- * a swap pte.
- */
- if (pte_swp_soft_dirty(pteval))
- swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_swp_uffd_wp(pteval))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
- set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
- /*
- * No need to invalidate here it will synchronize on
- * against the special swap migration pte.
- *
- * The assignment to subpage above was computed from a
- * swap PTE which results in an invalid pointer.
- * Since only PAGE_SIZE pages can currently be
- * migrated, just set it to page. This will need to be
- * changed when hugepage migrations to device private
- * memory are supported.
- */
- subpage = page;
- goto discard;
- }
-
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
if (should_defer_flush(mm, flags)) {
@@ -1599,39 +1543,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
/* We have to invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm, address,
address + PAGE_SIZE);
- } else if (IS_ENABLED(CONFIG_MIGRATION) &&
- (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
- swp_entry_t entry;
- pte_t swp_pte;
-
- if (arch_unmap_one(mm, vma, address, pteval) < 0) {
- set_pte_at(mm, address, pvmw.pte, pteval);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
-
- /*
- * Store the pfn of the page in a special migration
- * pte. do_swap_page() will wait until the migration
- * pte is removed and then restart fault handling.
- */
- if (pte_write(pteval))
- entry = make_writable_migration_entry(
- page_to_pfn(subpage));
- else
- entry = make_readable_migration_entry(
- page_to_pfn(subpage));
- swp_pte = swp_entry_to_pte(entry);
- if (pte_soft_dirty(pteval))
- swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_uffd_wp(pteval))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
- set_pte_at(mm, address, pvmw.pte, swp_pte);
- /*
- * No need to invalidate here it will synchronize on
- * against the special swap migration pte.
- */
} else if (PageAnon(page)) {
swp_entry_t entry = { .val = page_private(subpage) };
pte_t swp_pte;
@@ -1758,6 +1669,268 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
.anon_lock = page_lock_anon_vma_read,
};
+ if (flags & TTU_RMAP_LOCKED)
+ rmap_walk_locked(page, &rwc);
+ else
+ rmap_walk(page, &rwc);
+
+ return !page_mapcount(page) ? true : false;
+}
+
+/*
+ * @arg: enum ttu_flags will be passed to this argument.
+ *
+ * If TTU_SPLIT_HUGE_PMD is specified any PMD mappings will be split into PTEs
+ * containing migration entries. This and TTU_RMAP_LOCKED are the only supported
+ * flags.
+ */
+static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
+ unsigned long address, void *arg)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ struct page_vma_mapped_walk pvmw = {
+ .page = page,
+ .vma = vma,
+ .address = address,
+ };
+ pte_t pteval;
+ struct page *subpage;
+ bool ret = true;
+ struct mmu_notifier_range range;
+ enum ttu_flags flags = (enum ttu_flags)(long)arg;
+
+ if (is_zone_device_page(page) && !is_device_private_page(page))
+ return true;
+
+ /*
+ * unmap_page() in mm/huge_memory.c is the only user of migration with
+ * TTU_SPLIT_HUGE_PMD and it wants to freeze.
+ */
+ if (flags & TTU_SPLIT_HUGE_PMD)
+ split_huge_pmd_address(vma, address, true, page);
+
+ /*
+ * For THP, we have to assume the worse case ie pmd for invalidation.
+ * For hugetlb, it could be much worse if we need to do pud
+ * invalidation in the case of pmd sharing.
+ *
+ * Note that the page cannot be freed in this function, as the caller
+ * of try_to_unmap() must hold a reference on the page.
+ */
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+ address,
+ min(vma->vm_end, address + page_size(page)));
+ if (PageHuge(page)) {
+ /*
+ * If sharing is possible, start and end will be adjusted
+ * accordingly.
+ */
+ adjust_range_if_pmd_sharing_possible(vma, &range.start,
+ &range.end);
+ }
+ mmu_notifier_invalidate_range_start(&range);
+
+ while (page_vma_mapped_walk(&pvmw)) {
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+ /* PMD-mapped THP migration entry */
+ if (!pvmw.pte) {
+ VM_BUG_ON_PAGE(PageHuge(page) ||
+ !PageTransCompound(page), page);
+
+ set_pmd_migration_entry(&pvmw, page);
+ continue;
+ }
+#endif
+
+ /* Unexpected PMD-mapped THP? */
+ VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+ subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+ address = pvmw.address;
+
+ if (PageHuge(page) && !PageAnon(page)) {
+ /*
+ * To call huge_pmd_unshare, i_mmap_rwsem must be
+ * held in write mode. Caller needs to explicitly
+ * do this outside rmap routines.
+ */
+ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
+ if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
+ /*
+ * huge_pmd_unshare unmapped an entire PMD
+ * page. There is no way of knowing exactly
+ * which PMDs may be cached for this mm, so
+ * we must flush them all. start/end were
+ * already adjusted above to cover this range.
+ */
+ flush_cache_range(vma, range.start, range.end);
+ flush_tlb_range(vma, range.start, range.end);
+ mmu_notifier_invalidate_range(mm, range.start,
+ range.end);
+
+ /*
+ * The ref count of the PMD page was dropped
+ * which is part of the way map counting
+ * is done for shared PMDs. Return 'true'
+ * here. When there is no other sharing,
+ * huge_pmd_unshare returns false and we will
+ * unmap the actual page and drop map count
+ * to zero.
+ */
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+ }
+
+ /* Nuke the page table entry. */
+ flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+ pteval = ptep_clear_flush(vma, address, pvmw.pte);
+
+ /* Move the dirty bit to the page. Now the pte is gone. */
+ if (pte_dirty(pteval))
+ set_page_dirty(page);
+
+ /* Update high watermark before we lower rss */
+ update_hiwater_rss(mm);
+
+ if (is_zone_device_page(page)) {
+ swp_entry_t entry;
+ pte_t swp_pte;
+
+ /*
+ * Store the pfn of the page in a special migration
+ * pte. do_swap_page() will wait until the migration
+ * pte is removed and then restart fault handling.
+ */
+ entry = make_readable_migration_entry(
+ page_to_pfn(page));
+ swp_pte = swp_entry_to_pte(entry);
+
+ /*
+ * pteval maps a zone device page and is therefore
+ * a swap pte.
+ */
+ if (pte_swp_soft_dirty(pteval))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_swp_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
+ /*
+ * No need to invalidate here; it will synchronize
+ * against the special swap migration pte.
+ *
+ * The assignment to subpage above was computed from a
+ * swap PTE which results in an invalid pointer.
+ * Since only PAGE_SIZE pages can currently be
+ * migrated, just set it to page. This will need to be
+ * changed when hugepage migrations to device private
+ * memory are supported.
+ */
+ subpage = page;
+ } else if (PageHWPoison(page)) {
+ pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+ if (PageHuge(page)) {
+ hugetlb_count_sub(compound_nr(page), mm);
+ set_huge_swap_pte_at(mm, address,
+ pvmw.pte, pteval,
+ vma_mmu_pagesize(vma));
+ } else {
+ dec_mm_counter(mm, mm_counter(page));
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ }
+
+ } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
+ /*
+ * The guest indicated that the page content is of no
+ * interest anymore. Simply discard the pte, vmscan
+ * will take care of the rest.
+ * A future reference will then fault in a new zero
+ * page. When userfaultfd is active, we must not drop
+ * this page though, as its main user (postcopy
+ * migration) will not expect userfaults on already
+ * copied pages.
+ */
+ dec_mm_counter(mm, mm_counter(page));
+ /* We have to invalidate as we cleared the pte */
+ mmu_notifier_invalidate_range(mm, address,
+ address + PAGE_SIZE);
+ } else {
+ swp_entry_t entry;
+ pte_t swp_pte;
+
+ if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+
+ /*
+ * Store the pfn of the page in a special migration
+ * pte. do_swap_page() will wait until the migration
+ * pte is removed and then restart fault handling.
+ */
+ if (pte_write(pteval))
+ entry = make_writable_migration_entry(
+ page_to_pfn(subpage));
+ else
+ entry = make_readable_migration_entry(
+ page_to_pfn(subpage));
+
+ swp_pte = swp_entry_to_pte(entry);
+ if (pte_soft_dirty(pteval))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ set_pte_at(mm, address, pvmw.pte, swp_pte);
+ /*
+ * No need to invalidate here; it will synchronize
+ * against the special swap migration pte.
+ */
+ }
+
+ /*
+ * No need to call mmu_notifier_invalidate_range() as it has
+ * been done above for all cases requiring it to happen under
+ * page table lock before mmu_notifier_invalidate_range_end().
+ *
+ * See Documentation/vm/mmu_notifier.rst
+ */
+ page_remove_rmap(subpage, PageHuge(page));
+ put_page(page);
+ }
+
+ mmu_notifier_invalidate_range_end(&range);
+
+ return ret;
+}
+
+/**
+ * try_to_migrate - try to replace all page table mappings with swap entries
+ * @page: the page to replace page table entries for
+ * @flags: action and flags
+ *
+ * Tries to remove all the page table entries which are mapping this page and
+ * replace them with special swap entries. Caller must hold the page lock.
+ *
+ * If successful, return true. Otherwise, false.
+ */
+bool try_to_migrate(struct page *page, enum ttu_flags flags)
+{
+ struct rmap_walk_control rwc = {
+ .rmap_one = try_to_migrate_one,
+ .arg = (void *)flags,
+ .done = page_not_mapped,
+ .anon_lock = page_lock_anon_vma_read,
+ };
+
+ /*
+ * Migration always ignores mlock and only supports TTU_RMAP_LOCKED and
+ * TTU_SPLIT_HUGE_PMD flags.
+ */
+ if (WARN_ON_ONCE(flags & ~(TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD)))
+ return false;
+
/*
* During exec, a temporary VMA is setup and later moved.
* The VMA is moved under the anon_vma lock but not the
@@ -1766,8 +1939,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
* locking requirements of exec(), migration skips
* temporary VMAs until after exec() completes.
*/
- if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
- && !PageKsm(page) && PageAnon(page))
+ if (!PageKsm(page) && PageAnon(page))
rwc.invalid_vma = invalid_migration_vma;
if (flags & TTU_RMAP_LOCKED)
--
2.20.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v10 04/10] mm/rmap: Split migration into its own function
@ 2021-06-07 7:58 ` Alistair Popple
0 siblings, 0 replies; 3+ messages in thread
From: Alistair Popple @ 2021-06-07 7:58 UTC (permalink / raw)
To: linux-mm, akpm
Cc: rcampbell, willy, linux-doc, nouveau, Alistair Popple, hughd,
linux-kernel, dri-devel, hch, peterx, shakeelb, bskeggs, jgg,
jhubbard, Christoph Hellwig
Migration is currently implemented as a mode of operation for
try_to_unmap_one(), generally specified by passing the TTU_MIGRATION flag
or, in the case of splitting a huge anonymous page, TTU_SPLIT_FREEZE.
However, it does not have much in common with the rest of the unmap
functionality of try_to_unmap_one(), so splitting it into a separate
function reduces the complexity of try_to_unmap_one(), making it more
readable.
Several simplifications can also be made in try_to_migrate_one() based
on the following observations:
- All users of TTU_MIGRATION also set TTU_IGNORE_MLOCK.
- No users of TTU_MIGRATION ever set TTU_IGNORE_HWPOISON.
- No users of TTU_MIGRATION ever set TTU_BATCH_FLUSH.
TTU_SPLIT_FREEZE is a special case of migration used when splitting an
anonymous page. This is most easily dealt with by calling the correct
function from unmap_page() in mm/huge_memory.c - either
try_to_migrate() for PageAnon or try_to_unmap().
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
---
v5:
* Added comments about how PMD splitting works for migration vs.
unmapping
* Tightened up the flag check in try_to_migrate() to be explicit about
which TTU_XXX flags are supported.
---
include/linux/rmap.h | 4 +-
mm/huge_memory.c | 15 +-
mm/migrate.c | 9 +-
mm/rmap.c | 358 ++++++++++++++++++++++++++++++++-----------
4 files changed, 280 insertions(+), 106 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 38a746787c2f..0e25d829f742 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -86,8 +86,6 @@ struct anon_vma_chain {
};
enum ttu_flags {
- TTU_MIGRATION = 0x1, /* migration mode */
-
TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */
TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */
TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */
@@ -96,7 +94,6 @@ enum ttu_flags {
* do a final flush if necessary */
TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock:
* caller holds it */
- TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
};
#ifdef CONFIG_MMU
@@ -193,6 +190,7 @@ static inline void page_dup_rmap(struct page *page, bool compound)
int page_referenced(struct page *, int is_locked,
struct mem_cgroup *memcg, unsigned long *vm_flags);
+bool try_to_migrate(struct page *page, enum ttu_flags flags);
bool try_to_unmap(struct page *, enum ttu_flags flags);
/* Avoid racy checks */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2ec6dab72217..6dddc75b89ee 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2345,16 +2345,21 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
static void unmap_page(struct page *page)
{
- enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
- TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+ enum ttu_flags ttu_flags = TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
bool unmap_success;
VM_BUG_ON_PAGE(!PageHead(page), page);
if (PageAnon(page))
- ttu_flags |= TTU_SPLIT_FREEZE;
-
- unmap_success = try_to_unmap(page, ttu_flags);
+ unmap_success = try_to_migrate(page, ttu_flags);
+ else
+ /*
+ * Don't install migration entries for file backed pages. This
+ * helps handle cases when i_size is in the middle of the page
+ * as there is no need to unmap pages beyond i_size manually.
+ */
+ unmap_success = try_to_unmap(page, ttu_flags |
+ TTU_IGNORE_MLOCK);
VM_BUG_ON_PAGE(!unmap_success, page);
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 930de919b1f2..05740f816bc4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1103,7 +1103,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
/* Establish migration ptes */
VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
page);
- try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK);
+ try_to_migrate(page, 0);
page_was_mapped = 1;
}
@@ -1305,7 +1305,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
if (page_mapped(hpage)) {
bool mapping_locked = false;
- enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK;
+ enum ttu_flags ttu = 0;
if (!PageAnon(hpage)) {
/*
@@ -1322,7 +1322,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
ttu |= TTU_RMAP_LOCKED;
}
- try_to_unmap(hpage, ttu);
+ try_to_migrate(hpage, ttu);
page_was_mapped = 1;
if (mapping_locked)
@@ -2712,7 +2712,6 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
*/
static void migrate_vma_unmap(struct migrate_vma *migrate)
{
- int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK;
const unsigned long npages = migrate->npages;
const unsigned long start = migrate->start;
unsigned long addr, i, restore = 0;
@@ -2724,7 +2723,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
continue;
if (page_mapped(page)) {
- try_to_unmap(page, flags);
+ try_to_migrate(page, 0);
if (page_mapped(page))
goto restore;
}
diff --git a/mm/rmap.c b/mm/rmap.c
index b6c50df08b3b..be0450d905cd 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1405,14 +1405,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
- if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
- is_zone_device_page(page) && !is_device_private_page(page))
- return true;
-
- if (flags & TTU_SPLIT_HUGE_PMD) {
- split_huge_pmd_address(vma, address,
- flags & TTU_SPLIT_FREEZE, page);
- }
+ if (flags & TTU_SPLIT_HUGE_PMD)
+ split_huge_pmd_address(vma, address, false, page);
/*
* For THP, we have to assume the worse case ie pmd for invalidation.
@@ -1436,16 +1430,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
- /* PMD-mapped THP migration entry */
- if (!pvmw.pte && (flags & TTU_MIGRATION)) {
- VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
-
- set_pmd_migration_entry(&pvmw, page);
- continue;
- }
-#endif
-
/*
* If the page is mlock()d, we cannot swap it out.
* If it's recently referenced (perhaps page_referenced
@@ -1507,46 +1491,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
}
}
- if (IS_ENABLED(CONFIG_MIGRATION) &&
- (flags & TTU_MIGRATION) &&
- is_zone_device_page(page)) {
- swp_entry_t entry;
- pte_t swp_pte;
-
- pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
-
- /*
- * Store the pfn of the page in a special migration
- * pte. do_swap_page() will wait until the migration
- * pte is removed and then restart fault handling.
- */
- entry = make_readable_migration_entry(page_to_pfn(page));
- swp_pte = swp_entry_to_pte(entry);
-
- /*
- * pteval maps a zone device page and is therefore
- * a swap pte.
- */
- if (pte_swp_soft_dirty(pteval))
- swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_swp_uffd_wp(pteval))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
- set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
- /*
- * No need to invalidate here it will synchronize on
- * against the special swap migration pte.
- *
- * The assignment to subpage above was computed from a
- * swap PTE which results in an invalid pointer.
- * Since only PAGE_SIZE pages can currently be
- * migrated, just set it to page. This will need to be
- * changed when hugepage migrations to device private
- * memory are supported.
- */
- subpage = page;
- goto discard;
- }
-
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
if (should_defer_flush(mm, flags)) {
@@ -1599,39 +1543,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
/* We have to invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm, address,
address + PAGE_SIZE);
- } else if (IS_ENABLED(CONFIG_MIGRATION) &&
- (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
- swp_entry_t entry;
- pte_t swp_pte;
-
- if (arch_unmap_one(mm, vma, address, pteval) < 0) {
- set_pte_at(mm, address, pvmw.pte, pteval);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
-
- /*
- * Store the pfn of the page in a special migration
- * pte. do_swap_page() will wait until the migration
- * pte is removed and then restart fault handling.
- */
- if (pte_write(pteval))
- entry = make_writable_migration_entry(
- page_to_pfn(subpage));
- else
- entry = make_readable_migration_entry(
- page_to_pfn(subpage));
- swp_pte = swp_entry_to_pte(entry);
- if (pte_soft_dirty(pteval))
- swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_uffd_wp(pteval))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
- set_pte_at(mm, address, pvmw.pte, swp_pte);
- /*
- * No need to invalidate here it will synchronize on
- * against the special swap migration pte.
- */
} else if (PageAnon(page)) {
swp_entry_t entry = { .val = page_private(subpage) };
pte_t swp_pte;
@@ -1758,6 +1669,268 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
.anon_lock = page_lock_anon_vma_read,
};
+ if (flags & TTU_RMAP_LOCKED)
+ rmap_walk_locked(page, &rwc);
+ else
+ rmap_walk(page, &rwc);
+
+ return !page_mapcount(page) ? true : false;
+}
+
+/*
+ * @arg: enum ttu_flags will be passed to this argument.
+ *
+ * If TTU_SPLIT_HUGE_PMD is specified any PMD mappings will be split into PTEs
+ * containing migration entries. This and TTU_RMAP_LOCKED are the only supported
+ * flags.
+ */
+static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
+ unsigned long address, void *arg)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ struct page_vma_mapped_walk pvmw = {
+ .page = page,
+ .vma = vma,
+ .address = address,
+ };
+ pte_t pteval;
+ struct page *subpage;
+ bool ret = true;
+ struct mmu_notifier_range range;
+ enum ttu_flags flags = (enum ttu_flags)(long)arg;
+
+ if (is_zone_device_page(page) && !is_device_private_page(page))
+ return true;
+
+ /*
+ * unmap_page() in mm/huge_memory.c is the only user of migration with
+ * TTU_SPLIT_HUGE_PMD and it wants to freeze.
+ */
+ if (flags & TTU_SPLIT_HUGE_PMD)
+ split_huge_pmd_address(vma, address, true, page);
+
+ /*
+ * For THP, we have to assume the worse case ie pmd for invalidation.
+ * For hugetlb, it could be much worse if we need to do pud
+ * invalidation in the case of pmd sharing.
+ *
+ * Note that the page cannot be freed in this function, as the caller
+ * of try_to_unmap() must hold a reference on the page.
+ */
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+ address,
+ min(vma->vm_end, address + page_size(page)));
+ if (PageHuge(page)) {
+ /*
+ * If sharing is possible, start and end will be adjusted
+ * accordingly.
+ */
+ adjust_range_if_pmd_sharing_possible(vma, &range.start,
+ &range.end);
+ }
+ mmu_notifier_invalidate_range_start(&range);
+
+ while (page_vma_mapped_walk(&pvmw)) {
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+ /* PMD-mapped THP migration entry */
+ if (!pvmw.pte) {
+ VM_BUG_ON_PAGE(PageHuge(page) ||
+ !PageTransCompound(page), page);
+
+ set_pmd_migration_entry(&pvmw, page);
+ continue;
+ }
+#endif
+
+ /* Unexpected PMD-mapped THP? */
+ VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+ subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+ address = pvmw.address;
+
+ if (PageHuge(page) && !PageAnon(page)) {
+ /*
+ * To call huge_pmd_unshare, i_mmap_rwsem must be
+ * held in write mode. Caller needs to explicitly
+ * do this outside rmap routines.
+ */
+ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
+ if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
+ /*
+ * huge_pmd_unshare unmapped an entire PMD
+ * page. There is no way of knowing exactly
+ * which PMDs may be cached for this mm, so
+ * we must flush them all. start/end were
+ * already adjusted above to cover this range.
+ */
+ flush_cache_range(vma, range.start, range.end);
+ flush_tlb_range(vma, range.start, range.end);
+ mmu_notifier_invalidate_range(mm, range.start,
+ range.end);
+
+ /*
+ * The ref count of the PMD page was dropped
+ * which is part of the way map counting
+ * is done for shared PMDs. Return 'true'
+ * here. When there is no other sharing,
+ * huge_pmd_unshare returns false and we will
+ * unmap the actual page and drop map count
+ * to zero.
+ */
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+ }
+
+ /* Nuke the page table entry. */
+ flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+ pteval = ptep_clear_flush(vma, address, pvmw.pte);
+
+ /* Move the dirty bit to the page. Now the pte is gone. */
+ if (pte_dirty(pteval))
+ set_page_dirty(page);
+
+ /* Update high watermark before we lower rss */
+ update_hiwater_rss(mm);
+
+ if (is_zone_device_page(page)) {
+ swp_entry_t entry;
+ pte_t swp_pte;
+
+ /*
+ * Store the pfn of the page in a special migration
+ * pte. do_swap_page() will wait until the migration
+ * pte is removed and then restart fault handling.
+ */
+ entry = make_readable_migration_entry(
+ page_to_pfn(page));
+ swp_pte = swp_entry_to_pte(entry);
+
+ /*
+ * pteval maps a zone device page and is therefore
+ * a swap pte.
+ */
+ if (pte_swp_soft_dirty(pteval))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_swp_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
+ /*
+ * No need to invalidate here; it will synchronize
+ * against the special swap migration pte.
+ *
+ * The assignment to subpage above was computed from a
+ * swap PTE which results in an invalid pointer.
+ * Since only PAGE_SIZE pages can currently be
+ * migrated, just set it to page. This will need to be
+ * changed when hugepage migrations to device private
+ * memory are supported.
+ */
+ subpage = page;
+ } else if (PageHWPoison(page)) {
+ pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+ if (PageHuge(page)) {
+ hugetlb_count_sub(compound_nr(page), mm);
+ set_huge_swap_pte_at(mm, address,
+ pvmw.pte, pteval,
+ vma_mmu_pagesize(vma));
+ } else {
+ dec_mm_counter(mm, mm_counter(page));
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ }
+
+ } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
+ /*
+ * The guest indicated that the page content is of no
+ * interest anymore. Simply discard the pte, vmscan
+ * will take care of the rest.
+ * A future reference will then fault in a new zero
+ * page. When userfaultfd is active, we must not drop
+ * this page though, as its main user (postcopy
+ * migration) will not expect userfaults on already
+ * copied pages.
+ */
+ dec_mm_counter(mm, mm_counter(page));
+ /* We have to invalidate as we cleared the pte */
+ mmu_notifier_invalidate_range(mm, address,
+ address + PAGE_SIZE);
+ } else {
+ swp_entry_t entry;
+ pte_t swp_pte;
+
+ if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+
+ /*
+ * Store the pfn of the page in a special migration
+ * pte. do_swap_page() will wait until the migration
+ * pte is removed and then restart fault handling.
+ */
+ if (pte_write(pteval))
+ entry = make_writable_migration_entry(
+ page_to_pfn(subpage));
+ else
+ entry = make_readable_migration_entry(
+ page_to_pfn(subpage));
+
+ swp_pte = swp_entry_to_pte(entry);
+ if (pte_soft_dirty(pteval))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ set_pte_at(mm, address, pvmw.pte, swp_pte);
+ /*
+ * No need to invalidate here; it will synchronize
+ * against the special swap migration pte.
+ */
+ }
+
+ /*
+ * No need to call mmu_notifier_invalidate_range() as it has
+ * been done above for all cases requiring it to happen under
+ * page table lock before mmu_notifier_invalidate_range_end().
+ *
+ * See Documentation/vm/mmu_notifier.rst
+ */
+ page_remove_rmap(subpage, PageHuge(page));
+ put_page(page);
+ }
+
+ mmu_notifier_invalidate_range_end(&range);
+
+ return ret;
+}
+
+/**
+ * try_to_migrate - try to replace all page table mappings with swap entries
+ * @page: the page to replace page table entries for
+ * @flags: action and flags
+ *
+ * Tries to remove all the page table entries which are mapping this page and
+ * replace them with special swap entries. Caller must hold the page lock.
+ *
+ * If successful, return true. Otherwise, false.
+ */
+bool try_to_migrate(struct page *page, enum ttu_flags flags)
+{
+ struct rmap_walk_control rwc = {
+ .rmap_one = try_to_migrate_one,
+ .arg = (void *)flags,
+ .done = page_not_mapped,
+ .anon_lock = page_lock_anon_vma_read,
+ };
+
+ /*
+ * Migration always ignores mlock and only supports TTU_RMAP_LOCKED and
+ * TTU_SPLIT_HUGE_PMD flags.
+ */
+ if (WARN_ON_ONCE(flags & ~(TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD)))
+ return false;
+
/*
* During exec, a temporary VMA is setup and later moved.
* The VMA is moved under the anon_vma lock but not the
@@ -1766,8 +1939,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
* locking requirements of exec(), migration skips
* temporary VMAs until after exec() completes.
*/
- if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
- && !PageKsm(page) && PageAnon(page))
+ if (!PageKsm(page) && PageAnon(page))
rwc.invalid_vma = invalid_migration_vma;
if (flags & TTU_RMAP_LOCKED)
--
2.20.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v10 04/10] mm/rmap: Split migration into its own function
@ 2021-06-09 17:43 kernel test robot
0 siblings, 0 replies; 3+ messages in thread
From: kernel test robot @ 2021-06-09 17:43 UTC (permalink / raw)
To: kbuild
[-- Attachment #1: Type: text/plain, Size: 16246 bytes --]
CC: kbuild-all(a)lists.01.org
In-Reply-To: <20210607075855.5084-5-apopple@nvidia.com>
References: <20210607075855.5084-5-apopple@nvidia.com>
TO: Alistair Popple <apopple@nvidia.com>
TO: linux-mm(a)kvack.org
TO: akpm(a)linux-foundation.org
CC: rcampbell(a)nvidia.com
CC: linux-doc(a)vger.kernel.org
CC: nouveau(a)lists.freedesktop.org
CC: hughd(a)google.com
CC: linux-kernel(a)vger.kernel.org
CC: dri-devel(a)lists.freedesktop.org
CC: hch(a)infradead.org
CC: bskeggs(a)redhat.com
Hi Alistair,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on s390/features]
[also build test WARNING on kselftest/next linus/master v5.13-rc5]
[cannot apply to hnaz-linux-mm/master next-20210609]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Alistair-Popple/Add-support-for-SVM-atomics-in-Nouveau/20210607-160056
base: https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git features
:::::: branch date: 2 days ago
:::::: commit date: 2 days ago
config: parisc-randconfig-s032-20210607 (attached as .config)
compiler: hppa64-linux-gcc (GCC) 9.3.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.3-341-g8af24329-dirty
# https://github.com/0day-ci/linux/commit/80e54e5e679c95e425608a45721ca6cc30ae25f4
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Alistair-Popple/Add-support-for-SVM-atomics-in-Nouveau/20210607-160056
git checkout 80e54e5e679c95e425608a45721ca6cc30ae25f4
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' W=1 ARCH=parisc
If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
sparse warnings: (new ones prefixed by >>)
mm/rmap.c: note: in included file (through include/linux/ksm.h):
include/linux/rmap.h:217:28: sparse: sparse: context imbalance in 'page_referenced_one' - unexpected unlock
mm/rmap.c:970:25: sparse: sparse: context imbalance in 'page_mkclean_one' - different lock contexts for basic block
include/linux/rmap.h:217:28: sparse: sparse: context imbalance in 'try_to_unmap_one' - unexpected unlock
>> mm/rmap.c:1899:17: sparse: sparse: context imbalance in 'try_to_migrate_one' - different lock contexts for basic block
include/linux/rmap.h:217:28: sparse: sparse: context imbalance in 'page_mlock_one' - unexpected unlock
vim +/try_to_migrate_one +1899 mm/rmap.c
80e54e5e679c95 Alistair Popple 2021-06-07 1744
80e54e5e679c95 Alistair Popple 2021-06-07 1745 /* Unexpected PMD-mapped THP? */
80e54e5e679c95 Alistair Popple 2021-06-07 1746 VM_BUG_ON_PAGE(!pvmw.pte, page);
80e54e5e679c95 Alistair Popple 2021-06-07 1747
80e54e5e679c95 Alistair Popple 2021-06-07 1748 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
80e54e5e679c95 Alistair Popple 2021-06-07 1749 address = pvmw.address;
80e54e5e679c95 Alistair Popple 2021-06-07 1750
80e54e5e679c95 Alistair Popple 2021-06-07 1751 if (PageHuge(page) && !PageAnon(page)) {
80e54e5e679c95 Alistair Popple 2021-06-07 1752 /*
80e54e5e679c95 Alistair Popple 2021-06-07 1753 * To call huge_pmd_unshare, i_mmap_rwsem must be
80e54e5e679c95 Alistair Popple 2021-06-07 1754 * held in write mode. Caller needs to explicitly
80e54e5e679c95 Alistair Popple 2021-06-07 1755 * do this outside rmap routines.
80e54e5e679c95 Alistair Popple 2021-06-07 1756 */
80e54e5e679c95 Alistair Popple 2021-06-07 1757 VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
80e54e5e679c95 Alistair Popple 2021-06-07 1758 if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
80e54e5e679c95 Alistair Popple 2021-06-07 1759 /*
80e54e5e679c95 Alistair Popple 2021-06-07 1760 * huge_pmd_unshare unmapped an entire PMD
80e54e5e679c95 Alistair Popple 2021-06-07 1761 * page. There is no way of knowing exactly
80e54e5e679c95 Alistair Popple 2021-06-07 1762 * which PMDs may be cached for this mm, so
80e54e5e679c95 Alistair Popple 2021-06-07 1763 * we must flush them all. start/end were
80e54e5e679c95 Alistair Popple 2021-06-07 1764 * already adjusted above to cover this range.
80e54e5e679c95 Alistair Popple 2021-06-07 1765 */
80e54e5e679c95 Alistair Popple 2021-06-07 1766 flush_cache_range(vma, range.start, range.end);
80e54e5e679c95 Alistair Popple 2021-06-07 1767 flush_tlb_range(vma, range.start, range.end);
80e54e5e679c95 Alistair Popple 2021-06-07 1768 mmu_notifier_invalidate_range(mm, range.start,
80e54e5e679c95 Alistair Popple 2021-06-07 1769 range.end);
80e54e5e679c95 Alistair Popple 2021-06-07 1770
80e54e5e679c95 Alistair Popple 2021-06-07 1771 /*
80e54e5e679c95 Alistair Popple 2021-06-07 1772 * The ref count of the PMD page was dropped
80e54e5e679c95 Alistair Popple 2021-06-07 1773 * which is part of the way map counting
80e54e5e679c95 Alistair Popple 2021-06-07 1774 * is done for shared PMDs. Return 'true'
80e54e5e679c95 Alistair Popple 2021-06-07 1775 * here. When there is no other sharing,
80e54e5e679c95 Alistair Popple 2021-06-07 1776 * huge_pmd_unshare returns false and we will
80e54e5e679c95 Alistair Popple 2021-06-07 1777 * unmap the actual page and drop map count
80e54e5e679c95 Alistair Popple 2021-06-07 1778 * to zero.
80e54e5e679c95 Alistair Popple 2021-06-07 1779 */
80e54e5e679c95 Alistair Popple 2021-06-07 1780 page_vma_mapped_walk_done(&pvmw);
80e54e5e679c95 Alistair Popple 2021-06-07 1781 break;
80e54e5e679c95 Alistair Popple 2021-06-07 1782 }
80e54e5e679c95 Alistair Popple 2021-06-07 1783 }
80e54e5e679c95 Alistair Popple 2021-06-07 1784
80e54e5e679c95 Alistair Popple 2021-06-07 1785 /* Nuke the page table entry. */
80e54e5e679c95 Alistair Popple 2021-06-07 1786 flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
80e54e5e679c95 Alistair Popple 2021-06-07 1787 pteval = ptep_clear_flush(vma, address, pvmw.pte);
80e54e5e679c95 Alistair Popple 2021-06-07 1788
80e54e5e679c95 Alistair Popple 2021-06-07 1789 /* Move the dirty bit to the page. Now the pte is gone. */
80e54e5e679c95 Alistair Popple 2021-06-07 1790 if (pte_dirty(pteval))
80e54e5e679c95 Alistair Popple 2021-06-07 1791 set_page_dirty(page);
80e54e5e679c95 Alistair Popple 2021-06-07 1792
80e54e5e679c95 Alistair Popple 2021-06-07 1793 /* Update high watermark before we lower rss */
80e54e5e679c95 Alistair Popple 2021-06-07 1794 update_hiwater_rss(mm);
80e54e5e679c95 Alistair Popple 2021-06-07 1795
80e54e5e679c95 Alistair Popple 2021-06-07 1796 if (is_zone_device_page(page)) {
80e54e5e679c95 Alistair Popple 2021-06-07 1797 swp_entry_t entry;
80e54e5e679c95 Alistair Popple 2021-06-07 1798 pte_t swp_pte;
80e54e5e679c95 Alistair Popple 2021-06-07 1799
80e54e5e679c95 Alistair Popple 2021-06-07 1800 /*
80e54e5e679c95 Alistair Popple 2021-06-07 1801 * Store the pfn of the page in a special migration
80e54e5e679c95 Alistair Popple 2021-06-07 1802 * pte. do_swap_page() will wait until the migration
80e54e5e679c95 Alistair Popple 2021-06-07 1803 * pte is removed and then restart fault handling.
80e54e5e679c95 Alistair Popple 2021-06-07 1804 */
80e54e5e679c95 Alistair Popple 2021-06-07 1805 entry = make_readable_migration_entry(
80e54e5e679c95 Alistair Popple 2021-06-07 1806 page_to_pfn(page));
80e54e5e679c95 Alistair Popple 2021-06-07 1807 swp_pte = swp_entry_to_pte(entry);
80e54e5e679c95 Alistair Popple 2021-06-07 1808
80e54e5e679c95 Alistair Popple 2021-06-07 1809 /*
80e54e5e679c95 Alistair Popple 2021-06-07 1810 * pteval maps a zone device page and is therefore
80e54e5e679c95 Alistair Popple 2021-06-07 1811 * a swap pte.
80e54e5e679c95 Alistair Popple 2021-06-07 1812 */
80e54e5e679c95 Alistair Popple 2021-06-07 1813 if (pte_swp_soft_dirty(pteval))
80e54e5e679c95 Alistair Popple 2021-06-07 1814 swp_pte = pte_swp_mksoft_dirty(swp_pte);
80e54e5e679c95 Alistair Popple 2021-06-07 1815 if (pte_swp_uffd_wp(pteval))
80e54e5e679c95 Alistair Popple 2021-06-07 1816 swp_pte = pte_swp_mkuffd_wp(swp_pte);
80e54e5e679c95 Alistair Popple 2021-06-07 1817 set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
80e54e5e679c95 Alistair Popple 2021-06-07 1818 /*
80e54e5e679c95 Alistair Popple 2021-06-07 1819 * No need to invalidate here it will synchronize on
80e54e5e679c95 Alistair Popple 2021-06-07 1820 * against the special swap migration pte.
80e54e5e679c95 Alistair Popple 2021-06-07 1821 *
80e54e5e679c95 Alistair Popple 2021-06-07 1822 * The assignment to subpage above was computed from a
80e54e5e679c95 Alistair Popple 2021-06-07 1823 * swap PTE which results in an invalid pointer.
80e54e5e679c95 Alistair Popple 2021-06-07 1824 * Since only PAGE_SIZE pages can currently be
80e54e5e679c95 Alistair Popple 2021-06-07 1825 * migrated, just set it to page. This will need to be
80e54e5e679c95 Alistair Popple 2021-06-07 1826 * changed when hugepage migrations to device private
80e54e5e679c95 Alistair Popple 2021-06-07 1827 * memory are supported.
80e54e5e679c95 Alistair Popple 2021-06-07 1828 */
80e54e5e679c95 Alistair Popple 2021-06-07 1829 subpage = page;
80e54e5e679c95 Alistair Popple 2021-06-07 1830 } else if (PageHWPoison(page)) {
80e54e5e679c95 Alistair Popple 2021-06-07 1831 pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
80e54e5e679c95 Alistair Popple 2021-06-07 1832 if (PageHuge(page)) {
80e54e5e679c95 Alistair Popple 2021-06-07 1833 hugetlb_count_sub(compound_nr(page), mm);
80e54e5e679c95 Alistair Popple 2021-06-07 1834 set_huge_swap_pte_at(mm, address,
80e54e5e679c95 Alistair Popple 2021-06-07 1835 pvmw.pte, pteval,
80e54e5e679c95 Alistair Popple 2021-06-07 1836 vma_mmu_pagesize(vma));
80e54e5e679c95 Alistair Popple 2021-06-07 1837 } else {
80e54e5e679c95 Alistair Popple 2021-06-07 1838 dec_mm_counter(mm, mm_counter(page));
80e54e5e679c95 Alistair Popple 2021-06-07 1839 set_pte_at(mm, address, pvmw.pte, pteval);
80e54e5e679c95 Alistair Popple 2021-06-07 1840 }
80e54e5e679c95 Alistair Popple 2021-06-07 1841
80e54e5e679c95 Alistair Popple 2021-06-07 1842 } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
80e54e5e679c95 Alistair Popple 2021-06-07 1843 /*
80e54e5e679c95 Alistair Popple 2021-06-07 1844 * The guest indicated that the page content is of no
80e54e5e679c95 Alistair Popple 2021-06-07 1845 * interest anymore. Simply discard the pte, vmscan
80e54e5e679c95 Alistair Popple 2021-06-07 1846 * will take care of the rest.
80e54e5e679c95 Alistair Popple 2021-06-07 1847 * A future reference will then fault in a new zero
80e54e5e679c95 Alistair Popple 2021-06-07 1848 * page. When userfaultfd is active, we must not drop
80e54e5e679c95 Alistair Popple 2021-06-07 1849 * this page though, as its main user (postcopy
80e54e5e679c95 Alistair Popple 2021-06-07 1850 * migration) will not expect userfaults on already
80e54e5e679c95 Alistair Popple 2021-06-07 1851 * copied pages.
80e54e5e679c95 Alistair Popple 2021-06-07 1852 */
80e54e5e679c95 Alistair Popple 2021-06-07 1853 dec_mm_counter(mm, mm_counter(page));
80e54e5e679c95 Alistair Popple 2021-06-07 1854 /* We have to invalidate as we cleared the pte */
80e54e5e679c95 Alistair Popple 2021-06-07 1855 mmu_notifier_invalidate_range(mm, address,
80e54e5e679c95 Alistair Popple 2021-06-07 1856 address + PAGE_SIZE);
80e54e5e679c95 Alistair Popple 2021-06-07 1857 } else {
80e54e5e679c95 Alistair Popple 2021-06-07 1858 swp_entry_t entry;
80e54e5e679c95 Alistair Popple 2021-06-07 1859 pte_t swp_pte;
80e54e5e679c95 Alistair Popple 2021-06-07 1860
80e54e5e679c95 Alistair Popple 2021-06-07 1861 if (arch_unmap_one(mm, vma, address, pteval) < 0) {
80e54e5e679c95 Alistair Popple 2021-06-07 1862 set_pte_at(mm, address, pvmw.pte, pteval);
80e54e5e679c95 Alistair Popple 2021-06-07 1863 ret = false;
80e54e5e679c95 Alistair Popple 2021-06-07 1864 page_vma_mapped_walk_done(&pvmw);
80e54e5e679c95 Alistair Popple 2021-06-07 1865 break;
80e54e5e679c95 Alistair Popple 2021-06-07 1866 }
80e54e5e679c95 Alistair Popple 2021-06-07 1867
80e54e5e679c95 Alistair Popple 2021-06-07 1868 /*
80e54e5e679c95 Alistair Popple 2021-06-07 1869 * Store the pfn of the page in a special migration
80e54e5e679c95 Alistair Popple 2021-06-07 1870 * pte. do_swap_page() will wait until the migration
80e54e5e679c95 Alistair Popple 2021-06-07 1871 * pte is removed and then restart fault handling.
80e54e5e679c95 Alistair Popple 2021-06-07 1872 */
80e54e5e679c95 Alistair Popple 2021-06-07 1873 if (pte_write(pteval))
80e54e5e679c95 Alistair Popple 2021-06-07 1874 entry = make_writable_migration_entry(
80e54e5e679c95 Alistair Popple 2021-06-07 1875 page_to_pfn(subpage));
80e54e5e679c95 Alistair Popple 2021-06-07 1876 else
80e54e5e679c95 Alistair Popple 2021-06-07 1877 entry = make_readable_migration_entry(
80e54e5e679c95 Alistair Popple 2021-06-07 1878 page_to_pfn(subpage));
80e54e5e679c95 Alistair Popple 2021-06-07 1879
80e54e5e679c95 Alistair Popple 2021-06-07 1880 swp_pte = swp_entry_to_pte(entry);
80e54e5e679c95 Alistair Popple 2021-06-07 1881 if (pte_soft_dirty(pteval))
80e54e5e679c95 Alistair Popple 2021-06-07 1882 swp_pte = pte_swp_mksoft_dirty(swp_pte);
80e54e5e679c95 Alistair Popple 2021-06-07 1883 if (pte_uffd_wp(pteval))
80e54e5e679c95 Alistair Popple 2021-06-07 1884 swp_pte = pte_swp_mkuffd_wp(swp_pte);
80e54e5e679c95 Alistair Popple 2021-06-07 1885 set_pte_at(mm, address, pvmw.pte, swp_pte);
80e54e5e679c95 Alistair Popple 2021-06-07 1886 /*
80e54e5e679c95 Alistair Popple 2021-06-07 1887 * No need to invalidate here it will synchronize on
80e54e5e679c95 Alistair Popple 2021-06-07 1888 * against the special swap migration pte.
80e54e5e679c95 Alistair Popple 2021-06-07 1889 */
80e54e5e679c95 Alistair Popple 2021-06-07 1890 }
80e54e5e679c95 Alistair Popple 2021-06-07 1891
80e54e5e679c95 Alistair Popple 2021-06-07 1892 /*
80e54e5e679c95 Alistair Popple 2021-06-07 1893 * No need to call mmu_notifier_invalidate_range() it has be
80e54e5e679c95 Alistair Popple 2021-06-07 1894 * done above for all cases requiring it to happen under page
80e54e5e679c95 Alistair Popple 2021-06-07 1895 * table lock before mmu_notifier_invalidate_range_end()
80e54e5e679c95 Alistair Popple 2021-06-07 1896 *
80e54e5e679c95 Alistair Popple 2021-06-07 1897 * See Documentation/vm/mmu_notifier.rst
80e54e5e679c95 Alistair Popple 2021-06-07 1898 */
80e54e5e679c95 Alistair Popple 2021-06-07 @1899 page_remove_rmap(subpage, PageHuge(page));
80e54e5e679c95 Alistair Popple 2021-06-07 1900 put_page(page);
80e54e5e679c95 Alistair Popple 2021-06-07 1901 }
80e54e5e679c95 Alistair Popple 2021-06-07 1902
80e54e5e679c95 Alistair Popple 2021-06-07 1903 mmu_notifier_invalidate_range_end(&range);
80e54e5e679c95 Alistair Popple 2021-06-07 1904
80e54e5e679c95 Alistair Popple 2021-06-07 1905 return ret;
80e54e5e679c95 Alistair Popple 2021-06-07 1906 }
80e54e5e679c95 Alistair Popple 2021-06-07 1907
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 37498 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-06-09 17:43 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-09 17:43 [PATCH v10 04/10] mm/rmap: Split migration into its own function kernel test robot
-- strict thread matches above, loose matches on Subject: below --
2021-06-07 7:58 [PATCH v10 00/10] Add support for SVM atomics in Nouveau Alistair Popple
2021-06-07 7:58 ` [PATCH v10 04/10] mm/rmap: Split migration into its own function Alistair Popple
2021-06-07 7:58 ` Alistair Popple
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.