linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Jérôme Glisse" <jglisse@redhat.com>
To: akpm@linux-foundation.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Cc: "John Hubbard" <jhubbard@nvidia.com>,
	"Naoya Horiguchi" <n-horiguchi@ah.jp.nec.com>,
	"David Nellans" <dnellans@nvidia.com>,
	"Jérôme Glisse" <jglisse@redhat.com>
Subject: [HMM v17 12/14] mm/hmm/migrate: support un-addressable ZONE_DEVICE page in migration
Date: Fri, 27 Jan 2017 17:52:19 -0500	[thread overview]
Message-ID: <1485557541-7806-13-git-send-email-jglisse@redhat.com> (raw)
In-Reply-To: <1485557541-7806-1-git-send-email-jglisse@redhat.com>

Allow unmapping and restoring the special swap entries used for
un-addressable ZONE_DEVICE memory.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
---
 include/linux/migrate.h |   2 +
 mm/migrate.c            | 134 +++++++++++++++++++++++++++++++++++++-----------
 mm/rmap.c               |  47 +++++++++++++++++
 3 files changed, 153 insertions(+), 30 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index cd56e41..2d7904a 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -129,6 +129,8 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 #define MIGRATE_PFN_LOCKED	(1UL << (BITS_PER_LONG_LONG - 4))
 #define MIGRATE_PFN_WRITE	(1UL << (BITS_PER_LONG_LONG - 5))
 #define MIGRATE_PFN_ZERO	(1UL << (BITS_PER_LONG_LONG - 6))
+#define MIGRATE_PFN_DEVICE	(1UL << (BITS_PER_LONG_LONG - 7))
+#define MIGRATE_PFN_ERROR	(1UL << (BITS_PER_LONG_LONG - 8))
 #define MIGRATE_PFN_MASK	((1UL << (BITS_PER_LONG_LONG - PAGE_SHIFT)) - 1)
 
 static inline struct page *migrate_pfn_to_page(unsigned long mpfn)
diff --git a/mm/migrate.c b/mm/migrate.c
index d78c0e7..bc14b8e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -40,6 +40,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/page_idle.h>
 #include <linux/page_owner.h>
+#include <linux/memremap.h>
 
 #include <asm/tlbflush.h>
 
@@ -248,7 +249,15 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 		pte = arch_make_huge_pte(pte, vma, new, 0);
 	}
 #endif
-	flush_dcache_page(new);
+
+	if (unlikely(is_zone_device_page(new)) && !is_addressable_page(new)) {
+		entry = make_device_entry(new, pte_write(pte));
+		pte = swp_entry_to_pte(entry);
+		if (pte_swp_soft_dirty(*ptep))
+			pte = pte_mksoft_dirty(pte);
+	} else
+		flush_dcache_page(new);
+
 	set_pte_at(mm, addr, ptep, pte);
 
 	if (PageHuge(new)) {
@@ -2165,17 +2174,44 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 		pte = *ptep;
 		pfn = pte_pfn(pte);
 
-		if (!pte_present(pte)) {
+		if (pte_none(pte)) {
 			flags = pfn = 0;
 			goto next;
 		}
 
+		if (!pte_present(pte)) {
+			flags = pfn = 0;
+
+			/*
+			 * Only care about un-addressable device page special
+			 * page table entries. Other special swap entries are
+			 * not migratable and we ignore regular swapped pages.
+			 */
+			entry = pte_to_swp_entry(pte);
+			if (!is_device_entry(entry))
+				goto next;
+
+			page = device_entry_to_page(entry);
+			if (!dev_page_allow_migrate(page))
+				goto next;
+
+			flags = MIGRATE_PFN_VALID |
+				MIGRATE_PFN_DEVICE |
+				MIGRATE_PFN_MIGRATE;
+			if (is_write_device_entry(entry))
+				flags |= MIGRATE_PFN_WRITE;
+		} else {
+			page = vm_normal_page(migrate->vma, addr, pte);
+			flags = MIGRATE_PFN_VALID | MIGRATE_PFN_MIGRATE;
+			flags |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
+		}
+
 		/* FIXME support THP */
-		page = vm_normal_page(migrate->vma, addr, pte);
 		if (!page || !page->mapping || PageTransCompound(page)) {
 			flags = pfn = 0;
 			goto next;
 		}
+		pfn = page_to_pfn(page);
 
 		/*
 		 * By getting a reference on the page we pin it and blocks any
@@ -2187,8 +2223,6 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 		 */
 		get_page(page);
 		migrate->cpages++;
-		flags = MIGRATE_PFN_VALID | MIGRATE_PFN_MIGRATE;
-		flags |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
 
 		/*
 		 * Optimize for common case where page is only map once in one
@@ -2290,6 +2324,13 @@ static bool migrate_vma_check_page(struct page *page)
 	if (PageCompound(page))
 		return false;
 
+	/* Pages from ZONE_DEVICE have one extra reference */
+	if (is_zone_device_page(page)) {
+		if (!dev_page_allow_migrate(page))
+			return false;
+		extra++;
+	}
+
 	if ((page_count(page) - extra) > page_mapcount(page))
 		return false;
 
@@ -2327,28 +2368,31 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
 			migrate->src[i] |= MIGRATE_PFN_LOCKED;
 		}
 
-		if (!PageLRU(page) && allow_drain) {
-			/* Drain CPU's pagevec */
-			lru_add_drain_all();
-			allow_drain = false;
-		}
+		/* ZONE_DEVICE pages are not on the LRU */
+		if (!is_zone_device_page(page)) {
+			if (!PageLRU(page) && allow_drain) {
+				/* Drain CPU's pagevec */
+				lru_add_drain_all();
+				allow_drain = false;
+			}
 
-		if (isolate_lru_page(page)) {
-			if (remap) {
-				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
-				migrate->cpages--;
-				restore++;
-			} else {
-				migrate->src[i] = 0;
-				unlock_page(page);
-				migrate->cpages--;
-				put_page(page);
+			if (isolate_lru_page(page)) {
+				if (remap) {
+					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+					migrate->cpages--;
+					restore++;
+				} else {
+					migrate->src[i] = 0;
+					unlock_page(page);
+					migrate->cpages--;
+					put_page(page);
+				}
+				continue;
 			}
-			continue;
-		}
 
-		/* Drop the reference we took in collect */
-		put_page(page);
+			/* Drop the reference we took in collect */
+			put_page(page);
+		}
 
 		if (!migrate_vma_check_page(page)) {
 			if (remap) {
@@ -2356,14 +2400,19 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
 				migrate->cpages--;
 				restore++;
 
-				get_page(page);
-				putback_lru_page(page);
+				if (!is_zone_device_page(page)) {
+					get_page(page);
+					putback_lru_page(page);
+				}
 			} else {
 				migrate->src[i] = 0;
 				unlock_page(page);
 				migrate->cpages--;
 
-				putback_lru_page(page);
+				if (!is_zone_device_page(page))
+					putback_lru_page(page);
+				else
+					put_page(page);
 			}
 		}
 	}
@@ -2428,7 +2477,10 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
 		unlock_page(page);
 		restore--;
 
-		putback_lru_page(page);
+		if (is_zone_device_page(page))
+			put_page(page);
+		else
+			putback_lru_page(page);
 	}
 }
 
@@ -2459,6 +2511,22 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
 
 		mapping = page_mapping(page);
 
+		if (is_zone_device_page(newpage)) {
+			if (!dev_page_allow_migrate(newpage)) {
+				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+				continue;
+			}
+
+			/*
+			 * For now only support private anonymous memory when
+			 * migrating to un-addressable device memory.
+			 */
+			if (mapping && !is_addressable_page(newpage)) {
+				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+				continue;
+			}
+		}
+
 		r = migrate_page(mapping, newpage, page, MIGRATE_SYNC, false);
 		if (r != MIGRATEPAGE_SUCCESS)
 			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
@@ -2497,11 +2565,17 @@ static void migrate_vma_finalize(struct migrate_vma *migrate)
 		unlock_page(page);
 		migrate->cpages--;
 
-		putback_lru_page(page);
+		if (is_zone_device_page(page))
+			put_page(page);
+		else
+			putback_lru_page(page);
 
 		if (newpage != page) {
 			unlock_page(newpage);
-			putback_lru_page(newpage);
+			if (is_zone_device_page(newpage))
+				put_page(newpage);
+			else
+				putback_lru_page(newpage);
 		}
 	}
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 91619fd..c7b0b54 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -61,6 +61,7 @@
 #include <linux/hugetlb.h>
 #include <linux/backing-dev.h>
 #include <linux/page_idle.h>
+#include <linux/memremap.h>
 
 #include <asm/tlbflush.h>
 
@@ -1454,6 +1455,52 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			goto out;
 	}
 
+	if ((flags & TTU_MIGRATION) && is_zone_device_page(page)) {
+		swp_entry_t entry;
+		pte_t swp_pte;
+		pmd_t *pmdp;
+
+		if (!dev_page_allow_migrate(page))
+			goto out;
+
+		pmdp = mm_find_pmd(mm, address);
+		if (!pmdp)
+			goto out;
+
+		pte = pte_offset_map_lock(mm, pmdp, address, &ptl);
+		if (!pte)
+			goto out;
+
+		pteval = ptep_get_and_clear(mm, address, pte);
+		if (pte_present(pteval) || pte_none(pteval)) {
+			set_pte_at(mm, address, pte, pteval);
+			goto out_unmap;
+		}
+
+		entry = pte_to_swp_entry(pteval);
+		if (!is_device_entry(entry)) {
+			set_pte_at(mm, address, pte, pteval);
+			goto out_unmap;
+		}
+
+		if (device_entry_to_page(entry) != page) {
+			set_pte_at(mm, address, pte, pteval);
+			goto out_unmap;
+		}
+
+		/*
+		 * Store the pfn of the page in a special migration
+		 * pte. do_swap_page() will wait until the migration
+		 * pte is removed and then restart fault handling.
+		 */
+		entry = make_migration_entry(page, 0);
+		swp_pte = swp_entry_to_pte(entry);
+		if (pte_soft_dirty(*pte))
+			swp_pte = pte_swp_mksoft_dirty(swp_pte);
+		set_pte_at(mm, address, pte, swp_pte);
+		goto discard;
+	}
+
 	pte = page_check_address(page, mm, address, &ptl,
 				 PageTransCompound(page));
 	if (!pte)
-- 
2.4.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2017-01-27 21:51 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-27 22:52 [HMM v17 00/14] HMM (Heterogeneous Memory Management) v17 Jérôme Glisse
2017-01-27 22:52 ` [HMM v17 01/14] mm/memory/hotplug: convert device bool to int to allow for more flags v2 Jérôme Glisse
2017-01-27 22:52 ` [HMM v17 02/14] mm/ZONE_DEVICE/free-page: callback when page is freed v2 Jérôme Glisse
2017-01-27 22:52 ` [HMM v17 03/14] mm/ZONE_DEVICE/unaddressable: add support for un-addressable device memory v3 Jérôme Glisse
2017-01-27 22:52 ` [HMM v17 04/14] mm/ZONE_DEVICE/x86: add support for un-addressable device memory Jérôme Glisse
2017-01-27 22:52 ` [HMM v17 05/14] mm/migrate: add new boolean copy flag to migratepage() callback Jérôme Glisse
2017-01-27 22:52 ` [HMM v17 06/14] mm/migrate: new memory migration helper for use with device memory v3 Jérôme Glisse
2017-02-22 15:31   ` Reza Arbab
2017-01-27 22:52 ` [HMM v17 07/14] mm/migrate: migrate_vma() unmap page from vma while collecting pages Jérôme Glisse
2017-01-27 22:52 ` [HMM v17 08/14] mm/hmm: heterogeneous memory management (HMM for short) Jérôme Glisse
2017-01-27 22:52 ` [HMM v17 09/14] mm/hmm/mirror: mirror process address space on device with HMM helpers Jérôme Glisse
2017-03-13 22:10   ` Krishna Reddy
2017-01-27 22:52 ` [HMM v17 10/14] mm/hmm/mirror: helper to snapshot CPU page table Jérôme Glisse
2017-01-27 22:52 ` [HMM v17 11/14] mm/hmm/mirror: device page fault handler Jérôme Glisse
2017-01-27 22:52 ` Jérôme Glisse [this message]
2017-01-27 22:52 ` [HMM v17 13/14] mm/hmm/devmem: device memory hotplug using ZONE_DEVICE Jérôme Glisse
2017-01-27 22:52 ` [HMM v17 14/14] mm/hmm/devmem: dummy HMM device for ZONE_DEVICE memory v2 Jérôme Glisse
2017-01-28  5:39 ` [HMM v17 00/14] HMM (Heterogeneous Memory Management) v17 John Hubbard
2017-02-22  7:19 ` Balbir Singh
2017-02-22  8:16   ` Andrew Morton
2017-02-22  8:27     ` Balbir Singh
2017-02-22 15:49       ` Reza Arbab
2017-02-22 23:58       ` John Hubbard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1485557541-7806-13-git-send-email-jglisse@redhat.com \
    --to=jglisse@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=dnellans@nvidia.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=n-horiguchi@ah.jp.nec.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).