linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Balbir Singh <bsingharora@gmail.com>
To: "Jérôme Glisse" <jglisse@redhat.com>
Cc: akpm@linux-foundation.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, Dan Williams <dan.j.williams@intel.com>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	John Hubbard <jhubbard@nvidia.com>
Subject: Re: [HMM 14/15] mm/migrate: support un-addressable ZONE_DEVICE page in migration v2
Date: Wed, 31 May 2017 14:09:04 +1000	[thread overview]
Message-ID: <20170531140904.5c956b9a@firefly.ozlabs.ibm.com> (raw)
In-Reply-To: <20170524172024.30810-15-jglisse@redhat.com>

On Wed, 24 May 2017 13:20:23 -0400
Jérôme Glisse <jglisse@redhat.com> wrote:

> Allow unmapping and restoring the special swap entries of un-addressable
> ZONE_DEVICE memory.
> 
> Changed since v1:
>   - s/device unaddressable/device private/
> 
> Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  include/linux/migrate.h |  10 +++-
>  mm/migrate.c            | 134 ++++++++++++++++++++++++++++++++++++++----------
>  mm/page_vma_mapped.c    |  10 ++++
>  mm/rmap.c               |  25 +++++++++
>  4 files changed, 150 insertions(+), 29 deletions(-)
> 
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index 576b3f5..7dd875a 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -130,12 +130,18 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
>  
>  #ifdef CONFIG_MIGRATION
>  
> +/*
> + * Watch out for PAE architectures, where unsigned long is only 32 bits and
> + * might not have enough bits to store all physical addresses and flags. So
> + * far we have enough room for all our flags.
> + */
>  #define MIGRATE_PFN_VALID	(1UL << 0)
>  #define MIGRATE_PFN_MIGRATE	(1UL << 1)
>  #define MIGRATE_PFN_LOCKED	(1UL << 2)
>  #define MIGRATE_PFN_WRITE	(1UL << 3)
> -#define MIGRATE_PFN_ERROR	(1UL << 4)
> -#define MIGRATE_PFN_SHIFT	5
> +#define MIGRATE_PFN_DEVICE	(1UL << 4)
> +#define MIGRATE_PFN_ERROR	(1UL << 5)
> +#define MIGRATE_PFN_SHIFT	6
>  
>  static inline struct page *migrate_pfn_to_page(unsigned long mpfn)
>  {
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 1f2bc61..9e68399 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -36,6 +36,7 @@
>  #include <linux/hugetlb.h>
>  #include <linux/hugetlb_cgroup.h>
>  #include <linux/gfp.h>
> +#include <linux/memremap.h>
>  #include <linux/balloon_compaction.h>
>  #include <linux/mmu_notifier.h>
>  #include <linux/page_idle.h>
> @@ -227,7 +228,15 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
>  		if (is_write_migration_entry(entry))
>  			pte = maybe_mkwrite(pte, vma);
>  
> -		flush_dcache_page(new);
> +		if (unlikely(is_zone_device_page(new)) &&
> +		    is_device_private_page(new)) {

I would expect HMM-CDM to never hit this pattern, given that
we should not be creating migration entries for CDM memory.
Is that a fair assumption?

> +			entry = make_device_private_entry(new, pte_write(pte));
> +			pte = swp_entry_to_pte(entry);
> +			if (pte_swp_soft_dirty(*pvmw.pte))
> +				pte = pte_mksoft_dirty(pte);
> +		} else
> +			flush_dcache_page(new);
> +
>  #ifdef CONFIG_HUGETLB_PAGE
>  		if (PageHuge(new)) {
>  			pte = pte_mkhuge(pte);
> @@ -2140,17 +2149,40 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>  		pte = *ptep;
>  		pfn = pte_pfn(pte);
>  
> -		if (!pte_present(pte)) {
> +		if (pte_none(pte)) {
>  			mpfn = pfn = 0;
>  			goto next;
>  		}
>  
> +		if (!pte_present(pte)) {
> +			mpfn = pfn = 0;
> +
> +			/*
> +			 * Only care about unaddressable device page special
> +			 * page table entries. Other special swap entries are
> +			 * not migratable, and we ignore regular swapped pages.
> +			 */
> +			entry = pte_to_swp_entry(pte);
> +			if (!is_device_private_entry(entry))
> +				goto next;
> +
> +			page = device_private_entry_to_page(entry);
> +			mpfn = migrate_pfn(page_to_pfn(page))|
> +				MIGRATE_PFN_DEVICE | MIGRATE_PFN_MIGRATE;
> +			if (is_write_device_private_entry(entry))
> +				mpfn |= MIGRATE_PFN_WRITE;
> +		} else {
> +			page = vm_normal_page(migrate->vma, addr, pte);
> +			mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
> +			mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
> +		}
> +
>  		/* FIXME support THP */
> -		page = vm_normal_page(migrate->vma, addr, pte);
>  		if (!page || !page->mapping || PageTransCompound(page)) {
>  			mpfn = pfn = 0;
>  			goto next;
>  		}
> +		pfn = page_to_pfn(page);
>  
>  		/*
>  		 * By getting a reference on the page we pin it and that blocks
> @@ -2163,8 +2195,6 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>  		 */
>  		get_page(page);
>  		migrate->cpages++;
> -		mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
> -		mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
>  
>  		/*
>  		 * Optimize for the common case where page is only mapped once
> @@ -2195,6 +2225,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>  		}
>  
>  next:
> +		migrate->dst[migrate->npages] = 0;
>  		migrate->src[migrate->npages++] = mpfn;
>  	}
>  	arch_leave_lazy_mmu_mode();
> @@ -2264,6 +2295,15 @@ static bool migrate_vma_check_page(struct page *page)
>  	if (PageCompound(page))
>  		return false;
>  
> +	/* Pages from ZONE_DEVICE have one extra reference */
> +	if (is_zone_device_page(page)) {
> +		if (is_device_private_page(page)) {
> +			extra++;
> +		} else
> +			/* Other ZONE_DEVICE memory types are not supported */
> +			return false;
> +	}
> +
>  	if ((page_count(page) - extra) > page_mapcount(page))
>  		return false;
>  
> @@ -2301,24 +2341,30 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
>  			migrate->src[i] |= MIGRATE_PFN_LOCKED;
>  		}
>  
> -		if (!PageLRU(page) && allow_drain) {
> -			/* Drain CPU's pagevec */
> -			lru_add_drain_all();
> -			allow_drain = false;
> -		}
> +		/* ZONE_DEVICE pages are not on LRU */
> +		if (!is_zone_device_page(page)) {
> +			if (!PageLRU(page) && allow_drain) {
> +				/* Drain CPU's pagevec */
> +				lru_add_drain_all();
> +				allow_drain = false;
> +			}
>  
> -		if (isolate_lru_page(page)) {
> -			if (remap) {
> -				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
> -				migrate->cpages--;
> -				restore++;
> -			} else {
> -				migrate->src[i] = 0;
> -				unlock_page(page);
> -				migrate->cpages--;
> -				put_page(page);
> +			if (isolate_lru_page(page)) {
> +				if (remap) {
> +					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
> +					migrate->cpages--;
> +					restore++;
> +				} else {
> +					migrate->src[i] = 0;
> +					unlock_page(page);
> +					migrate->cpages--;
> +					put_page(page);
> +				}
> +				continue;
>  			}
> -			continue;
> +
> +			/* Drop the reference we took in collect */
> +			put_page(page);
>  		}
>  
>  		if (!migrate_vma_check_page(page)) {
> @@ -2327,14 +2373,19 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
>  				migrate->cpages--;
>  				restore++;
>  
> -				get_page(page);
> -				putback_lru_page(page);
> +				if (!is_zone_device_page(page)) {
> +					get_page(page);
> +					putback_lru_page(page);
> +				}
>  			} else {
>  				migrate->src[i] = 0;
>  				unlock_page(page);
>  				migrate->cpages--;
>  
> -				putback_lru_page(page);
> +				if (!is_zone_device_page(page))
> +					putback_lru_page(page);
> +				else
> +					put_page(page);
>  			}
>  		}
>  	}
> @@ -2405,7 +2456,10 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
>  		unlock_page(page);
>  		restore--;
>  
> -		putback_lru_page(page);
> +		if (is_zone_device_page(page))
> +			put_page(page);
> +		else
> +			putback_lru_page(page);
>  	}
>  }
>  
> @@ -2436,6 +2490,26 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
>  
>  		mapping = page_mapping(page);
>  
> +		if (is_zone_device_page(newpage)) {
> +			if (is_device_private_page(newpage)) {
> +				/*
> +				 * For now only support private anonymous memory
> +				 * when migrating to un-addressable device memory.
> +				 */
> +				if (mapping) {
> +					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
> +					continue;
> +				}
> +			} else {
> +				/*
> +				 * Other types of ZONE_DEVICE page are not
> +				 * supported.
> +				 */
> +				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
> +				continue;
> +			}
> +		}
> +
>  		r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
>  		if (r != MIGRATEPAGE_SUCCESS)
>  			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
> @@ -2476,11 +2550,17 @@ static void migrate_vma_finalize(struct migrate_vma *migrate)
>  		unlock_page(page);
>  		migrate->cpages--;
>  
> -		putback_lru_page(page);
> +		if (is_zone_device_page(page))
> +			put_page(page);
> +		else
> +			putback_lru_page(page);
>  
>  		if (newpage != page) {
>  			unlock_page(newpage);
> -			putback_lru_page(newpage);
> +			if (is_zone_device_page(newpage))
> +				put_page(newpage);
> +			else
> +				putback_lru_page(newpage);
>  		}
>  	}
>  }
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index de9c40d..f95765c 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -48,6 +48,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
>  		if (!is_swap_pte(*pvmw->pte))
>  			return false;
>  		entry = pte_to_swp_entry(*pvmw->pte);
> +
>  		if (!is_migration_entry(entry))
>  			return false;
>  		if (migration_entry_to_page(entry) - pvmw->page >=
> @@ -60,6 +61,15 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
>  		WARN_ON_ONCE(1);
>  #endif
>  	} else {
> +		if (is_swap_pte(*pvmw->pte)) {
> +			swp_entry_t entry;
> +
> +			entry = pte_to_swp_entry(*pvmw->pte);
> +			if (is_device_private_entry(entry) &&
> +			    device_private_entry_to_page(entry) == pvmw->page)
> +				return true;
> +		}
> +
>  		if (!pte_present(*pvmw->pte))
>  			return false;
>  
> diff --git a/mm/rmap.c b/mm/rmap.c
> index d405f0e..515cea6 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -63,6 +63,7 @@
>  #include <linux/hugetlb.h>
>  #include <linux/backing-dev.h>
>  #include <linux/page_idle.h>
> +#include <linux/memremap.h>
>  
>  #include <asm/tlbflush.h>
>  
> @@ -1308,6 +1309,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
>  	if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
>  		return true;
>  
> +	if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
> +	    is_zone_device_page(page) && !is_device_private_page(page))
> +		return true;
> +

I wonder how CDM would ever work with this?

>  	if (flags & TTU_SPLIT_HUGE_PMD) {
>  		split_huge_pmd_address(vma, address,
>  				flags & TTU_MIGRATION, page);
> @@ -1343,6 +1348,26 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
>  		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
>  		address = pvmw.address;
>  
> +		if (IS_ENABLED(CONFIG_MIGRATION) &&
> +		    (flags & TTU_MIGRATION) &&
> +		    is_zone_device_page(page)) {
> +			swp_entry_t entry;
> +			pte_t swp_pte;
> +
> +			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
> +
> +			/*
> +			 * Store the pfn of the page in a special migration
> +			 * pte. do_swap_page() will wait until the migration
> +			 * pte is removed and then restart fault handling.
> +			 */
> +			entry = make_migration_entry(page, 0);
> +			swp_pte = swp_entry_to_pte(entry);
> +			if (pte_soft_dirty(pteval))
> +				swp_pte = pte_swp_mksoft_dirty(swp_pte);
> +			set_pte_at(mm, address, pvmw.pte, swp_pte);
> +			goto discard;
> +		}
>  
>  		if (!(flags & TTU_IGNORE_ACCESS)) {
>  			if (ptep_clear_flush_young_notify(vma, address,


Balbir Singh

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2017-05-31  4:09 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-24 17:20 [HMM 00/15] HMM (Heterogeneous Memory Management) v23 Jérôme Glisse
2017-05-24 17:20 ` [HMM 01/15] hmm: heterogeneous memory management documentation Jérôme Glisse
2017-06-24  6:15   ` John Hubbard
2017-05-24 17:20 ` [HMM 02/15] mm/hmm: heterogeneous memory management (HMM for short) v4 Jérôme Glisse
2017-05-31  2:10   ` Balbir Singh
2017-06-01 22:35     ` Jerome Glisse
2017-05-24 17:20 ` [HMM 03/15] mm/hmm/mirror: mirror process address space on device with HMM helpers v3 Jérôme Glisse
2017-05-24 17:20 ` [HMM 04/15] mm/hmm/mirror: helper to snapshot CPU page table v3 Jérôme Glisse
2017-05-24 17:20 ` [HMM 05/15] mm/hmm/mirror: device page fault handler Jérôme Glisse
2017-05-24 17:20 ` [HMM 06/15] mm/memory_hotplug: introduce add_pages Jérôme Glisse
2017-05-31  1:31   ` Balbir Singh
2017-05-24 17:20 ` [HMM 07/15] mm/ZONE_DEVICE: new type of ZONE_DEVICE for unaddressable memory v3 Jérôme Glisse
2017-05-30 16:43   ` Ross Zwisler
2017-05-30 21:43     ` Jerome Glisse
2017-05-31  1:23   ` Balbir Singh
2017-06-09  3:55   ` John Hubbard
2017-06-12 17:57     ` Jerome Glisse
2017-06-15  3:41   ` zhong jiang
2017-06-15 17:43     ` Jerome Glisse
2017-05-24 17:20 ` [HMM 08/15] mm/ZONE_DEVICE: special case put_page() for device private pages v2 Jérôme Glisse
2017-05-24 17:20 ` [HMM 09/15] mm/hmm/devmem: device memory hotplug using ZONE_DEVICE v5 Jérôme Glisse
2017-06-24  3:54   ` John Hubbard
2017-05-24 17:20 ` [HMM 10/15] mm/hmm/devmem: dummy HMM device for ZONE_DEVICE memory v3 Jérôme Glisse
2017-05-24 17:20 ` [HMM 11/15] mm/migrate: new migrate mode MIGRATE_SYNC_NO_COPY Jérôme Glisse
2017-05-24 17:20 ` [HMM 12/15] mm/migrate: new memory migration helper for use with device memory v4 Jérôme Glisse
2017-05-31  3:59   ` Balbir Singh
2017-06-01 22:35     ` Jerome Glisse
2017-06-07  9:02       ` Balbir Singh
2017-06-07 14:06         ` Jerome Glisse
2017-05-24 17:20 ` [HMM 13/15] mm/migrate: migrate_vma() unmap page from vma while collecting pages Jérôme Glisse
2017-05-24 17:20 ` [HMM 14/15] mm/migrate: support un-addressable ZONE_DEVICE page in migration v2 Jérôme Glisse
2017-05-31  4:09   ` Balbir Singh [this message]
2017-05-31  8:39     ` Balbir Singh
2017-05-24 17:20 ` [HMM 15/15] mm/migrate: allow migrate_vma() to alloc new page on empty entry v2 Jérôme Glisse
2017-06-16  7:22 ` [HMM 00/15] HMM (Heterogeneous Memory Management) v23 Bridgman, John
2017-06-16 14:47   ` Jerome Glisse
2017-06-16 17:55     ` Bridgman, John
2017-06-16 18:04       ` Jerome Glisse
2017-06-23 15:00 ` Bob Liu
2017-06-23 15:28   ` Jerome Glisse
  -- strict thread matches above, loose matches on Subject: below --
2017-05-22 16:51 [HMM 00/15] HMM (Heterogeneous Memory Management) v22 Jérôme Glisse
2017-05-22 16:52 ` [HMM 14/15] mm/migrate: support un-addressable ZONE_DEVICE page in migration v2 Jérôme Glisse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170531140904.5c956b9a@firefly.ozlabs.ibm.com \
    --to=bsingharora@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=jglisse@redhat.com \
    --cc=jhubbard@nvidia.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).