All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Kirill A. Shutemov" <kirill@shutemov.name>
To: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: linux-mm@kvack.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Hugh Dickins <hughd@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Dave Hansen <dave.hansen@intel.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Mel Gorman <mgorman@techsingularity.net>,
	Michal Hocko <mhocko@kernel.org>,
	Vlastimil Babka <vbabka@suse.cz>,
	Pavel Emelyanov <xemul@parallels.com>,
	Zi Yan <zi.yan@cs.rutgers.edu>,
	Balbir Singh <bsingharora@gmail.com>,
	linux-kernel@vger.kernel.org,
	Naoya Horiguchi <nao.horiguchi@gmail.com>
Subject: Re: [PATCH v2 05/12] mm: thp: add core routines for thp/pmd migration
Date: Mon, 14 Nov 2016 14:45:03 +0300	[thread overview]
Message-ID: <20161114114503.GA9231@node.shutemov.name> (raw)
In-Reply-To: <1478561517-4317-6-git-send-email-n-horiguchi@ah.jp.nec.com>

On Tue, Nov 08, 2016 at 08:31:50AM +0900, Naoya Horiguchi wrote:
> This patch prepares thp migration's core code. These code will be open when
> unmap_and_move() stops unconditionally splitting thp and get_new_page() starts
> to allocate destination thps.
> 
> Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
> ---
> ChangeLog v1 -> v2:
> - support pte-mapped thp, doubly-mapped thp
> ---
>  arch/x86/include/asm/pgtable_64.h |   2 +
>  include/linux/swapops.h           |  61 +++++++++++++++
>  mm/huge_memory.c                  | 154 ++++++++++++++++++++++++++++++++++++++
>  mm/migrate.c                      |  44 ++++++++++-
>  mm/pgtable-generic.c              |   3 +-
>  5 files changed, 262 insertions(+), 2 deletions(-)
> 
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/arch/x86/include/asm/pgtable_64.h v4.9-rc2-mmotm-2016-10-27-18-27_patched/arch/x86/include/asm/pgtable_64.h
> index 1cc82ec..3a1b48e 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/arch/x86/include/asm/pgtable_64.h
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/arch/x86/include/asm/pgtable_64.h
> @@ -167,7 +167,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
>  					 ((type) << (SWP_TYPE_FIRST_BIT)) \
>  					 | ((offset) << SWP_OFFSET_FIRST_BIT) })
>  #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
> +#define __pmd_to_swp_entry(pte)		((swp_entry_t) { pmd_val((pmd)) })
>  #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
> +#define __swp_entry_to_pmd(x)		((pmd_t) { .pmd = (x).val })
>  
>  extern int kern_addr_valid(unsigned long addr);
>  extern void cleanup_highmap(void);
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/include/linux/swapops.h v4.9-rc2-mmotm-2016-10-27-18-27_patched/include/linux/swapops.h
> index 5c3a5f3..b6b22a2 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/include/linux/swapops.h
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/include/linux/swapops.h
> @@ -163,6 +163,67 @@ static inline int is_write_migration_entry(swp_entry_t entry)
>  
>  #endif
>  
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> +extern void set_pmd_migration_entry(struct page *page,
> +		struct vm_area_struct *vma, unsigned long address);
> +
> +extern int remove_migration_pmd(struct page *new, pmd_t *pmd,
> +		struct vm_area_struct *vma, unsigned long addr, void *old);
> +
> +extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd);
> +
> +static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
> +{
> +	swp_entry_t arch_entry;
> +
> +	arch_entry = __pmd_to_swp_entry(pmd);
> +	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
> +}
> +
> +static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
> +{
> +	swp_entry_t arch_entry;
> +
> +	arch_entry = __swp_entry(swp_type(entry), swp_offset(entry));
> +	return __swp_entry_to_pmd(arch_entry);
> +}
> +
> +static inline int is_pmd_migration_entry(pmd_t pmd)
> +{
> +	return !pmd_present(pmd) && is_migration_entry(pmd_to_swp_entry(pmd));
> +}
> +#else
> +static inline void set_pmd_migration_entry(struct page *page,
> +			struct vm_area_struct *vma, unsigned long address)
> +{

VM_BUG()? Or BUILD_BUG()?

> +}
> +
> +static inline int remove_migration_pmd(struct page *new, pmd_t *pmd,
> +		struct vm_area_struct *vma, unsigned long addr, void *old)
> +{
> +	return 0;

Ditto.

> +}
> +
> +static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { }
> +
> +static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
> +{
> +	return swp_entry(0, 0);

Ditto.

> +}
> +
> +static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
> +{
> +	pmd_t pmd = {};

Ditto.

> +	return pmd;
> +}
> +
> +static inline int is_pmd_migration_entry(pmd_t pmd)
> +{
> +	return 0;
> +}
> +#endif
> +
>  #ifdef CONFIG_MEMORY_FAILURE
>  
>  extern atomic_long_t num_poisoned_pages __read_mostly;
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/huge_memory.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/huge_memory.c
> index 0509d17..b3022b3 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/mm/huge_memory.c
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/huge_memory.c
> @@ -2310,3 +2310,157 @@ static int __init split_huge_pages_debugfs(void)
>  }
>  late_initcall(split_huge_pages_debugfs);
>  #endif
> +
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> +void set_pmd_migration_entry(struct page *page, struct vm_area_struct *vma,
> +				unsigned long addr)
> +{
> +	struct mm_struct *mm = vma->vm_mm;
> +	pgd_t *pgd;
> +	pud_t *pud;
> +	pmd_t *pmd;
> +	pmd_t pmdval;
> +	swp_entry_t entry;
> +	spinlock_t *ptl;
> +
> +	pgd = pgd_offset(mm, addr);
> +	if (!pgd_present(*pgd))
> +		return;
> +	pud = pud_offset(pgd, addr);
> +	if (!pud_present(*pud))
> +		return;
> +	pmd = pmd_offset(pud, addr);
> +	pmdval = *pmd;
> +	barrier();
> +	if (!pmd_present(pmdval))
> +		return;
> +
> +	mmu_notifier_invalidate_range_start(mm, addr, addr + HPAGE_PMD_SIZE);
> +	if (pmd_trans_huge(pmdval)) {
> +		pmd_t pmdswp;
> +
> +		ptl = pmd_lock(mm, pmd);
> +		if (!pmd_present(*pmd))
> +			goto unlock_pmd;
> +		if (unlikely(!pmd_trans_huge(*pmd)))
> +			goto unlock_pmd;

Just check *pmd == pmdval?

> +		if (pmd_page(*pmd) != page)
> +			goto unlock_pmd;
> +
> +		pmdval = pmdp_huge_get_and_clear(mm, addr, pmd);
> +		if (pmd_dirty(pmdval))
> +			set_page_dirty(page);
> +		entry = make_migration_entry(page, pmd_write(pmdval));
> +		pmdswp = swp_entry_to_pmd(entry);
> +		pmdswp = pmd_mkhuge(pmdswp);
> +		set_pmd_at(mm, addr, pmd, pmdswp);
> +		page_remove_rmap(page, true);
> +		put_page(page);
> +unlock_pmd:
> +		spin_unlock(ptl);
> +	} else { /* pte-mapped thp */
> +		pte_t *pte;
> +		pte_t pteval;
> +		struct page *tmp = compound_head(page);
> +		unsigned long address = addr & HPAGE_PMD_MASK;
> +		pte_t swp_pte;
> +		int i;
> +
> +		pte = pte_offset_map(pmd, address);
> +		ptl = pte_lockptr(mm, pmd);
> +		spin_lock(ptl);

pte_offset_map_lock() ?

> +		for (i = 0; i < HPAGE_PMD_NR; i++, pte++, tmp++) {
> +			if (!(pte_present(*pte) &&
> +			      page_to_pfn(tmp) == pte_pfn(*pte)))

			if (!pte_present(*pte) || pte_page(*pte) != tmp) ?

> +				continue;
> +			pteval = ptep_clear_flush(vma, address, pte);
> +			if (pte_dirty(pteval))
> +				set_page_dirty(tmp);
> +			entry = make_migration_entry(tmp, pte_write(pteval));
> +			swp_pte = swp_entry_to_pte(entry);
> +			set_pte_at(mm, address, pte, swp_pte);
> +			page_remove_rmap(tmp, false);
> +			put_page(tmp);
> +		}
> +		pte_unmap_unlock(pte, ptl);
> +	}
> +	mmu_notifier_invalidate_range_end(mm, addr, addr + HPAGE_PMD_SIZE);
> +	return;
> +}
> +
> +int remove_migration_pmd(struct page *new, pmd_t *pmd,
> +		struct vm_area_struct *vma, unsigned long addr, void *old)
> +{
> +	struct mm_struct *mm = vma->vm_mm;
> +	spinlock_t *ptl;
> +	pmd_t pmde;
> +	swp_entry_t entry;
> +
> +	pmde = *pmd;
> +	barrier();
> +
> +	if (!pmd_present(pmde)) {
> +		if (is_migration_entry(pmd_to_swp_entry(pmde))) {

		if (!is_migration_entry(pmd_to_swp_entry(pmde)))
			return SWAP_AGAIN;

And one level less indentation below.

> +			unsigned long mmun_start = addr & HPAGE_PMD_MASK;
> +			unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
> +
> +			ptl = pmd_lock(mm, pmd);
> +			entry = pmd_to_swp_entry(*pmd);
> +			if (migration_entry_to_page(entry) != old)
> +				goto unlock_ptl;
> +			get_page(new);
> +			pmde = pmd_mkold(mk_huge_pmd(new, vma->vm_page_prot));
> +			if (is_write_migration_entry(entry))
> +				pmde = maybe_pmd_mkwrite(pmde, vma);
> +			flush_cache_range(vma, mmun_start, mmun_end);
> +			page_add_anon_rmap(new, vma, mmun_start, true);
> +			pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
> +			set_pmd_at(mm, mmun_start, pmd, pmde);
> +			flush_tlb_range(vma, mmun_start, mmun_end);
> +			if (vma->vm_flags & VM_LOCKED)
> +				mlock_vma_page(new);
> +			update_mmu_cache_pmd(vma, addr, pmd);
> +unlock_ptl:
> +			spin_unlock(ptl);

			return SWAP_AGAIN;

And one level less indentation below.

> +		}
> +	} else { /* pte-mapped thp */
> +		pte_t *ptep;
> +		pte_t pte;
> +		int i;
> +		struct page *tmpnew = compound_head(new);
> +		struct page *tmpold = compound_head((struct page *)old);
> +		unsigned long address = addr & HPAGE_PMD_MASK;
> +
> +		ptep = pte_offset_map(pmd, addr);
> +		ptl = pte_lockptr(mm, pmd);
> +		spin_lock(ptl);

pte_offset_map_lock() ?

> +
> +		for (i = 0; i < HPAGE_PMD_NR;
> +		     i++, ptep++, tmpnew++, tmpold++, address += PAGE_SIZE) {
> +			pte = *ptep;
> +			if (!is_swap_pte(pte))
> +				continue;
> +			entry = pte_to_swp_entry(pte);
> +			if (!is_migration_entry(entry) ||
> +			    migration_entry_to_page(entry) != tmpold)
> +				continue;
> +			get_page(tmpnew);
> +			pte = pte_mkold(mk_pte(tmpnew,
> +					       READ_ONCE(vma->vm_page_prot)));

Why READ_ONCE()? We get here under mmap_sem, right?

> +			if (pte_swp_soft_dirty(*ptep))
> +				pte = pte_mksoft_dirty(pte);
> +			if (is_write_migration_entry(entry))
> +				pte = maybe_mkwrite(pte, vma);
> +			flush_dcache_page(tmpnew);
> +			set_pte_at(mm, address, ptep, pte);
> +			if (PageAnon(new))
> +				page_add_anon_rmap(tmpnew, vma, address, false);
> +			else
> +				page_add_file_rmap(tmpnew, false);
> +			update_mmu_cache(vma, address, ptep);
> +		}
> +		pte_unmap_unlock(ptep, ptl);
> +	}
> +	return SWAP_AGAIN;
> +}
> +#endif
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/migrate.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/migrate.c
> index 66ce6b4..54f2eb6 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/mm/migrate.c
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/migrate.c
> @@ -198,6 +198,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
>  {
>  	struct mm_struct *mm = vma->vm_mm;
>  	swp_entry_t entry;
> +	pgd_t *pgd;
> +	pud_t *pud;
>   	pmd_t *pmd;
>  	pte_t *ptep, pte;
>   	spinlock_t *ptl;
> @@ -208,10 +210,29 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
>  			goto out;
>  		ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
>  	} else {
> -		pmd = mm_find_pmd(mm, addr);
> +		pmd_t pmde;
> +
> +		pgd = pgd_offset(mm, addr);
> +		if (!pgd_present(*pgd))
> +			goto out;
> +		pud = pud_offset(pgd, addr);
> +		if (!pud_present(*pud))
> +			goto out;
> +		pmd = pmd_offset(pud, addr);
>  		if (!pmd)
>  			goto out;
>  
> +		if (PageTransCompound(new)) {
> +			remove_migration_pmd(new, pmd, vma, addr, old);
> +			goto out;
> +		}
> +
> +		pmde = *pmd;
> +		barrier();
> +
> +		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
> +			goto out;
> +
>  		ptep = pte_offset_map(pmd, addr);
>  
>  		/*
> @@ -344,6 +365,27 @@ void migration_entry_wait_huge(struct vm_area_struct *vma,
>  	__migration_entry_wait(mm, pte, ptl);
>  }
>  
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> +void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
> +{
> +	spinlock_t *ptl;
> +	struct page *page;
> +
> +	ptl = pmd_lock(mm, pmd);
> +	if (!is_pmd_migration_entry(*pmd))
> +		goto unlock;
> +	page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
> +	if (!get_page_unless_zero(page))
> +		goto unlock;
> +	spin_unlock(ptl);
> +	wait_on_page_locked(page);
> +	put_page(page);
> +	return;
> +unlock:
> +	spin_unlock(ptl);
> +}
> +#endif
> +
>  #ifdef CONFIG_BLOCK
>  /* Returns true if all buffers are successfully locked */
>  static bool buffer_migrate_lock_buffers(struct buffer_head *head,
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/pgtable-generic.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/pgtable-generic.c
> index 71c5f91..6012343 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/mm/pgtable-generic.c
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/pgtable-generic.c
> @@ -118,7 +118,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
>  {
>  	pmd_t pmd;
>  	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
> -	VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
> +	VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
> +		  !pmd_devmap(*pmdp));
>  	pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
>  	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
>  	return pmd;
> -- 
> 2.7.0
> 

-- 
 Kirill A. Shutemov

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: "Kirill A. Shutemov" <kirill@shutemov.name>
To: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: linux-mm@kvack.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Hugh Dickins <hughd@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Dave Hansen <dave.hansen@intel.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Mel Gorman <mgorman@techsingularity.net>,
	Michal Hocko <mhocko@kernel.org>,
	Vlastimil Babka <vbabka@suse.cz>,
	Pavel Emelyanov <xemul@parallels.com>,
	Zi Yan <zi.yan@cs.rutgers.edu>,
	Balbir Singh <bsingharora@gmail.com>,
	linux-kernel@vger.kernel.org,
	Naoya Horiguchi <nao.horiguchi@gmail.com>
Subject: Re: [PATCH v2 05/12] mm: thp: add core routines for thp/pmd migration
Date: Mon, 14 Nov 2016 14:45:03 +0300	[thread overview]
Message-ID: <20161114114503.GA9231@node.shutemov.name> (raw)
In-Reply-To: <1478561517-4317-6-git-send-email-n-horiguchi@ah.jp.nec.com>

On Tue, Nov 08, 2016 at 08:31:50AM +0900, Naoya Horiguchi wrote:
> This patch prepares thp migration's core code. These code will be open when
> unmap_and_move() stops unconditionally splitting thp and get_new_page() starts
> to allocate destination thps.
> 
> Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
> ---
> ChangeLog v1 -> v2:
> - support pte-mapped thp, doubly-mapped thp
> ---
>  arch/x86/include/asm/pgtable_64.h |   2 +
>  include/linux/swapops.h           |  61 +++++++++++++++
>  mm/huge_memory.c                  | 154 ++++++++++++++++++++++++++++++++++++++
>  mm/migrate.c                      |  44 ++++++++++-
>  mm/pgtable-generic.c              |   3 +-
>  5 files changed, 262 insertions(+), 2 deletions(-)
> 
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/arch/x86/include/asm/pgtable_64.h v4.9-rc2-mmotm-2016-10-27-18-27_patched/arch/x86/include/asm/pgtable_64.h
> index 1cc82ec..3a1b48e 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/arch/x86/include/asm/pgtable_64.h
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/arch/x86/include/asm/pgtable_64.h
> @@ -167,7 +167,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
>  					 ((type) << (SWP_TYPE_FIRST_BIT)) \
>  					 | ((offset) << SWP_OFFSET_FIRST_BIT) })
>  #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
> +#define __pmd_to_swp_entry(pte)		((swp_entry_t) { pmd_val((pmd)) })
>  #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
> +#define __swp_entry_to_pmd(x)		((pmd_t) { .pmd = (x).val })
>  
>  extern int kern_addr_valid(unsigned long addr);
>  extern void cleanup_highmap(void);
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/include/linux/swapops.h v4.9-rc2-mmotm-2016-10-27-18-27_patched/include/linux/swapops.h
> index 5c3a5f3..b6b22a2 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/include/linux/swapops.h
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/include/linux/swapops.h
> @@ -163,6 +163,67 @@ static inline int is_write_migration_entry(swp_entry_t entry)
>  
>  #endif
>  
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> +extern void set_pmd_migration_entry(struct page *page,
> +		struct vm_area_struct *vma, unsigned long address);
> +
> +extern int remove_migration_pmd(struct page *new, pmd_t *pmd,
> +		struct vm_area_struct *vma, unsigned long addr, void *old);
> +
> +extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd);
> +
> +static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
> +{
> +	swp_entry_t arch_entry;
> +
> +	arch_entry = __pmd_to_swp_entry(pmd);
> +	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
> +}
> +
> +static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
> +{
> +	swp_entry_t arch_entry;
> +
> +	arch_entry = __swp_entry(swp_type(entry), swp_offset(entry));
> +	return __swp_entry_to_pmd(arch_entry);
> +}
> +
> +static inline int is_pmd_migration_entry(pmd_t pmd)
> +{
> +	return !pmd_present(pmd) && is_migration_entry(pmd_to_swp_entry(pmd));
> +}
> +#else
> +static inline void set_pmd_migration_entry(struct page *page,
> +			struct vm_area_struct *vma, unsigned long address)
> +{

VM_BUG()? Or BUILD_BUG()?

> +}
> +
> +static inline int remove_migration_pmd(struct page *new, pmd_t *pmd,
> +		struct vm_area_struct *vma, unsigned long addr, void *old)
> +{
> +	return 0;

Ditto.

> +}
> +
> +static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { }
> +
> +static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
> +{
> +	return swp_entry(0, 0);

Ditto.

> +}
> +
> +static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
> +{
> +	pmd_t pmd = {};

Ditto.

> +	return pmd;
> +}
> +
> +static inline int is_pmd_migration_entry(pmd_t pmd)
> +{
> +	return 0;
> +}
> +#endif
> +
>  #ifdef CONFIG_MEMORY_FAILURE
>  
>  extern atomic_long_t num_poisoned_pages __read_mostly;
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/huge_memory.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/huge_memory.c
> index 0509d17..b3022b3 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/mm/huge_memory.c
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/huge_memory.c
> @@ -2310,3 +2310,157 @@ static int __init split_huge_pages_debugfs(void)
>  }
>  late_initcall(split_huge_pages_debugfs);
>  #endif
> +
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> +void set_pmd_migration_entry(struct page *page, struct vm_area_struct *vma,
> +				unsigned long addr)
> +{
> +	struct mm_struct *mm = vma->vm_mm;
> +	pgd_t *pgd;
> +	pud_t *pud;
> +	pmd_t *pmd;
> +	pmd_t pmdval;
> +	swp_entry_t entry;
> +	spinlock_t *ptl;
> +
> +	pgd = pgd_offset(mm, addr);
> +	if (!pgd_present(*pgd))
> +		return;
> +	pud = pud_offset(pgd, addr);
> +	if (!pud_present(*pud))
> +		return;
> +	pmd = pmd_offset(pud, addr);
> +	pmdval = *pmd;
> +	barrier();
> +	if (!pmd_present(pmdval))
> +		return;
> +
> +	mmu_notifier_invalidate_range_start(mm, addr, addr + HPAGE_PMD_SIZE);
> +	if (pmd_trans_huge(pmdval)) {
> +		pmd_t pmdswp;
> +
> +		ptl = pmd_lock(mm, pmd);
> +		if (!pmd_present(*pmd))
> +			goto unlock_pmd;
> +		if (unlikely(!pmd_trans_huge(*pmd)))
> +			goto unlock_pmd;

Just check *pmd == pmdval?

> +		if (pmd_page(*pmd) != page)
> +			goto unlock_pmd;
> +
> +		pmdval = pmdp_huge_get_and_clear(mm, addr, pmd);
> +		if (pmd_dirty(pmdval))
> +			set_page_dirty(page);
> +		entry = make_migration_entry(page, pmd_write(pmdval));
> +		pmdswp = swp_entry_to_pmd(entry);
> +		pmdswp = pmd_mkhuge(pmdswp);
> +		set_pmd_at(mm, addr, pmd, pmdswp);
> +		page_remove_rmap(page, true);
> +		put_page(page);
> +unlock_pmd:
> +		spin_unlock(ptl);
> +	} else { /* pte-mapped thp */
> +		pte_t *pte;
> +		pte_t pteval;
> +		struct page *tmp = compound_head(page);
> +		unsigned long address = addr & HPAGE_PMD_MASK;
> +		pte_t swp_pte;
> +		int i;
> +
> +		pte = pte_offset_map(pmd, address);
> +		ptl = pte_lockptr(mm, pmd);
> +		spin_lock(ptl);

pte_offset_map_lock() ?

> +		for (i = 0; i < HPAGE_PMD_NR; i++, pte++, tmp++) {
> +			if (!(pte_present(*pte) &&
> +			      page_to_pfn(tmp) == pte_pfn(*pte)))

			if (!pte_present(*pte) || pte_page(*pte) != tmp) ?

> +				continue;
> +			pteval = ptep_clear_flush(vma, address, pte);
> +			if (pte_dirty(pteval))
> +				set_page_dirty(tmp);
> +			entry = make_migration_entry(tmp, pte_write(pteval));
> +			swp_pte = swp_entry_to_pte(entry);
> +			set_pte_at(mm, address, pte, swp_pte);
> +			page_remove_rmap(tmp, false);
> +			put_page(tmp);
> +		}
> +		pte_unmap_unlock(pte, ptl);
> +	}
> +	mmu_notifier_invalidate_range_end(mm, addr, addr + HPAGE_PMD_SIZE);
> +	return;
> +}
> +
> +int remove_migration_pmd(struct page *new, pmd_t *pmd,
> +		struct vm_area_struct *vma, unsigned long addr, void *old)
> +{
> +	struct mm_struct *mm = vma->vm_mm;
> +	spinlock_t *ptl;
> +	pmd_t pmde;
> +	swp_entry_t entry;
> +
> +	pmde = *pmd;
> +	barrier();
> +
> +	if (!pmd_present(pmde)) {
> +		if (is_migration_entry(pmd_to_swp_entry(pmde))) {

		if (!is_migration_entry(pmd_to_swp_entry(pmde)))
			return SWAP_AGAIN;

And one level less indentation below.

> +			unsigned long mmun_start = addr & HPAGE_PMD_MASK;
> +			unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
> +
> +			ptl = pmd_lock(mm, pmd);
> +			entry = pmd_to_swp_entry(*pmd);
> +			if (migration_entry_to_page(entry) != old)
> +				goto unlock_ptl;
> +			get_page(new);
> +			pmde = pmd_mkold(mk_huge_pmd(new, vma->vm_page_prot));
> +			if (is_write_migration_entry(entry))
> +				pmde = maybe_pmd_mkwrite(pmde, vma);
> +			flush_cache_range(vma, mmun_start, mmun_end);
> +			page_add_anon_rmap(new, vma, mmun_start, true);
> +			pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
> +			set_pmd_at(mm, mmun_start, pmd, pmde);
> +			flush_tlb_range(vma, mmun_start, mmun_end);
> +			if (vma->vm_flags & VM_LOCKED)
> +				mlock_vma_page(new);
> +			update_mmu_cache_pmd(vma, addr, pmd);
> +unlock_ptl:
> +			spin_unlock(ptl);

			return SWAP_AGAIN;

And one level less indentation below.

> +		}
> +	} else { /* pte-mapped thp */
> +		pte_t *ptep;
> +		pte_t pte;
> +		int i;
> +		struct page *tmpnew = compound_head(new);
> +		struct page *tmpold = compound_head((struct page *)old);
> +		unsigned long address = addr & HPAGE_PMD_MASK;
> +
> +		ptep = pte_offset_map(pmd, addr);
> +		ptl = pte_lockptr(mm, pmd);
> +		spin_lock(ptl);

pte_offset_map_lock() ?

> +
> +		for (i = 0; i < HPAGE_PMD_NR;
> +		     i++, ptep++, tmpnew++, tmpold++, address += PAGE_SIZE) {
> +			pte = *ptep;
> +			if (!is_swap_pte(pte))
> +				continue;
> +			entry = pte_to_swp_entry(pte);
> +			if (!is_migration_entry(entry) ||
> +			    migration_entry_to_page(entry) != tmpold)
> +				continue;
> +			get_page(tmpnew);
> +			pte = pte_mkold(mk_pte(tmpnew,
> +					       READ_ONCE(vma->vm_page_prot)));

Why READ_ONCE()? We get here under mmap_sem, right?

> +			if (pte_swp_soft_dirty(*ptep))
> +				pte = pte_mksoft_dirty(pte);
> +			if (is_write_migration_entry(entry))
> +				pte = maybe_mkwrite(pte, vma);
> +			flush_dcache_page(tmpnew);
> +			set_pte_at(mm, address, ptep, pte);
> +			if (PageAnon(new))
> +				page_add_anon_rmap(tmpnew, vma, address, false);
> +			else
> +				page_add_file_rmap(tmpnew, false);
> +			update_mmu_cache(vma, address, ptep);
> +		}
> +		pte_unmap_unlock(ptep, ptl);
> +	}
> +	return SWAP_AGAIN;
> +}
> +#endif
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/migrate.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/migrate.c
> index 66ce6b4..54f2eb6 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/mm/migrate.c
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/migrate.c
> @@ -198,6 +198,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
>  {
>  	struct mm_struct *mm = vma->vm_mm;
>  	swp_entry_t entry;
> +	pgd_t *pgd;
> +	pud_t *pud;
>   	pmd_t *pmd;
>  	pte_t *ptep, pte;
>   	spinlock_t *ptl;
> @@ -208,10 +210,29 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
>  			goto out;
>  		ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
>  	} else {
> -		pmd = mm_find_pmd(mm, addr);
> +		pmd_t pmde;
> +
> +		pgd = pgd_offset(mm, addr);
> +		if (!pgd_present(*pgd))
> +			goto out;
> +		pud = pud_offset(pgd, addr);
> +		if (!pud_present(*pud))
> +			goto out;
> +		pmd = pmd_offset(pud, addr);
>  		if (!pmd)
>  			goto out;
>  
> +		if (PageTransCompound(new)) {
> +			remove_migration_pmd(new, pmd, vma, addr, old);
> +			goto out;
> +		}
> +
> +		pmde = *pmd;
> +		barrier();
> +
> +		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
> +			goto out;
> +
>  		ptep = pte_offset_map(pmd, addr);
>  
>  		/*
> @@ -344,6 +365,27 @@ void migration_entry_wait_huge(struct vm_area_struct *vma,
>  	__migration_entry_wait(mm, pte, ptl);
>  }
>  
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> +void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
> +{
> +	spinlock_t *ptl;
> +	struct page *page;
> +
> +	ptl = pmd_lock(mm, pmd);
> +	if (!is_pmd_migration_entry(*pmd))
> +		goto unlock;
> +	page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
> +	if (!get_page_unless_zero(page))
> +		goto unlock;
> +	spin_unlock(ptl);
> +	wait_on_page_locked(page);
> +	put_page(page);
> +	return;
> +unlock:
> +	spin_unlock(ptl);
> +}
> +#endif
> +
>  #ifdef CONFIG_BLOCK
>  /* Returns true if all buffers are successfully locked */
>  static bool buffer_migrate_lock_buffers(struct buffer_head *head,
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/pgtable-generic.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/pgtable-generic.c
> index 71c5f91..6012343 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/mm/pgtable-generic.c
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/pgtable-generic.c
> @@ -118,7 +118,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
>  {
>  	pmd_t pmd;
>  	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
> -	VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
> +	VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
> +		  !pmd_devmap(*pmdp));
>  	pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
>  	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
>  	return pmd;
> -- 
> 2.7.0
> 

-- 
 Kirill A. Shutemov

  parent reply	other threads:[~2016-11-14 11:45 UTC|newest]

Thread overview: 141+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-07 23:31 [PATCH v2 00/12] mm: page migration enhancement for thp Naoya Horiguchi
2016-11-07 23:31 ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 01/12] mm: x86: move _PAGE_SWP_SOFT_DIRTY from bit 7 to bit 6 Naoya Horiguchi
2016-11-07 23:31   ` Naoya Horiguchi
2016-11-10 23:29   ` Dave Hansen
2016-11-10 23:29     ` Dave Hansen
2016-11-11  1:08     ` Naoya Horiguchi
2016-11-11  1:08       ` Naoya Horiguchi
2016-11-11 11:12     ` Kirill A. Shutemov
2016-11-11 11:12       ` Kirill A. Shutemov
2016-11-07 23:31 ` [PATCH v2 02/12] mm: mempolicy: add queue_pages_node_check() Naoya Horiguchi
2016-11-07 23:31   ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 03/12] mm: thp: introduce separate TTU flag for thp freezing Naoya Horiguchi
2016-11-07 23:31   ` Naoya Horiguchi
2016-11-10  8:31   ` Anshuman Khandual
2016-11-10  8:31     ` Anshuman Khandual
2016-11-10  9:09     ` Naoya Horiguchi
2016-11-10  9:09       ` Naoya Horiguchi
2016-11-11  3:18       ` Anshuman Khandual
2016-11-11  3:18         ` Anshuman Khandual
2016-11-11 11:16   ` Kirill A. Shutemov
2016-11-11 11:16     ` Kirill A. Shutemov
2016-11-07 23:31 ` [PATCH v2 04/12] mm: thp: introduce CONFIG_ARCH_ENABLE_THP_MIGRATION Naoya Horiguchi
2016-11-07 23:31   ` Naoya Horiguchi
2016-11-11 11:18   ` Kirill A. Shutemov
2016-11-11 11:18     ` Kirill A. Shutemov
2016-11-15  2:05     ` Naoya Horiguchi
2016-11-15  2:05       ` Naoya Horiguchi
2016-11-28 14:21   ` Michal Hocko
2016-11-28 14:21     ` Michal Hocko
2016-11-29  7:50     ` Naoya Horiguchi
2016-11-29  7:50       ` Naoya Horiguchi
2016-11-29  8:45       ` Michal Hocko
2016-11-29  8:45         ` Michal Hocko
2016-11-07 23:31 ` [PATCH v2 05/12] mm: thp: add core routines for thp/pmd migration Naoya Horiguchi
2016-11-07 23:31   ` Naoya Horiguchi
2016-11-08  3:05   ` kbuild test robot
2016-11-08  3:05     ` kbuild test robot
2016-11-08  6:46     ` Naoya Horiguchi
2016-11-08  6:46       ` Naoya Horiguchi
2016-11-08  8:13   ` Anshuman Khandual
2016-11-08  8:13     ` Anshuman Khandual
2016-11-08  8:16     ` Naoya Horiguchi
2016-11-08  8:16       ` Naoya Horiguchi
2016-11-10  8:29   ` Anshuman Khandual
2016-11-10  8:29     ` Anshuman Khandual
2016-11-10  9:43     ` Naoya Horiguchi
2016-11-10  9:43       ` Naoya Horiguchi
2016-11-14 11:45   ` Kirill A. Shutemov [this message]
2016-11-14 11:45     ` Kirill A. Shutemov
2016-11-15  4:57     ` Naoya Horiguchi
2016-11-15  4:57       ` Naoya Horiguchi
2016-11-28 14:31   ` Michal Hocko
2016-11-28 14:31     ` Michal Hocko
2016-11-29  7:57     ` Naoya Horiguchi
2016-11-29  7:57       ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 06/12] mm: thp: enable thp migration in generic path Naoya Horiguchi
2016-11-07 23:31   ` Naoya Horiguchi
2016-11-28 14:33   ` Michal Hocko
2016-11-28 14:33     ` Michal Hocko
2016-11-29  8:16     ` Naoya Horiguchi
2016-11-29  8:16       ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 07/12] mm: thp: check pmd migration entry in common path Naoya Horiguchi
2016-11-07 23:31   ` Naoya Horiguchi
2016-11-08  0:23   ` kbuild test robot
2016-11-08  0:23     ` kbuild test robot
2016-11-08  1:22     ` Naoya Horiguchi
2016-11-08  1:22       ` Naoya Horiguchi
2016-11-10  8:36   ` Anshuman Khandual
2016-11-10  8:36     ` Anshuman Khandual
2016-11-10  9:12     ` Naoya Horiguchi
2016-11-10  9:12       ` Naoya Horiguchi
2016-11-10  9:08   ` Hillf Danton
2016-11-10  9:08     ` Hillf Danton
2016-11-10  9:21     ` Naoya Horiguchi
2016-11-10  9:21       ` Naoya Horiguchi
2016-11-10  9:28       ` Hillf Danton
2016-11-10  9:28         ` Hillf Danton
2016-11-10  9:34         ` Naoya Horiguchi
2016-11-10  9:34           ` Naoya Horiguchi
2016-11-17 23:56   ` Kirill A. Shutemov
2016-11-17 23:56     ` Kirill A. Shutemov
2016-11-29  6:46     ` Naoya Horiguchi
2016-11-29  6:46       ` Naoya Horiguchi
2016-11-28 14:35   ` Michal Hocko
2016-11-28 14:35     ` Michal Hocko
2016-11-07 23:31 ` [PATCH v2 08/12] mm: soft-dirty: keep soft-dirty bits over thp migration Naoya Horiguchi
2016-11-07 23:31   ` Naoya Horiguchi
2016-11-10  8:38   ` Anshuman Khandual
2016-11-10  8:38     ` Anshuman Khandual
2016-11-10  9:13     ` Naoya Horiguchi
2016-11-10  9:13       ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 09/12] mm: hwpoison: soft offline supports " Naoya Horiguchi
2016-11-07 23:31   ` Naoya Horiguchi
2016-11-10 10:31   ` Balbir Singh
2016-11-10 10:31     ` Balbir Singh
2016-11-10 23:58     ` Naoya Horiguchi
2016-11-10 23:58       ` Naoya Horiguchi
2016-11-14 23:22       ` Balbir Singh
2016-11-14 23:22         ` Balbir Singh
2016-11-07 23:31 ` [PATCH v2 10/12] mm: mempolicy: mbind and migrate_pages support " Naoya Horiguchi
2016-11-07 23:31   ` Naoya Horiguchi
2016-11-25 12:27   ` Anshuman Khandual
2016-11-25 12:27     ` Anshuman Khandual
2016-11-29  7:07     ` Naoya Horiguchi
2016-11-29  7:07       ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 11/12] mm: migrate: move_pages() supports " Naoya Horiguchi
2016-11-07 23:31   ` Naoya Horiguchi
2016-11-18  0:01   ` Kirill A. Shutemov
2016-11-18  0:01     ` Kirill A. Shutemov
2016-11-07 23:31 ` [PATCH v2 12/12] mm: memory_hotplug: memory hotremove " Naoya Horiguchi
2016-11-07 23:31   ` Naoya Horiguchi
2016-11-08  0:29   ` kbuild test robot
2016-11-08  0:29     ` kbuild test robot
2016-11-08  0:30   ` kbuild test robot
2016-11-08  0:30     ` kbuild test robot
2016-11-08  1:36     ` Naoya Horiguchi
2016-11-08  1:36       ` Naoya Horiguchi
2016-11-08  1:41       ` [PATCH] mm: fix unused variable warning Naoya Horiguchi
2016-11-08  1:41         ` Naoya Horiguchi
2016-11-09  2:32 ` [PATCH v2 00/12] mm: page migration enhancement for thp Balbir Singh
2016-11-09  2:32   ` Balbir Singh
2016-11-09  4:59   ` Naoya Horiguchi
2016-11-09  4:59     ` Naoya Horiguchi
2016-11-09 21:28     ` Balbir Singh
2016-11-09 21:28       ` Balbir Singh
2016-11-09 10:33 ` Anshuman Khandual
2016-11-09 10:33   ` Anshuman Khandual
2016-11-09 23:52   ` Naoya Horiguchi
2016-11-09 23:52     ` Naoya Horiguchi
2016-11-10 14:01     ` Zi Yan
2016-11-11  3:48       ` Anshuman Khandual
2016-11-11  3:48         ` Anshuman Khandual
2017-01-30 10:47 ` Anshuman Khandual
2017-01-30 10:47   ` Anshuman Khandual
2017-01-31 19:54   ` Zi Yan
2017-01-31 19:54     ` Zi Yan
2017-02-03  3:12     ` Hillf Danton
2017-02-03  3:12       ` Hillf Danton
2017-02-03  3:24       ` Zi Yan
2017-02-03  3:24         ` Zi Yan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161114114503.GA9231@node.shutemov.name \
    --to=kirill@shutemov.name \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=bsingharora@gmail.com \
    --cc=dave.hansen@intel.com \
    --cc=hughd@google.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@techsingularity.net \
    --cc=mhocko@kernel.org \
    --cc=n-horiguchi@ah.jp.nec.com \
    --cc=nao.horiguchi@gmail.com \
    --cc=vbabka@suse.cz \
    --cc=xemul@parallels.com \
    --cc=zi.yan@cs.rutgers.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.