All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov@virtuozzo.com>
To: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Hugh Dickins <hughd@google.com>,
	Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>,
	Sasha Levin <sasha.levin@oracle.com>,
	Minchan Kim <minchan@kernel.org>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [PATCH 4/4] mm: prepare page_referenced() and page_idle to new THP refcounting
Date: Thu, 5 Nov 2015 15:07:26 +0300	[thread overview]
Message-ID: <20151105120726.GD29259@esperanza> (raw)
In-Reply-To: <20151105092459.GC7614@node.shutemov.name>

On Thu, Nov 05, 2015 at 11:24:59AM +0200, Kirill A. Shutemov wrote:
> On Thu, Nov 05, 2015 at 12:10:13PM +0300, Vladimir Davydov wrote:
> > On Tue, Nov 03, 2015 at 05:26:15PM +0200, Kirill A. Shutemov wrote:
> > ...
> > > @@ -56,23 +56,69 @@ static int page_idle_clear_pte_refs_one(struct page *page,
> > >  {
> > >  	struct mm_struct *mm = vma->vm_mm;
> > >  	spinlock_t *ptl;
> > > +	pgd_t *pgd;
> > > +	pud_t *pud;
> > >  	pmd_t *pmd;
> > >  	pte_t *pte;
> > >  	bool referenced = false;
> > >  
> > > -	if (unlikely(PageTransHuge(page))) {
> > > -		pmd = page_check_address_pmd(page, mm, addr, &ptl);
> > > -		if (pmd) {
> > > -			referenced = pmdp_clear_young_notify(vma, addr, pmd);
> > > +	pgd = pgd_offset(mm, addr);
> > > +	if (!pgd_present(*pgd))
> > > +		return SWAP_AGAIN;
> > > +	pud = pud_offset(pgd, addr);
> > > +	if (!pud_present(*pud))
> > > +		return SWAP_AGAIN;
> > > +	pmd = pmd_offset(pud, addr);
> > > +
> > > +	if (pmd_trans_huge(*pmd)) {
> > > +		ptl = pmd_lock(mm, pmd);
> > > +                if (!pmd_present(*pmd))
> > > +			goto unlock_pmd;
> > > +		if (unlikely(!pmd_trans_huge(*pmd))) {
> > >  			spin_unlock(ptl);
> > > +			goto map_pte;
> > >  		}
> > > +
> > > +		if (pmd_page(*pmd) != page)
> > > +			goto unlock_pmd;
> > > +
> > > +		referenced = pmdp_clear_young_notify(vma, addr, pmd);
> > > +		spin_unlock(ptl);
> > > +		goto found;
> > > +unlock_pmd:
> > > +		spin_unlock(ptl);
> > > +		return SWAP_AGAIN;
> > >  	} else {
> > > -		pte = page_check_address(page, mm, addr, &ptl, 0);
> > > -		if (pte) {
> > > -			referenced = ptep_clear_young_notify(vma, addr, pte);
> > > -			pte_unmap_unlock(pte, ptl);
> > > -		}
> > > +		pmd_t pmde = *pmd;
> > > +		barrier();
> > > +		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
> > > +			return SWAP_AGAIN;
> > > +
> > > +	}
> > > +map_pte:
> > > +	pte = pte_offset_map(pmd, addr);
> > > +	if (!pte_present(*pte)) {
> > > +		pte_unmap(pte);
> > > +		return SWAP_AGAIN;
> > >  	}
> > > +
> > > +	ptl = pte_lockptr(mm, pmd);
> > > +	spin_lock(ptl);
> > > +
> > > +	if (!pte_present(*pte)) {
> > > +		pte_unmap_unlock(pte, ptl);
> > > +		return SWAP_AGAIN;
> > > +	}
> > > +
> > > +	/* THP can be referenced by any subpage */
> > > +	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
> > > +		pte_unmap_unlock(pte, ptl);
> > > +		return SWAP_AGAIN;
> > > +	}
> > > +
> > > +	referenced = ptep_clear_young_notify(vma, addr, pte);
> > > +	pte_unmap_unlock(pte, ptl);
> > > +found:
> > 
> > Can't we hide this stuff in a helper function, which would be used by
> > both page_referenced_one and page_idle_clear_pte_refs_one, instead of
> > duplicating page_referenced_one code here?
> 
> I would like to, but there's no obvious way to do that: PMDs and PTEs
> require different handling.
> 
> Any ideas?

Something like this? [COMPLETELY UNTESTED]
---
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 853f4f3c6742..bb9169d07c2b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -216,6 +216,10 @@ static inline pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 	return ptep;
 }
 
+pte_t *page_check_address_transhuge(struct page *page, struct mm_struct *mm,
+				    unsigned long address,
+				    pmd_t **pmdp, spinlock_t **ptlp);
+
 /*
  * Used by swapoff to help locate where page is expected in vma.
  */
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 2c9ebe12b40d..6574ef6a1a96 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -56,69 +56,21 @@ static int page_idle_clear_pte_refs_one(struct page *page,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
-	pgd_t *pgd;
-	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	bool referenced = false;
 
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
+	pte = page_check_address_transhuge(page, mm, address, &pmd, &ptl);
+	if (!pte)
 		return SWAP_AGAIN;
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		return SWAP_AGAIN;
-	pmd = pmd_offset(pud, addr);
-
-	if (pmd_trans_huge(*pmd)) {
-		ptl = pmd_lock(mm, pmd);
-                if (!pmd_present(*pmd))
-			goto unlock_pmd;
-		if (unlikely(!pmd_trans_huge(*pmd))) {
-			spin_unlock(ptl);
-			goto map_pte;
-		}
 
-		if (pmd_page(*pmd) != page)
-			goto unlock_pmd;
-
-		referenced = pmdp_clear_young_notify(vma, addr, pmd);
-		spin_unlock(ptl);
-		goto found;
-unlock_pmd:
-		spin_unlock(ptl);
-		return SWAP_AGAIN;
-	} else {
-		pmd_t pmde = *pmd;
-		barrier();
-		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
-			return SWAP_AGAIN;
-
-	}
-map_pte:
-	pte = pte_offset_map(pmd, addr);
-	if (!pte_present(*pte)) {
-		pte_unmap(pte);
-		return SWAP_AGAIN;
-	}
+	if (pte == pmd) /* trans huge */
+		referenced = pmdp_clear_young_notify(vma, address, pmd);
+	else
+		referenced = ptep_clear_young_notify(vma, addr, pte);
 
-	ptl = pte_lockptr(mm, pmd);
-	spin_lock(ptl);
-
-	if (!pte_present(*pte)) {
-		pte_unmap_unlock(pte, ptl);
-		return SWAP_AGAIN;
-	}
-
-	/* THP can be referenced by any subpage */
-	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
-		pte_unmap_unlock(pte, ptl);
-		return SWAP_AGAIN;
-	}
-
-	referenced = ptep_clear_young_notify(vma, addr, pte);
 	pte_unmap_unlock(pte, ptl);
-found:
+
 	if (referenced) {
 		clear_page_idle(page);
 		/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 1f90bda685b6..3638190cf7bc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -796,48 +796,35 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 	return 1;
 }
 
-struct page_referenced_arg {
-	int mapcount;
-	int referenced;
-	unsigned long vm_flags;
-	struct mem_cgroup *memcg;
-};
-/*
- * arg: page_referenced_arg will be passed
- */
-static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
-			unsigned long address, void *arg)
+pte_t *page_check_address_transhuge(struct page *page, struct mm_struct *mm,
+				    unsigned long address,
+				    pmd_t **pmdp, spinlock_t **ptlp)
 {
-	struct mm_struct *mm = vma->vm_mm;
-	spinlock_t *ptl;
-	int referenced = 0;
-	struct page_referenced_arg *pra = arg;
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
+	spinlock_t *ptl;
 
 	if (unlikely(PageHuge(page))) {
 		/* when pud is not present, pte will be NULL */
 		pte = huge_pte_offset(mm, address);
 		if (!pte)
-			return SWAP_AGAIN;
+			return NULL;
 
 		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
+		pmd = NULL;
 		goto check_pte;
 	}
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
-		return SWAP_AGAIN;
-	pud = pud_offset(pgd, address);
+		return NULL;
 	if (!pud_present(*pud))
-		return SWAP_AGAIN;
+		return NULL;
 	pmd = pmd_offset(pud, address);
 
 	if (pmd_trans_huge(*pmd)) {
-		int ret = SWAP_AGAIN;
-
 		ptl = pmd_lock(mm, pmd);
 		if (!pmd_present(*pmd))
 			goto unlock_pmd;
@@ -849,30 +836,23 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		if (pmd_page(*pmd) != page)
 			goto unlock_pmd;
 
-		if (vma->vm_flags & VM_LOCKED) {
-			pra->vm_flags |= VM_LOCKED;
-			ret = SWAP_FAIL; /* To break the loop */
-			goto unlock_pmd;
-		}
-
-		if (pmdp_clear_flush_young_notify(vma, address, pmd))
-			referenced++;
-		spin_unlock(ptl);
+		pte = (pte_t *)pmd;
 		goto found;
 unlock_pmd:
 		spin_unlock(ptl);
-		return ret;
+		return NULL;
 	} else {
 		pmd_t pmde = *pmd;
 		barrier();
 		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
-			return SWAP_AGAIN;
+			return NULL;
 	}
+
 map_pte:
 	pte = pte_offset_map(pmd, address);
 	if (!pte_present(*pte)) {
 		pte_unmap(pte);
-		return SWAP_AGAIN;
+		return NULL;
 	}
 
 	ptl = pte_lockptr(mm, pmd);
@@ -881,35 +861,66 @@ check_pte:
 
 	if (!pte_present(*pte)) {
 		pte_unmap_unlock(pte, ptl);
-		return SWAP_AGAIN;
+		return NULL;
 	}
 
 	/* THP can be referenced by any subpage */
 	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
 		pte_unmap_unlock(pte, ptl);
-		return SWAP_AGAIN;
+		return NULL;
 	}
+found:
+	*ptlp = ptl;
+	*pmdp = pmd;
+	return pte;
+}
+
+struct page_referenced_arg {
+	int mapcount;
+	int referenced;
+	unsigned long vm_flags;
+	struct mem_cgroup *memcg;
+};
+/*
+ * arg: page_referenced_arg will be passed
+ */
+static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+			unsigned long address, void *arg)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int referenced = 0;
+	struct page_referenced_arg *pra = arg;
+	pmd_t *pmd;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pte = page_check_address_transhuge(page, mm, address, &pmd, &ptl);
+	if (!pte)
+		return SWAP_AGAIN;
 
 	if (vma->vm_flags & VM_LOCKED) {
 		pte_unmap_unlock(pte, ptl);
-		pra->vm_flags |= VM_LOCKED;
 		return SWAP_FAIL; /* To break the loop */
 	}
 
-	if (ptep_clear_flush_young_notify(vma, address, pte)) {
-		/*
-		 * Don't treat a reference through a sequentially read
-		 * mapping as such.  If the page has been used in
-		 * another mapping, we will catch it; if this other
-		 * mapping is already gone, the unmap path will have
-		 * set PG_referenced or activated the page.
-		 */
-		if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+	if (pte == pmd) { /* trans huge */
+		if (pmdp_clear_flush_young_notify(vma, address, pmd))
 			referenced++;
+	} else {
+		if (ptep_clear_flush_young_notify(vma, address, pte)) {
+			/*
+			 * Don't treat a reference through a sequentially read
+			 * mapping as such.  If the page has been used in
+			 * another mapping, we will catch it; if this other
+			 * mapping is already gone, the unmap path will have
+			 * set PG_referenced or activated the page.
+			 */
+			if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+				referenced++;
+		}
 	}
 	pte_unmap_unlock(pte, ptl);
 
-found:
 	if (referenced)
 		clear_page_idle(page);
 	if (test_and_clear_page_young(page))

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: Vladimir Davydov <vdavydov@virtuozzo.com>
To: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Hugh Dickins <hughd@google.com>,
	Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>,
	Sasha Levin <sasha.levin@oracle.com>,
	Minchan Kim <minchan@kernel.org>, <linux-kernel@vger.kernel.org>,
	<linux-mm@kvack.org>
Subject: Re: [PATCH 4/4] mm: prepare page_referenced() and page_idle to new THP refcounting
Date: Thu, 5 Nov 2015 15:07:26 +0300	[thread overview]
Message-ID: <20151105120726.GD29259@esperanza> (raw)
In-Reply-To: <20151105092459.GC7614@node.shutemov.name>

On Thu, Nov 05, 2015 at 11:24:59AM +0200, Kirill A. Shutemov wrote:
> On Thu, Nov 05, 2015 at 12:10:13PM +0300, Vladimir Davydov wrote:
> > On Tue, Nov 03, 2015 at 05:26:15PM +0200, Kirill A. Shutemov wrote:
> > ...
> > > @@ -56,23 +56,69 @@ static int page_idle_clear_pte_refs_one(struct page *page,
> > >  {
> > >  	struct mm_struct *mm = vma->vm_mm;
> > >  	spinlock_t *ptl;
> > > +	pgd_t *pgd;
> > > +	pud_t *pud;
> > >  	pmd_t *pmd;
> > >  	pte_t *pte;
> > >  	bool referenced = false;
> > >  
> > > -	if (unlikely(PageTransHuge(page))) {
> > > -		pmd = page_check_address_pmd(page, mm, addr, &ptl);
> > > -		if (pmd) {
> > > -			referenced = pmdp_clear_young_notify(vma, addr, pmd);
> > > +	pgd = pgd_offset(mm, addr);
> > > +	if (!pgd_present(*pgd))
> > > +		return SWAP_AGAIN;
> > > +	pud = pud_offset(pgd, addr);
> > > +	if (!pud_present(*pud))
> > > +		return SWAP_AGAIN;
> > > +	pmd = pmd_offset(pud, addr);
> > > +
> > > +	if (pmd_trans_huge(*pmd)) {
> > > +		ptl = pmd_lock(mm, pmd);
> > > +                if (!pmd_present(*pmd))
> > > +			goto unlock_pmd;
> > > +		if (unlikely(!pmd_trans_huge(*pmd))) {
> > >  			spin_unlock(ptl);
> > > +			goto map_pte;
> > >  		}
> > > +
> > > +		if (pmd_page(*pmd) != page)
> > > +			goto unlock_pmd;
> > > +
> > > +		referenced = pmdp_clear_young_notify(vma, addr, pmd);
> > > +		spin_unlock(ptl);
> > > +		goto found;
> > > +unlock_pmd:
> > > +		spin_unlock(ptl);
> > > +		return SWAP_AGAIN;
> > >  	} else {
> > > -		pte = page_check_address(page, mm, addr, &ptl, 0);
> > > -		if (pte) {
> > > -			referenced = ptep_clear_young_notify(vma, addr, pte);
> > > -			pte_unmap_unlock(pte, ptl);
> > > -		}
> > > +		pmd_t pmde = *pmd;
> > > +		barrier();
> > > +		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
> > > +			return SWAP_AGAIN;
> > > +
> > > +	}
> > > +map_pte:
> > > +	pte = pte_offset_map(pmd, addr);
> > > +	if (!pte_present(*pte)) {
> > > +		pte_unmap(pte);
> > > +		return SWAP_AGAIN;
> > >  	}
> > > +
> > > +	ptl = pte_lockptr(mm, pmd);
> > > +	spin_lock(ptl);
> > > +
> > > +	if (!pte_present(*pte)) {
> > > +		pte_unmap_unlock(pte, ptl);
> > > +		return SWAP_AGAIN;
> > > +	}
> > > +
> > > +	/* THP can be referenced by any subpage */
> > > +	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
> > > +		pte_unmap_unlock(pte, ptl);
> > > +		return SWAP_AGAIN;
> > > +	}
> > > +
> > > +	referenced = ptep_clear_young_notify(vma, addr, pte);
> > > +	pte_unmap_unlock(pte, ptl);
> > > +found:
> > 
> > Can't we hide this stuff in a helper function, which would be used by
> > both page_referenced_one and page_idle_clear_pte_refs_one, instead of
> > duplicating page_referenced_one code here?
> 
> I would like to, but there's no obvious way to do that: PMDs and PTEs
> require different handling.
> 
> Any ideas?

Something like this? [COMPLETELY UNTESTED]
---
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 853f4f3c6742..bb9169d07c2b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -216,6 +216,10 @@ static inline pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 	return ptep;
 }
 
+pte_t *page_check_address_transhuge(struct page *page, struct mm_struct *mm,
+				    unsigned long address,
+				    pmd_t **pmdp, spinlock_t **ptlp);
+
 /*
  * Used by swapoff to help locate where page is expected in vma.
  */
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 2c9ebe12b40d..6574ef6a1a96 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -56,69 +56,21 @@ static int page_idle_clear_pte_refs_one(struct page *page,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
-	pgd_t *pgd;
-	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	bool referenced = false;
 
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
+	pte = page_check_address_transhuge(page, mm, address, &pmd, &ptl);
+	if (!pte)
 		return SWAP_AGAIN;
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		return SWAP_AGAIN;
-	pmd = pmd_offset(pud, addr);
-
-	if (pmd_trans_huge(*pmd)) {
-		ptl = pmd_lock(mm, pmd);
-                if (!pmd_present(*pmd))
-			goto unlock_pmd;
-		if (unlikely(!pmd_trans_huge(*pmd))) {
-			spin_unlock(ptl);
-			goto map_pte;
-		}
 
-		if (pmd_page(*pmd) != page)
-			goto unlock_pmd;
-
-		referenced = pmdp_clear_young_notify(vma, addr, pmd);
-		spin_unlock(ptl);
-		goto found;
-unlock_pmd:
-		spin_unlock(ptl);
-		return SWAP_AGAIN;
-	} else {
-		pmd_t pmde = *pmd;
-		barrier();
-		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
-			return SWAP_AGAIN;
-
-	}
-map_pte:
-	pte = pte_offset_map(pmd, addr);
-	if (!pte_present(*pte)) {
-		pte_unmap(pte);
-		return SWAP_AGAIN;
-	}
+	if (pte == pmd) /* trans huge */
+		referenced = pmdp_clear_young_notify(vma, address, pmd);
+	else
+		referenced = ptep_clear_young_notify(vma, addr, pte);
 
-	ptl = pte_lockptr(mm, pmd);
-	spin_lock(ptl);
-
-	if (!pte_present(*pte)) {
-		pte_unmap_unlock(pte, ptl);
-		return SWAP_AGAIN;
-	}
-
-	/* THP can be referenced by any subpage */
-	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
-		pte_unmap_unlock(pte, ptl);
-		return SWAP_AGAIN;
-	}
-
-	referenced = ptep_clear_young_notify(vma, addr, pte);
 	pte_unmap_unlock(pte, ptl);
-found:
+
 	if (referenced) {
 		clear_page_idle(page);
 		/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 1f90bda685b6..3638190cf7bc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -796,48 +796,35 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 	return 1;
 }
 
-struct page_referenced_arg {
-	int mapcount;
-	int referenced;
-	unsigned long vm_flags;
-	struct mem_cgroup *memcg;
-};
-/*
- * arg: page_referenced_arg will be passed
- */
-static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
-			unsigned long address, void *arg)
+pte_t *page_check_address_transhuge(struct page *page, struct mm_struct *mm,
+				    unsigned long address,
+				    pmd_t **pmdp, spinlock_t **ptlp)
 {
-	struct mm_struct *mm = vma->vm_mm;
-	spinlock_t *ptl;
-	int referenced = 0;
-	struct page_referenced_arg *pra = arg;
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
+	spinlock_t *ptl;
 
 	if (unlikely(PageHuge(page))) {
 		/* when pud is not present, pte will be NULL */
 		pte = huge_pte_offset(mm, address);
 		if (!pte)
-			return SWAP_AGAIN;
+			return NULL;
 
 		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
+		pmd = NULL;
 		goto check_pte;
 	}
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
-		return SWAP_AGAIN;
-	pud = pud_offset(pgd, address);
+		return NULL;
 	if (!pud_present(*pud))
-		return SWAP_AGAIN;
+		return NULL;
 	pmd = pmd_offset(pud, address);
 
 	if (pmd_trans_huge(*pmd)) {
-		int ret = SWAP_AGAIN;
-
 		ptl = pmd_lock(mm, pmd);
 		if (!pmd_present(*pmd))
 			goto unlock_pmd;
@@ -849,30 +836,23 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		if (pmd_page(*pmd) != page)
 			goto unlock_pmd;
 
-		if (vma->vm_flags & VM_LOCKED) {
-			pra->vm_flags |= VM_LOCKED;
-			ret = SWAP_FAIL; /* To break the loop */
-			goto unlock_pmd;
-		}
-
-		if (pmdp_clear_flush_young_notify(vma, address, pmd))
-			referenced++;
-		spin_unlock(ptl);
+		pte = (pte_t *)pmd;
 		goto found;
 unlock_pmd:
 		spin_unlock(ptl);
-		return ret;
+		return NULL;
 	} else {
 		pmd_t pmde = *pmd;
 		barrier();
 		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
-			return SWAP_AGAIN;
+			return NULL;
 	}
+
 map_pte:
 	pte = pte_offset_map(pmd, address);
 	if (!pte_present(*pte)) {
 		pte_unmap(pte);
-		return SWAP_AGAIN;
+		return NULL;
 	}
 
 	ptl = pte_lockptr(mm, pmd);
@@ -881,35 +861,66 @@ check_pte:
 
 	if (!pte_present(*pte)) {
 		pte_unmap_unlock(pte, ptl);
-		return SWAP_AGAIN;
+		return NULL;
 	}
 
 	/* THP can be referenced by any subpage */
 	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
 		pte_unmap_unlock(pte, ptl);
-		return SWAP_AGAIN;
+		return NULL;
 	}
+found:
+	*ptlp = ptl;
+	*pmdp = pmd;
+	return pte;
+}
+
+struct page_referenced_arg {
+	int mapcount;
+	int referenced;
+	unsigned long vm_flags;
+	struct mem_cgroup *memcg;
+};
+/*
+ * arg: page_referenced_arg will be passed
+ */
+static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+			unsigned long address, void *arg)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int referenced = 0;
+	struct page_referenced_arg *pra = arg;
+	pmd_t *pmd;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pte = page_check_address_transhuge(page, mm, address, &pmd, &ptl);
+	if (!pte)
+		return SWAP_AGAIN;
 
 	if (vma->vm_flags & VM_LOCKED) {
 		pte_unmap_unlock(pte, ptl);
-		pra->vm_flags |= VM_LOCKED;
 		return SWAP_FAIL; /* To break the loop */
 	}
 
-	if (ptep_clear_flush_young_notify(vma, address, pte)) {
-		/*
-		 * Don't treat a reference through a sequentially read
-		 * mapping as such.  If the page has been used in
-		 * another mapping, we will catch it; if this other
-		 * mapping is already gone, the unmap path will have
-		 * set PG_referenced or activated the page.
-		 */
-		if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+	if (pte == pmd) { /* trans huge */
+		if (pmdp_clear_flush_young_notify(vma, address, pmd))
 			referenced++;
+	} else {
+		if (ptep_clear_flush_young_notify(vma, address, pte)) {
+			/*
+			 * Don't treat a reference through a sequentially read
+			 * mapping as such.  If the page has been used in
+			 * another mapping, we will catch it; if this other
+			 * mapping is already gone, the unmap path will have
+			 * set PG_referenced or activated the page.
+			 */
+			if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+				referenced++;
+		}
 	}
 	pte_unmap_unlock(pte, ptl);
 
-found:
 	if (referenced)
 		clear_page_idle(page);
 	if (test_and_clear_page_young(page))

  reply	other threads:[~2015-11-05 12:07 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-03 15:26 [PATCH 0/4] Bugfixes for THP refcounting Kirill A. Shutemov
2015-11-03 15:26 ` Kirill A. Shutemov
2015-11-03 15:26 ` [PATCH 1/4] mm: do not crash on PageDoubleMap() for non-head pages Kirill A. Shutemov
2015-11-03 15:26   ` Kirill A. Shutemov
2015-11-03 15:26 ` [PATCH 2/4] mm: duplicate rmap reference for hugetlb pages as compound Kirill A. Shutemov
2015-11-03 15:26   ` Kirill A. Shutemov
2015-11-03 15:26 ` [PATCH 3/4] thp: fix split vs. unmap race Kirill A. Shutemov
2015-11-03 15:26   ` Kirill A. Shutemov
2015-11-03 15:26 ` [PATCH 4/4] mm: prepare page_referenced() and page_idle to new THP refcounting Kirill A. Shutemov
2015-11-03 15:26   ` Kirill A. Shutemov
2015-11-05  9:10   ` Vladimir Davydov
2015-11-05  9:10     ` Vladimir Davydov
2015-11-05  9:24     ` Kirill A. Shutemov
2015-11-05  9:24       ` Kirill A. Shutemov
2015-11-05 12:07       ` Vladimir Davydov [this message]
2015-11-05 12:07         ` Vladimir Davydov
2015-11-05 12:36         ` Kirill A. Shutemov
2015-11-05 12:36           ` Kirill A. Shutemov
2015-11-05 12:53           ` Vladimir Davydov
2015-11-05 12:53             ` Vladimir Davydov
2015-11-05 12:58             ` Kirill A. Shutemov
2015-11-05 12:58               ` Kirill A. Shutemov
2015-11-05 16:31               ` Vladimir Davydov
2015-11-05 16:31                 ` Vladimir Davydov
2015-11-06 14:37               ` [PATCH] mm: add page_check_address_transhuge helper Vladimir Davydov
2015-11-06 14:37                 ` Vladimir Davydov
2015-11-06 15:24                 ` Kirill A. Shutemov
2015-11-06 15:24                   ` Kirill A. Shutemov
2015-11-05 16:03   ` [PATCH 4/4] mm: prepare page_referenced() and page_idle to new THP refcounting Vladimir Davydov
2015-11-05 16:03     ` Vladimir Davydov
2015-11-05 17:27     ` Kirill A. Shutemov
2015-11-05 17:27       ` Kirill A. Shutemov
2015-11-06  0:32   ` Andrew Morton
2015-11-06  0:32     ` Andrew Morton
2015-11-06 10:29     ` Kirill A. Shutemov
2015-11-06 10:29       ` Kirill A. Shutemov
2015-11-06 22:39       ` Andrew Morton
2015-11-06 22:39         ` Andrew Morton
2015-11-08 23:40         ` Kirill A. Shutemov
2015-11-08 23:40           ` Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151105120726.GD29259@esperanza \
    --to=vdavydov@virtuozzo.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=hughd@google.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=minchan@kernel.org \
    --cc=n-horiguchi@ah.jp.nec.com \
    --cc=sasha.levin@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.