linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Muchun Song <songmuchun@bytedance.com>
To: dan.j.williams@intel.com, willy@infradead.org, jack@suse.cz,
	viro@zeniv.linux.org.uk, akpm@linux-foundation.org,
	apopple@nvidia.com, shy828301@gmail.com, rcampbell@nvidia.com,
	hughd@google.com, xiyuyang19@fudan.edu.cn,
	kirill.shutemov@linux.intel.com, zwisler@kernel.org
Cc: linux-fsdevel@vger.kernel.org, nvdimm@lists.linux.dev,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Muchun Song <songmuchun@bytedance.com>
Subject: [PATCH 3/5] mm: page_vma_mapped: support checking if a pfn is mapped into a vma
Date: Fri, 21 Jan 2022 15:55:13 +0800	[thread overview]
Message-ID: <20220121075515.79311-3-songmuchun@bytedance.com> (raw)
In-Reply-To: <20220121075515.79311-1-songmuchun@bytedance.com>

page_vma_mapped_walk() is supposed to check if a page is mapped into a vma.
However, not all page frames (e.g. PFN_DEV) have an associated struct page.
Anyone who wants to check if a pfn (without a struct page) is mapped into a
vma would have to duplicate code very similar to this function. So add
support for checking if a pfn is mapped into a vma. In the next patch, dax
will use this new feature.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 include/linux/rmap.h | 13 +++++++++--
 mm/internal.h        | 25 +++++++++++++-------
 mm/page_vma_mapped.c | 65 +++++++++++++++++++++++++++++++++-------------------
 3 files changed, 70 insertions(+), 33 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 221c3c6438a7..7628474732e7 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -204,9 +204,18 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
 #define PVMW_SYNC		(1 << 0)
 /* Look for migarion entries rather than present PTEs */
 #define PVMW_MIGRATION		(1 << 1)
+/* Walk the page table by checking the pfn instead of a struct page */
+#define PVMW_PFN_WALK		(1 << 2)
 
 struct page_vma_mapped_walk {
-	struct page *page;
+	union {
+		struct page *page;
+		struct {
+			unsigned long pfn;
+			unsigned int nr;
+			pgoff_t index;
+		};
+	};
 	struct vm_area_struct *vma;
 	unsigned long address;
 	pmd_t *pmd;
@@ -218,7 +227,7 @@ struct page_vma_mapped_walk {
 static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
 {
 	/* HugeTLB pte is set to the relevant page table entry without pte_mapped. */
-	if (pvmw->pte && !PageHuge(pvmw->page))
+	if (pvmw->pte && ((pvmw->flags & PVMW_PFN_WALK) || !PageHuge(pvmw->page)))
 		pte_unmap(pvmw->pte);
 	if (pvmw->ptl)
 		spin_unlock(pvmw->ptl);
diff --git a/mm/internal.h b/mm/internal.h
index deb9bda18e59..d6e3e8e1be2d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -478,25 +478,34 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 }
 
 /*
- * Then at what user virtual address will none of the page be found in vma?
- * Assumes that vma_address() already returned a good starting address.
- * If page is a compound head, the entire compound page is considered.
+ * Return the end of user virtual address at the specific offset within
+ * a vma.
  */
 static inline unsigned long
-vma_address_end(struct page *page, struct vm_area_struct *vma)
+vma_pgoff_address_end(pgoff_t pgoff, unsigned long nr_pages,
+		      struct vm_area_struct *vma)
 {
-	pgoff_t pgoff;
 	unsigned long address;
 
-	VM_BUG_ON_PAGE(PageKsm(page), page);	/* KSM page->index unusable */
-	pgoff = page_to_pgoff(page) + compound_nr(page);
-	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+	address = vma->vm_start + ((pgoff + nr_pages - vma->vm_pgoff) << PAGE_SHIFT);
 	/* Check for address beyond vma (or wrapped through 0?) */
 	if (address < vma->vm_start || address > vma->vm_end)
 		address = vma->vm_end;
 	return address;
 }
 
+/*
+ * Then at what user virtual address will none of the page be found in vma?
+ * Assumes that vma_address() already returned a good starting address.
+ * If page is a compound head, the entire compound page is considered.
+ */
+static inline unsigned long
+vma_address_end(struct page *page, struct vm_area_struct *vma)
+{
+	VM_BUG_ON_PAGE(PageKsm(page), page);	/* KSM page->index unusable */
+	return vma_pgoff_address_end(page_to_pgoff(page), compound_nr(page), vma);
+}
+
 static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
 						    struct file *fpin)
 {
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index f7b331081791..c8819770d457 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -53,10 +53,16 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw)
 	return true;
 }
 
-static inline bool pfn_is_match(struct page *page, unsigned long pfn)
+static inline bool pfn_is_match(struct page_vma_mapped_walk *pvmw, unsigned long pfn)
 {
-	unsigned long page_pfn = page_to_pfn(page);
+	struct page *page;
+	unsigned long page_pfn;
 
+	if (pvmw->flags & PVMW_PFN_WALK)
+		return pfn >= pvmw->pfn && pfn - pvmw->pfn < pvmw->nr;
+
+	page = pvmw->page;
+	page_pfn = page_to_pfn(page);
 	/* normal page and hugetlbfs page */
 	if (!PageTransCompound(page) || PageHuge(page))
 		return page_pfn == pfn;
@@ -116,7 +122,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
 		pfn = pte_pfn(*pvmw->pte);
 	}
 
-	return pfn_is_match(pvmw->page, pfn);
+	return pfn_is_match(pvmw, pfn);
 }
 
 static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
@@ -127,24 +133,24 @@ static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
 }
 
 /**
- * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
- * @pvmw->address
- * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
- * must be set. pmd, pte and ptl must be NULL.
+ * page_vma_mapped_walk - check if @pvmw->page or @pvmw->pfn is mapped in
+ * @pvmw->vma at @pvmw->address
+ * @pvmw: pointer to struct page_vma_mapped_walk. page (or pfn and nr and
+ * index), vma, address and flags must be set. pmd, pte and ptl must be NULL.
  *
- * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
- * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
- * adjusted if needed (for PTE-mapped THPs).
+ * Returns true if the page or pfn is mapped in the vma. @pvmw->pmd and
+ * @pvmw->pte point to relevant page table entries. @pvmw->ptl is locked.
+ * @pvmw->address is adjusted if needed (for PTE-mapped THPs).
  *
  * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page
- * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
- * a loop to find all PTEs that map the THP.
+ * (usually THP or Huge DEVMAP). For PMD-mapped page, you should run
+ * page_vma_mapped_walk() in a loop to find all PTEs that map the huge page.
  *
  * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
  * regardless of which page table level the page is mapped at. @pvmw->pmd is
  * NULL.
  *
- * Returns false if there are no more page table entries for the page in
+ * Returns false if there are no more page table entries for the page or pfn in
  * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
  *
  * If you need to stop the walk before page_vma_mapped_walk() returned false,
@@ -153,18 +159,27 @@ static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
 bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 {
 	struct mm_struct *mm = pvmw->vma->vm_mm;
-	struct page *page = pvmw->page;
+	struct page *page;
 	unsigned long end;
+	unsigned long pfn;
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t pmde;
 
+	if (pvmw->flags & PVMW_PFN_WALK) {
+		page = NULL;
+		pfn = pvmw->pfn;
+	} else {
+		page = pvmw->page;
+		pfn = page_to_pfn(page);
+	}
+
 	/* The only possible pmd mapping has been handled on last iteration */
 	if (pvmw->pmd && !pvmw->pte)
 		return not_found(pvmw);
 
-	if (unlikely(PageHuge(page))) {
+	if (unlikely(page && PageHuge(page))) {
 		/* The only possible mapping was handled on last iteration */
 		if (pvmw->pte)
 			return not_found(pvmw);
@@ -187,9 +202,13 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	 * any PageKsm page: whose page->index misleads vma_address()
 	 * and vma_address_end() to disaster.
 	 */
-	end = PageTransCompound(page) ?
-		vma_address_end(page, pvmw->vma) :
-		pvmw->address + PAGE_SIZE;
+	if (page)
+		end = PageTransCompound(page) ?
+		      vma_address_end(page, pvmw->vma) :
+		      pvmw->address + PAGE_SIZE;
+	else
+		end = vma_pgoff_address_end(pvmw->index, pvmw->nr, pvmw->vma);
+
 	if (pvmw->pte)
 		goto next_pte;
 restart:
@@ -218,13 +237,13 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 		 */
 		pmde = READ_ONCE(*pvmw->pmd);
 
-		if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+		if (pmd_leaf(pmde) || is_pmd_migration_entry(pmde)) {
 			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
 			pmde = *pvmw->pmd;
-			if (likely(pmd_trans_huge(pmde))) {
+			if (likely(pmd_leaf(pmde))) {
 				if (pvmw->flags & PVMW_MIGRATION)
 					return not_found(pvmw);
-				if (pmd_page(pmde) != page)
+				if (pmd_pfn(pmde) != pfn)
 					return not_found(pvmw);
 				return true;
 			}
@@ -236,7 +255,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 					return not_found(pvmw);
 				entry = pmd_to_swp_entry(pmde);
 				if (!is_migration_entry(entry) ||
-				    pfn_swap_entry_to_page(entry) != page)
+				    page_to_pfn(pfn_swap_entry_to_page(entry)) != pfn)
 					return not_found(pvmw);
 				return true;
 			}
@@ -249,7 +268,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 			 * cannot return prematurely, while zap_huge_pmd() has
 			 * cleared *pmd but not decremented compound_mapcount().
 			 */
-			if ((pvmw->flags & PVMW_SYNC) &&
+			if ((pvmw->flags & PVMW_SYNC) && page &&
 			    PageTransCompound(page)) {
 				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
 
-- 
2.11.0


  parent reply	other threads:[~2022-01-21  7:57 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-21  7:55 [PATCH 1/5] mm: rmap: fix cache flush on THP pages Muchun Song
2022-01-21  7:55 ` [PATCH 2/5] dax: fix cache flush on PMD-mapped pages Muchun Song
2022-01-24  7:34   ` Christoph Hellwig
2022-01-21  7:55 ` Muchun Song [this message]
2022-01-24  7:36   ` [PATCH 3/5] mm: page_vma_mapped: support checking if a pfn is mapped into a vma Christoph Hellwig
2022-01-24  9:01     ` Muchun Song
2022-01-21  7:55 ` [PATCH 4/5] dax: fix missing writeprotect the pte entry Muchun Song
2022-01-24  7:41   ` Christoph Hellwig
2022-01-24  9:07     ` Muchun Song
2022-01-21  7:55 ` [PATCH 5/5] mm: remove range parameter from follow_invalidate_pte() Muchun Song
2022-01-21 18:05 ` [PATCH 1/5] mm: rmap: fix cache flush on THP pages Yang Shi
2022-01-24  7:34 ` Christoph Hellwig
2022-01-24  8:51   ` Muchun Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220121075515.79311-3-songmuchun@bytedance.com \
    --to=songmuchun@bytedance.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=hughd@google.com \
    --cc=jack@suse.cz \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nvdimm@lists.linux.dev \
    --cc=rcampbell@nvidia.com \
    --cc=shy828301@gmail.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    --cc=xiyuyang19@fudan.edu.cn \
    --cc=zwisler@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).