linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
	"Huang, Ying" <ying.huang@intel.com>,
	David Hildenbrand <david@redhat.com>,
	Hugh Dickins <hughd@google.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Matthew Wilcox <willy@infradead.org>,
	Michal Hocko <mhocko@suse.com>,
	linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH 18/24] mm/swap: introduce a helper non fault swapin
Date: Mon, 20 Nov 2023 03:47:34 +0800	[thread overview]
Message-ID: <20231119194740.94101-19-ryncsn@gmail.com> (raw)
In-Reply-To: <20231119194740.94101-1-ryncsn@gmail.com>

From: Kairui Song <kasong@tencent.com>

There are two places where swapin is not directly caused by a page
fault: shmem swapin is invoked through the shmem mapping, and swapoff
causes swapin by walking the page table. Both used to construct a
pseudo vmfault struct for the swapin function.

Shmem recently dropped the pseudo vmfault in commit ddc1a5cbc05d
("mempolicy: alloc_pages_mpol() for NUMA policy without vma"). The
swapoff path is still using a pseudo vmfault.

Introduce a helper for both of them. This helps save stack usage in the
swapoff path, and helps apply a unified swapin cache and readahead
policy check.

This also prepares for follow-up commits.

Signed-off-by: Kairui Song <kasong@tencent.com>
---
 mm/shmem.c      | 51 ++++++++++++++++---------------------------------
 mm/swap.h       | 11 +++++++++++
 mm/swap_state.c | 38 ++++++++++++++++++++++++++++++++++++
 mm/swapfile.c   | 23 +++++++++++-----------
 4 files changed, 76 insertions(+), 47 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index f9ce4067c742..81d129aa66d1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1565,22 +1565,6 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 static struct mempolicy *shmem_get_pgoff_policy(struct shmem_inode_info *info,
 			pgoff_t index, unsigned int order, pgoff_t *ilx);
 
-static struct folio *shmem_swapin_cluster(swp_entry_t swap, gfp_t gfp,
-			struct shmem_inode_info *info, pgoff_t index)
-{
-	struct mempolicy *mpol;
-	pgoff_t ilx;
-	struct page *page;
-
-	mpol = shmem_get_pgoff_policy(info, index, 0, &ilx);
-	page = swap_cluster_readahead(swap, gfp, mpol, ilx);
-	mpol_cond_put(mpol);
-
-	if (!page)
-		return NULL;
-	return page_folio(page);
-}
-
 /*
  * Make sure huge_gfp is always more limited than limit_gfp.
  * Some of the flags set permissions, while others set limitations.
@@ -1854,9 +1838,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	struct swap_info_struct *si;
+	enum swap_cache_result result;
 	struct folio *folio = NULL;
+	struct mempolicy *mpol;
+	struct page *page;
 	swp_entry_t swap;
+	pgoff_t ilx;
 	int error;
 
 	VM_BUG_ON(!*foliop || !xa_is_value(*foliop));
@@ -1866,34 +1853,30 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	if (is_poisoned_swp_entry(swap))
 		return -EIO;
 
-	si = get_swap_device(swap);
-	if (!si) {
+	mpol = shmem_get_pgoff_policy(info, index, 0, &ilx);
+	page = swapin_page_non_fault(swap, gfp, mpol, ilx, fault_mm, &result);
+	mpol_cond_put(mpol);
+
+	if (PTR_ERR(page) == -EBUSY) {
 		if (!shmem_confirm_swap(mapping, index, swap))
 			return -EEXIST;
 		else
 			return -EINVAL;
-	}
-
-	/* Look it up and read it in.. */
-	folio = swap_cache_get_folio(swap, NULL, NULL);
-	if (!folio) {
-		/* Or update major stats only when swapin succeeds?? */
-		if (fault_type) {
+	} else if (!page) {
+		error = -ENOMEM;
+		goto failed;
+	} else {
+		folio = page_folio(page);
+		if (fault_type && result != SWAP_CACHE_HIT) {
 			*fault_type |= VM_FAULT_MAJOR;
 			count_vm_event(PGMAJFAULT);
 			count_memcg_event_mm(fault_mm, PGMAJFAULT);
 		}
-		/* Here we actually start the io */
-		folio = shmem_swapin_cluster(swap, gfp, info, index);
-		if (!folio) {
-			error = -ENOMEM;
-			goto failed;
-		}
 	}
 
 	/* We have to do this with folio locked to prevent races */
 	folio_lock(folio);
-	if (!folio_test_swapcache(folio) ||
+	if ((result != SWAP_CACHE_BYPASS && !folio_test_swapcache(folio)) ||
 	    folio->swap.val != swap.val ||
 	    !shmem_confirm_swap(mapping, index, swap)) {
 		error = -EEXIST;
@@ -1930,7 +1913,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	delete_from_swap_cache(folio);
 	folio_mark_dirty(folio);
 	swap_free(swap);
-	put_swap_device(si);
 
 	*foliop = folio;
 	return 0;
@@ -1944,7 +1926,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 		folio_unlock(folio);
 		folio_put(folio);
 	}
-	put_swap_device(si);
 
 	return error;
 }
diff --git a/mm/swap.h b/mm/swap.h
index da9deb5ba37d..b073c29c9790 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -62,6 +62,10 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
 				    struct mempolicy *mpol, pgoff_t ilx);
 struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
 			      struct vm_fault *vmf, enum swap_cache_result *result);
+struct page *swapin_page_non_fault(swp_entry_t entry, gfp_t gfp_mask,
+				   struct mempolicy *mpol, pgoff_t ilx,
+				   struct mm_struct *mm,
+				   enum swap_cache_result *result);
 
 static inline unsigned int folio_swap_flags(struct folio *folio)
 {
@@ -103,6 +107,13 @@ static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
 	return NULL;
 }
 
+static inline struct page *swapin_page_non_fault(swp_entry_t entry, gfp_t gfp_mask,
+		struct mempolicy *mpol, pgoff_t ilx, struct mm_struct *mm,
+		enum swap_cache_result *result)
+{
+	return NULL;	/* stub: never yields a page and never writes *result */
+}
+
 static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
 {
 	return 0;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index ff8a166603d0..eef66757c615 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -956,6 +956,44 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	return page;
 }
 
+struct page *swapin_page_non_fault(swp_entry_t entry, gfp_t gfp_mask,
+				   struct mempolicy *mpol, pgoff_t ilx,
+				   struct mm_struct *mm, enum swap_cache_result *result)
+{
+	enum swap_cache_result cache_result;
+	struct swap_info_struct *si;
+	void *shadow = NULL;
+	struct folio *folio;
+	struct page *page;
+
+	/* Prevent swapoff from happening to us; ERR_PTR(-EBUSY) if we cannot */
+	si = get_swap_device(entry);
+	if (unlikely(!si))
+		return ERR_PTR(-EBUSY);
+
+	folio = swap_cache_get_folio(entry, NULL, &shadow);
+	if (folio) {
+		page = folio_file_page(folio, swp_offset(entry));
+		cache_result = SWAP_CACHE_HIT;
+		goto done;
+	}
+
+	if (swap_use_no_readahead(si, swp_offset(entry))) {
+		page = swapin_no_readahead(entry, gfp_mask, mpol, ilx, mm);
+		if (page && shadow)	/* swapin may have failed: page can be NULL */
+			workingset_refault(page_folio(page), shadow);
+		cache_result = SWAP_CACHE_BYPASS;
+	} else {
+		page = swap_cluster_readahead(entry, gfp_mask, mpol, ilx);
+		cache_result = SWAP_CACHE_MISS;
+	}
+done:
+	put_swap_device(si);
+	if (result)
+		*result = cache_result;
+	return page;
+}
+
 #ifdef CONFIG_SYSFS
 static ssize_t vma_ra_enabled_show(struct kobject *kobj,
 				     struct kobj_attribute *attr, char *buf)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 925ad92486a4..f8c5096fe0f0 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1822,20 +1822,15 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 	si = swap_info[type];
 	do {
+		int ret;
+		pte_t ptent;
+		pgoff_t ilx;
+		swp_entry_t entry;
 		struct page *page;
 		unsigned long offset;
+		struct mempolicy *mpol;
 		unsigned char swp_count;
 		struct folio *folio = NULL;
-		swp_entry_t entry;
-		int ret;
-		pte_t ptent;
-
-		struct vm_fault vmf = {
-			.vma = vma,
-			.address = addr,
-			.real_address = addr,
-			.pmd = pmd,
-		};
 
 		if (!pte++) {
 			pte = pte_offset_map(pmd, addr);
@@ -1855,8 +1850,12 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		offset = swp_offset(entry);
 		pte_unmap(pte);
 		pte = NULL;
-		page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
-					&vmf, NULL);
+
+		mpol = get_vma_policy(vma, addr, 0, &ilx);
+		page = swapin_page_non_fault(entry, GFP_HIGHUSER_MOVABLE,
+					     mpol, ilx, vma->vm_mm, NULL);
+		mpol_cond_put(mpol);
+
 		if (IS_ERR(page))
 			return PTR_ERR(page);
 		else if (page)
-- 
2.42.0



  parent reply	other threads:[~2023-11-19 19:49 UTC|newest]

Thread overview: 93+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-19 19:47 [PATCH 00/24] Swapin path refactor for optimization and bugfix Kairui Song
2023-11-19 19:47 ` [PATCH 01/24] mm/swap: fix a potential undefined behavior issue Kairui Song
2023-11-19 20:55   ` Matthew Wilcox
2023-11-20  3:35     ` Chris Li
2023-11-20 11:14       ` Kairui Song
2023-11-20 17:34         ` Chris Li
2023-11-19 19:47 ` [PATCH 02/24] mm/swapfile.c: add back some comment Kairui Song
2023-11-19 19:47 ` [PATCH 03/24] mm/swap: move no readahead swapin code to a stand alone helper Kairui Song
2023-11-19 21:00   ` Matthew Wilcox
2023-11-20 11:14     ` Kairui Song
2023-11-20 14:55   ` Dan Carpenter
2023-11-21  5:34   ` Chris Li
2023-11-22 17:33     ` Kairui Song
2023-11-19 19:47 ` [PATCH 04/24] mm/swap: avoid setting page lock bit and doing extra unlock check Kairui Song
2023-11-20  4:17   ` Chris Li
2023-11-20 11:15     ` Kairui Song
2023-11-20 17:44       ` Chris Li
2023-11-22 17:32         ` Kairui Song
2023-11-22 20:57           ` Chris Li
2023-11-24  8:14             ` Kairui Song
2023-11-24  8:37               ` Christopher Li
2023-11-19 19:47 ` [PATCH 05/24] mm/swap: move readahead policy checking into swapin_readahead Kairui Song
2023-11-21  6:15   ` Chris Li
2023-11-21  6:35     ` Kairui Song
2023-11-21  7:41       ` Chris Li
2023-11-21  8:32         ` Kairui Song
2023-11-21 15:24           ` Chris Li
2023-11-19 19:47 ` [PATCH 06/24] swap: rework swapin_no_readahead arguments Kairui Song
2023-11-20  0:20   ` kernel test robot
2023-11-21  6:44   ` Chris Li
2023-11-23 10:51     ` Kairui Song
2023-11-19 19:47 ` [PATCH 07/24] mm/swap: move swap_count to header to be shared Kairui Song
2023-11-21  6:51   ` Chris Li
2023-11-21  7:03     ` Kairui Song
2023-11-19 19:47 ` [PATCH 08/24] mm/swap: check readahead policy per entry Kairui Song
2023-11-20  6:04   ` Huang, Ying
2023-11-20 11:17     ` Kairui Song
2023-11-21  1:10       ` Huang, Ying
2023-11-21  5:20         ` Chris Li
2023-11-21  5:13       ` Chris Li
2023-11-21  7:54   ` Chris Li
2023-11-23 10:52     ` Kairui Song
2023-11-19 19:47 ` [PATCH 09/24] mm/swap: inline __swap_count Kairui Song
2023-11-20  7:41   ` Huang, Ying
2023-11-21  8:02     ` Chris Li
2023-11-19 19:47 ` [PATCH 10/24] mm/swap: remove nr_rotate_swap and related code Kairui Song
2023-11-21 15:45   ` Chris Li
2023-11-19 19:47 ` [PATCH 11/24] mm/swap: also handle swapcache lookup in swapin_readahead Kairui Song
2023-11-20  0:47   ` kernel test robot
2023-11-21 16:06   ` Chris Li
2023-11-24  8:42     ` Kairui Song
2023-11-24  9:10       ` Chris Li
2023-11-19 19:47 ` [PATCH 12/24] mm/swap: simplify arguments for swap_cache_get_folio Kairui Song
2023-11-21 16:36   ` Chris Li
2023-11-19 19:47 ` [PATCH 13/24] swap: simplify swap_cache_get_folio Kairui Song
2023-11-21 16:50   ` Chris Li
2023-11-19 19:47 ` [PATCH 14/24] mm/swap: do shadow lookup as well when doing swap cache lookup Kairui Song
2023-11-21 16:55   ` Chris Li
2023-11-19 19:47 ` [PATCH 15/24] mm/swap: avoid an duplicated swap cache lookup for SYNCHRONOUS_IO device Kairui Song
2023-11-21 17:15   ` Chris Li
2023-11-22 18:08     ` Kairui Song
2023-11-19 19:47 ` [PATCH 16/24] mm/swap: reduce scope of get_swap_device in swapin path Kairui Song
2023-11-19 21:12   ` Matthew Wilcox
2023-11-20 11:14     ` Kairui Song
2023-11-21 17:25   ` Chris Li
2023-11-22  0:36   ` Huang, Ying
2023-11-23 11:13     ` Kairui Song
2023-11-24  0:40       ` Huang, Ying
2023-11-19 19:47 ` [PATCH 17/24] mm/swap: fix false error when swapoff race with swapin Kairui Song
2023-11-19 19:47 ` Kairui Song [this message]
2023-11-20  1:07   ` [PATCH 18/24] mm/swap: introduce a helper non fault swapin kernel test robot
2023-11-22  4:40   ` Chris Li
2023-11-28 11:22     ` Kairui Song
2023-12-13  2:22       ` Chris Li
2023-11-19 19:47 ` [PATCH 19/24] shmem, swap: refactor error check on OOM or race Kairui Song
2023-11-20  7:04   ` Chris Li
2023-11-20 11:17     ` Kairui Song
2023-11-19 19:47 ` [PATCH 20/24] swap: simplify and make swap_find_cache static Kairui Song
2023-11-22  5:01   ` Chris Li
2023-11-19 19:47 ` [PATCH 21/24] swap: make swapin_readahead result checking argument mandatory Kairui Song
2023-11-22  5:15   ` Chris Li
2023-11-24  8:14     ` Kairui Song
2023-11-19 19:47 ` [PATCH 22/24] swap: make swap_cluster_readahead static Kairui Song
2023-11-22  5:20   ` Chris Li
2023-11-19 19:47 ` [PATCH 23/24] swap: fix multiple swap leak when after cgroup migrate Kairui Song
2023-11-20  7:35   ` Huang, Ying
2023-11-20 11:17     ` Kairui Song
2023-11-22  5:34       ` Chris Li
2023-11-19 19:47 ` [PATCH 24/24] mm/swap: change swapin_readahead to swapin_page_fault Kairui Song
2023-11-20 19:09 ` [PATCH 00/24] Swapin path refactor for optimization and bugfix Yosry Ahmed
2023-11-20 20:22   ` Chris Li
2023-11-22  6:46     ` Kairui Song
2023-11-22  6:43   ` Kairui Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231119194740.94101-19-ryncsn@gmail.com \
    --to=ryncsn@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).