linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Chris Li <chrisl@kernel.org>,
	"Huang, Ying" <ying.huang@intel.com>,
	Hugh Dickins <hughd@google.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Matthew Wilcox <willy@infradead.org>,
	Michal Hocko <mhocko@suse.com>,
	Yosry Ahmed <yosryahmed@google.com>,
	David Hildenbrand <david@redhat.com>,
	linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH v3 5/7] mm/swap: avoid a duplicated swap cache lookup for SWP_SYNCHRONOUS_IO
Date: Tue, 30 Jan 2024 01:54:20 +0800	[thread overview]
Message-ID: <20240129175423.1987-6-ryncsn@gmail.com> (raw)
In-Reply-To: <20240129175423.1987-1-ryncsn@gmail.com>

From: Kairui Song <kasong@tencent.com>

When an xa_value is returned by the swap cache lookup, keep it so it can
be used later for the workingset refault check, instead of doing the
lookup again in swapin_direct.

The shadow lookup and workingset check are skipped for the swapoff path
to reduce overhead: workingset checking for anon pages upon swapoff is
not helpful, and simply considering all such pages inactive makes more
sense, since swapoff doesn't mean the pages are being accessed.

After this commit, swapin is about 4% faster for ZRAM, measured with a
micro benchmark that uses madvise to swap out 10G of zero-filled data to
ZRAM and then reads it back in:

Before: 11143285 us
After:  10692644 us (+4.1%)

Signed-off-by: Kairui Song <kasong@tencent.com>
---
 mm/memory.c     |  5 +++--
 mm/shmem.c      |  2 +-
 mm/swap.h       | 11 ++++++-----
 mm/swap_state.c | 23 +++++++++++++----------
 mm/swapfile.c   |  4 ++--
 5 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 8711f8a07039..349946899f8d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3800,6 +3800,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	struct swap_info_struct *si = NULL;
 	rmap_t rmap_flags = RMAP_NONE;
 	bool exclusive = false;
+	void *shadow = NULL;
 	swp_entry_t entry;
 	pte_t pte;
 	vm_fault_t ret = 0;
@@ -3858,14 +3859,14 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	if (unlikely(!si))
 		goto out;
 
-	folio = swap_cache_get_folio(entry, vma, vmf->address);
+	folio = swap_cache_get_folio(entry, vma, vmf->address, &shadow);
 	if (folio)
 		page = folio_file_page(folio, swp_offset(entry));
 	swapcache = folio;
 
 	if (!folio) {
 		folio = swapin_entry(entry, GFP_HIGHUSER_MOVABLE,
-				     vmf, &swapcache);
+				     vmf, &swapcache, shadow);
 		if (!folio) {
 			/*
 			 * Back out if somebody else faulted in this pte
diff --git a/mm/shmem.c b/mm/shmem.c
index d7c84ff62186..698a31bf7baa 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1873,7 +1873,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	}
 
 	/* Look it up and read it in.. */
-	folio = swap_cache_get_folio(swap, NULL, 0);
+	folio = swap_cache_get_folio(swap, NULL, 0, NULL);
 	if (!folio) {
 		/* Or update major stats only when swapin succeeds?? */
 		if (fault_type) {
diff --git a/mm/swap.h b/mm/swap.h
index 8f8185d3865c..ca9cb472a263 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -42,7 +42,8 @@ void delete_from_swap_cache(struct folio *folio);
 void clear_shadow_from_swap_cache(int type, unsigned long begin,
 				  unsigned long end);
 struct folio *swap_cache_get_folio(swp_entry_t entry,
-		struct vm_area_struct *vma, unsigned long addr);
+		struct vm_area_struct *vma, unsigned long addr,
+		void **shadowp);
 struct folio *filemap_get_incore_folio(struct address_space *mapping,
 		pgoff_t index);
 
@@ -54,8 +55,8 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_flags,
 		bool skip_if_exists);
 struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
 		struct mempolicy *mpol, pgoff_t ilx);
-struct folio *swapin_entry(swp_entry_t entry, gfp_t flag,
-			   struct vm_fault *vmf, struct folio **swapcached);
+struct folio *swapin_entry(swp_entry_t entry, gfp_t flag, struct vm_fault *vmf,
+		struct folio **swapcached, void *shadow);
 
 static inline unsigned int folio_swap_flags(struct folio *folio)
 {
@@ -87,7 +88,7 @@ static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
 }
 
 static inline struct folio *swapin_entry(swp_entry_t swp, gfp_t gfp_mask,
-			struct vm_fault *vmf, struct folio **swapcached)
+			struct vm_fault *vmf, struct folio **swapcached, void *shadow)
 {
 	return NULL;
 }
@@ -98,7 +99,7 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
 }
 
 static inline struct folio *swap_cache_get_folio(swp_entry_t entry,
-		struct vm_area_struct *vma, unsigned long addr)
+		struct vm_area_struct *vma, unsigned long addr, void **shadowp)
 {
 	return NULL;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5e06b2e140d4..e41a137a6123 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -330,12 +330,18 @@ static inline bool swap_use_vma_readahead(void)
  * Caller must lock the swap device or hold a reference to keep it valid.
  */
 struct folio *swap_cache_get_folio(swp_entry_t entry,
-		struct vm_area_struct *vma, unsigned long addr)
+		struct vm_area_struct *vma, unsigned long addr, void **shadowp)
 {
 	struct folio *folio;
 
-	folio = filemap_get_folio(swap_address_space(entry), swp_offset(entry));
-	if (!IS_ERR(folio)) {
+	folio = filemap_get_entry(swap_address_space(entry), swp_offset(entry));
+	if (xa_is_value(folio)) {
+		if (shadowp)
+			*shadowp = folio;
+		return NULL;
+	}
+
+	if (folio) {
 		bool vma_ra = swap_use_vma_readahead();
 		bool readahead;
 
@@ -365,8 +371,6 @@ struct folio *swap_cache_get_folio(swp_entry_t entry,
 			if (!vma || !vma_ra)
 				atomic_inc(&swapin_readahead_hits);
 		}
-	} else {
-		folio = NULL;
 	}
 
 	return folio;
@@ -866,16 +870,16 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
  * @entry: swap entry of this memory
  * @gfp_mask: memory allocation flags
  * @vmf: fault information
+ * @shadow: workingset shadow corresponding to entry
  *
  * Returns the struct folio for entry and addr after the swap entry is read
  * in.
  */
 static struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
-				  struct vm_fault *vmf)
+				  struct vm_fault *vmf, void *shadow)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct folio *folio;
-	void *shadow = NULL;
 
 	/* skip swapcache */
 	folio = vma_alloc_folio(gfp_mask, 0,
@@ -892,7 +896,6 @@ static struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
 		}
 		mem_cgroup_swapin_uncharge_swap(entry);
 
-		shadow = get_shadow_from_swap_cache(entry);
 		if (shadow)
 			workingset_refault(folio, shadow);
 
@@ -922,7 +925,7 @@ static struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
  * or skip the readahead(ie, ramdisk based swap device).
  */
 struct folio *swapin_entry(swp_entry_t entry, gfp_t gfp_mask,
-			   struct vm_fault *vmf, struct folio **swapcache)
+			   struct vm_fault *vmf, struct folio **swapcache, void *shadow)
 {
 	struct mempolicy *mpol;
 	struct folio *folio;
@@ -930,7 +933,7 @@ struct folio *swapin_entry(swp_entry_t entry, gfp_t gfp_mask,
 
 	if (data_race(swp_swap_info(entry)->flags & SWP_SYNCHRONOUS_IO) &&
 	    __swap_count(entry) == 1) {
-		folio = swapin_direct(entry, gfp_mask, vmf);
+		folio = swapin_direct(entry, gfp_mask, vmf, shadow);
 	} else {
 		mpol = get_vma_policy(vmf->vma, vmf->address, 0, &ilx);
 		if (swap_use_vma_readahead())
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1cf7e72e19e3..aac26f5a6cec 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1865,7 +1865,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		pte_unmap(pte);
 		pte = NULL;
 
-		folio = swap_cache_get_folio(entry, vma, addr);
+		folio = swap_cache_get_folio(entry, vma, addr, NULL);
 		if (!folio) {
 			struct vm_fault vmf = {
 				.vma = vma,
@@ -1875,7 +1875,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			};
 
 			folio = swapin_entry(entry, GFP_HIGHUSER_MOVABLE,
-					    &vmf, NULL);
+					    &vmf, NULL, NULL);
 		}
 		if (!folio) {
 			/*
-- 
2.43.0



  parent reply	other threads:[~2024-01-29 17:55 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-29 17:54 [PATCH v3 0/7] swapin refactor for optimization and unified readahead Kairui Song
2024-01-29 17:54 ` [PATCH v3 1/7] mm/swapfile.c: add back some comment Kairui Song
2024-01-29 17:54 ` [PATCH v3 2/7] mm/swap: move no readahead swapin code to a stand-alone helper Kairui Song
2024-01-30  5:38   ` Huang, Ying
2024-01-30  5:55     ` Kairui Song
2024-01-29 17:54 ` [PATCH v3 3/7] mm/swap: always account swapped in page into current memcg Kairui Song
2024-01-30  6:12   ` Huang, Ying
2024-01-30  7:01     ` Kairui Song
2024-01-30  7:03       ` Kairui Song
2024-01-29 17:54 ` [PATCH v3 4/7] mm/swap: introduce swapin_entry for unified readahead policy Kairui Song
2024-01-30  6:29   ` Huang, Ying
2024-01-29 17:54 ` Kairui Song [this message]
2024-01-30  6:51   ` [PATCH v3 5/7] mm/swap: avoid a duplicated swap cache lookup for SWP_SYNCHRONOUS_IO Huang, Ying
2024-01-29 17:54 ` [PATCH v3 6/7] mm/swap, shmem: use unified swapin helper for shmem Kairui Song
2024-01-31  2:51   ` Whether is the race for SWP_SYNCHRONOUS_IO possible? (was Re: [PATCH v3 6/7] mm/swap, shmem: use unified swapin helper for shmem) Huang, Ying
2024-01-31  3:58     ` Kairui Song
2024-01-31 23:45       ` Chris Li
2024-02-01  0:52         ` Huang, Ying
2024-01-31 23:38     ` Chris Li
2024-01-29 17:54 ` [PATCH v3 7/7] mm/swap: refactor swap_cache_get_folio Kairui Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240129175423.1987-6-ryncsn@gmail.com \
    --to=ryncsn@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=chrisl@kernel.org \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@intel.com \
    --cc=yosryahmed@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).