linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mm: Report bad PTEs in lookup_swap_cache()
@ 2018-02-23 20:03 Matthew Wilcox
  2018-02-24  0:49 ` Huang, Ying
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Matthew Wilcox @ 2018-02-23 20:03 UTC (permalink / raw)
  To: linux-mm; +Cc: Matthew Wilcox, Huang Ying

From: Matthew Wilcox <mawilcox@microsoft.com>

If we have garbage in the PTE, we can call the radix tree code with a
NULL radix tree head which leads to an OOPS.  Detect the case where
we've found a PTE that refers to a non-existent swap device and report
the error correctly.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/swap.h | 10 ++++------
 mm/memory.c          |  4 +---
 mm/shmem.c           |  2 +-
 mm/swap_state.c      | 35 ++++++++++++++++++++++-------------
 4 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7b6a59f722a3..045edb2ca8d0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -415,9 +415,8 @@ extern void __delete_from_swap_cache(struct page *);
 extern void delete_from_swap_cache(struct page *);
 extern void free_page_and_swap_cache(struct page *);
 extern void free_pages_and_swap_cache(struct page **, int);
-extern struct page *lookup_swap_cache(swp_entry_t entry,
-				      struct vm_area_struct *vma,
-				      unsigned long addr);
+extern struct page *lookup_swap_cache(swp_entry_t entry, bool vma_ra,
+				      struct vm_fault *vmf);
 extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
 			struct vm_area_struct *vma, unsigned long addr,
 			bool do_poll);
@@ -568,9 +567,8 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
 	return 0;
 }
 
-static inline struct page *lookup_swap_cache(swp_entry_t swp,
-					     struct vm_area_struct *vma,
-					     unsigned long addr)
+static inline struct page *lookup_swap_cache(swp_entry_t swp, bool vma_ra,
+						struct vm_fault *vmf)
 {
 	return NULL;
 }
diff --git a/mm/memory.c b/mm/memory.c
index 5fcfc24904d1..1cfc4699db42 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2926,11 +2926,9 @@ int do_swap_page(struct vm_fault *vmf)
 		goto out;
 	}
 
-
 	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
 	if (!page) {
-		page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
-					 vmf->address);
+		page = lookup_swap_cache(entry, vma_readahead, vmf);
 		swapcache = page;
 	}
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 1907688b75ee..8976f05823ba 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1650,7 +1650,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 
 	if (swap.val) {
 		/* Look it up and read it in.. */
-		page = lookup_swap_cache(swap, NULL, 0);
+		page = lookup_swap_cache(swap, false, NULL);
 		if (!page) {
 			/* Or update major stats only when swapin succeeds?? */
 			if (fault_type) {
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 39ae7cfad90f..5a7755ecbb03 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -328,14 +328,22 @@ void free_pages_and_swap_cache(struct page **pages, int nr)
  * lock getting page table operations atomic even if we drop the page
  * lock before returning.
  */
-struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
-			       unsigned long addr)
+struct page *lookup_swap_cache(swp_entry_t entry, bool vma_ra,
+			struct vm_fault *vmf)
 {
 	struct page *page;
-	unsigned long ra_info;
-	int win, hits, readahead;
+	int readahead;
+	struct address_space *swapper_space = swap_address_space(entry);
 
-	page = find_get_page(swap_address_space(entry), swp_offset(entry));
+	if (!swapper_space) {
+		if (vmf)
+			pte_ERROR(vmf->orig_pte);
+		else
+			pr_err("Bad swp_entry: %lx\n", entry.val);
+		return NULL;
+	}
+
+	page = find_get_page(swapper_space, swp_offset(entry));
 
 	INC_CACHE_INFO(find_total);
 	if (page) {
@@ -343,18 +351,19 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
 		if (unlikely(PageTransCompound(page)))
 			return page;
 		readahead = TestClearPageReadahead(page);
-		if (vma) {
-			ra_info = GET_SWAP_RA_VAL(vma);
-			win = SWAP_RA_WIN(ra_info);
-			hits = SWAP_RA_HITS(ra_info);
+		if (vma_ra) {
+			unsigned long ra_info = GET_SWAP_RA_VAL(vmf->vma);
+			int win = SWAP_RA_WIN(ra_info);
+			int hits = SWAP_RA_HITS(ra_info);
+
 			if (readahead)
 				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
-			atomic_long_set(&vma->swap_readahead_info,
-					SWAP_RA_VAL(addr, win, hits));
+			atomic_long_set(&vmf->vma->swap_readahead_info,
+					SWAP_RA_VAL(vmf->address, win, hits));
 		}
 		if (readahead) {
 			count_vm_event(SWAP_RA_HIT);
-			if (!vma)
+			if (!vma_ra)
 				atomic_inc(&swapin_readahead_hits);
 		}
 	}
@@ -675,7 +684,7 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	entry = pte_to_swp_entry(vmf->orig_pte);
 	if ((unlikely(non_swap_entry(entry))))
 		return NULL;
-	page = lookup_swap_cache(entry, vma, faddr);
+	page = lookup_swap_cache(entry, true, vmf);
 	if (page)
 		return page;
 
-- 
2.16.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] mm: Report bad PTEs in lookup_swap_cache()
  2018-02-23 20:03 [PATCH] mm: Report bad PTEs in lookup_swap_cache() Matthew Wilcox
@ 2018-02-24  0:49 ` Huang, Ying
  2018-02-25  4:43 ` kbuild test robot
  2018-02-25  6:27 ` kbuild test robot
  2 siblings, 0 replies; 4+ messages in thread
From: Huang, Ying @ 2018-02-24  0:49 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-mm, Matthew Wilcox

Matthew Wilcox <willy@infradead.org> writes:

> From: Matthew Wilcox <mawilcox@microsoft.com>
>
> If we have garbage in the PTE, we can call the radix tree code with a
> NULL radix tree head which leads to an OOPS.  Detect the case where
> we've found a PTE that refers to a non-existent swap device and report
> the error correctly.

There is a patch in -mm tree which addresses this problem at least
partially as follow.  Please take a look at it.

https://www.spinics.net/lists/linux-mm/msg145242.html

[snip]

> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index 39ae7cfad90f..5a7755ecbb03 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -328,14 +328,22 @@ void free_pages_and_swap_cache(struct page **pages, int nr)
>   * lock getting page table operations atomic even if we drop the page
>   * lock before returning.
>   */
> -struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
> -			       unsigned long addr)
> +struct page *lookup_swap_cache(swp_entry_t entry, bool vma_ra,
> +			struct vm_fault *vmf)
>  {
>  	struct page *page;
> -	unsigned long ra_info;
> -	int win, hits, readahead;
> +	int readahead;
> +	struct address_space *swapper_space = swap_address_space(entry);
>  
> -	page = find_get_page(swap_address_space(entry), swp_offset(entry));
> +	if (!swapper_space) {
> +		if (vmf)
> +			pte_ERROR(vmf->orig_pte);
> +		else
> +			pr_err("Bad swp_entry: %lx\n", entry.val);

>From we get swap_entry from PTE to lookup_swap_cache(), nothing prevent
the swap device to be swapoff.  So, I think sometimes it is hard to
distinguish between garbage PTEs and PTEs which become deprecated
because of swapoff.

Best Regards,
Huang, Ying

> +		return NULL;
> +	}
> +
> +	page = find_get_page(swapper_space, swp_offset(entry));
>  
>  	INC_CACHE_INFO(find_total);
>  	if (page) {

[snip]

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] mm: Report bad PTEs in lookup_swap_cache()
  2018-02-23 20:03 [PATCH] mm: Report bad PTEs in lookup_swap_cache() Matthew Wilcox
  2018-02-24  0:49 ` Huang, Ying
@ 2018-02-25  4:43 ` kbuild test robot
  2018-02-25  6:27 ` kbuild test robot
  2 siblings, 0 replies; 4+ messages in thread
From: kbuild test robot @ 2018-02-25  4:43 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: kbuild-all, linux-mm, Matthew Wilcox, Huang Ying

[-- Attachment #1: Type: text/plain, Size: 2879 bytes --]

Hi Matthew,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.16-rc2]
[cannot apply to mmotm/master next-20180223]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Matthew-Wilcox/mm-Report-bad-PTEs-in-lookup_swap_cache/20180225-080318
config: sparc64-allmodconfig (attached as .config)
compiler: sparc64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=sparc64 

All errors (new ones prefixed by >>):

   mm/swap_state.c: In function 'lookup_swap_cache':
>> mm/swap_state.c:340:4: error: implicit declaration of function 'pte_ERROR'; did you mean 'pgd_ERROR'? [-Werror=implicit-function-declaration]
       pte_ERROR(vmf->orig_pte);
       ^~~~~~~~~
       pgd_ERROR
   cc1: some warnings being treated as errors

vim +340 mm/swap_state.c

   324	
   325	/*
   326	 * Lookup a swap entry in the swap cache. A found page will be returned
   327	 * unlocked and with its refcount incremented - we rely on the kernel
   328	 * lock getting page table operations atomic even if we drop the page
   329	 * lock before returning.
   330	 */
   331	struct page *lookup_swap_cache(swp_entry_t entry, bool vma_ra,
   332				struct vm_fault *vmf)
   333	{
   334		struct page *page;
   335		int readahead;
   336		struct address_space *swapper_space = swap_address_space(entry);
   337	
   338		if (!swapper_space) {
   339			if (vmf)
 > 340				pte_ERROR(vmf->orig_pte);
   341			else
   342				pr_err("Bad swp_entry: %lx\n", entry.val);
   343			return NULL;
   344		}
   345	
   346		page = find_get_page(swapper_space, swp_offset(entry));
   347	
   348		INC_CACHE_INFO(find_total);
   349		if (page) {
   350			INC_CACHE_INFO(find_success);
   351			if (unlikely(PageTransCompound(page)))
   352				return page;
   353			readahead = TestClearPageReadahead(page);
   354			if (vma_ra) {
   355				unsigned long ra_info = GET_SWAP_RA_VAL(vmf->vma);
   356				int win = SWAP_RA_WIN(ra_info);
   357				int hits = SWAP_RA_HITS(ra_info);
   358	
   359				if (readahead)
   360					hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
   361				atomic_long_set(&vmf->vma->swap_readahead_info,
   362						SWAP_RA_VAL(vmf->address, win, hits));
   363			}
   364			if (readahead) {
   365				count_vm_event(SWAP_RA_HIT);
   366				if (!vma_ra)
   367					atomic_inc(&swapin_readahead_hits);
   368			}
   369		}
   370		return page;
   371	}
   372	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 53401 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] mm: Report bad PTEs in lookup_swap_cache()
  2018-02-23 20:03 [PATCH] mm: Report bad PTEs in lookup_swap_cache() Matthew Wilcox
  2018-02-24  0:49 ` Huang, Ying
  2018-02-25  4:43 ` kbuild test robot
@ 2018-02-25  6:27 ` kbuild test robot
  2 siblings, 0 replies; 4+ messages in thread
From: kbuild test robot @ 2018-02-25  6:27 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: kbuild-all, linux-mm, Matthew Wilcox, Huang Ying

[-- Attachment #1: Type: text/plain, Size: 3232 bytes --]

Hi Matthew,

I love your patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v4.16-rc2]
[cannot apply to mmotm/master next-20180223]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Matthew-Wilcox/mm-Report-bad-PTEs-in-lookup_swap_cache/20180225-080318
config: mips-64r6el_defconfig (attached as .config)
compiler: mips64el-linux-gnuabi64-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=mips 

All warnings (new ones prefixed by >>):

   In file included from arch/mips/include/asm/pgtable.h:17:0,
                    from include/linux/memremap.h:8,
                    from include/linux/mm.h:27,
                    from mm/swap_state.c:10:
   mm/swap_state.c: In function 'lookup_swap_cache':
>> arch/mips/include/asm/pgtable-64.h:160:9: warning: format '%lx' expects argument of type 'long unsigned int', but argument 4 has type 'long long unsigned int' [-Wformat=]
     printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
            ^
>> mm/swap_state.c:340:4: note: in expansion of macro 'pte_ERROR'
       pte_ERROR(vmf->orig_pte);
       ^~~~~~~~~

vim +/pte_ERROR +340 mm/swap_state.c

   324	
   325	/*
   326	 * Lookup a swap entry in the swap cache. A found page will be returned
   327	 * unlocked and with its refcount incremented - we rely on the kernel
   328	 * lock getting page table operations atomic even if we drop the page
   329	 * lock before returning.
   330	 */
   331	struct page *lookup_swap_cache(swp_entry_t entry, bool vma_ra,
   332				struct vm_fault *vmf)
   333	{
   334		struct page *page;
   335		int readahead;
   336		struct address_space *swapper_space = swap_address_space(entry);
   337	
   338		if (!swapper_space) {
   339			if (vmf)
 > 340				pte_ERROR(vmf->orig_pte);
   341			else
   342				pr_err("Bad swp_entry: %lx\n", entry.val);
   343			return NULL;
   344		}
   345	
   346		page = find_get_page(swapper_space, swp_offset(entry));
   347	
   348		INC_CACHE_INFO(find_total);
   349		if (page) {
   350			INC_CACHE_INFO(find_success);
   351			if (unlikely(PageTransCompound(page)))
   352				return page;
   353			readahead = TestClearPageReadahead(page);
   354			if (vma_ra) {
   355				unsigned long ra_info = GET_SWAP_RA_VAL(vmf->vma);
   356				int win = SWAP_RA_WIN(ra_info);
   357				int hits = SWAP_RA_HITS(ra_info);
   358	
   359				if (readahead)
   360					hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
   361				atomic_long_set(&vmf->vma->swap_readahead_info,
   362						SWAP_RA_VAL(vmf->address, win, hits));
   363			}
   364			if (readahead) {
   365				count_vm_event(SWAP_RA_HIT);
   366				if (!vma_ra)
   367					atomic_inc(&swapin_readahead_hits);
   368			}
   369		}
   370		return page;
   371	}
   372	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 18989 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-02-25  6:28 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-02-23 20:03 [PATCH] mm: Report bad PTEs in lookup_swap_cache() Matthew Wilcox
2018-02-24  0:49 ` Huang, Ying
2018-02-25  4:43 ` kbuild test robot
2018-02-25  6:27 ` kbuild test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).