linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Minchan Kim <minchan@kernel.org>
To: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org,
	Michael Kerrisk <mtk.manpages@gmail.com>,
	Arun Sharma <asharma@fb.com>,
	John Stultz <john.stultz@linaro.org>, Mel Gorman <mel@csn.ul.ie>,
	Hugh Dickins <hughd@google.com>,
	Dave Hansen <dave@linux.vnet.ibm.com>,
	Rik van Riel <riel@redhat.com>, Neil Brown <neilb@suse.de>,
	Mike Hommey <mh@glandium.org>, Taras Glek <tglek@mozilla.com>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Jason Evans <je@fb.com>,
	sanjay@google.com, Paul Turner <pjt@google.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michel Lespinasse <walken@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Minchan Kim <minchan@kernel.org>
Subject: [RFC v7 06/11] send SIGBUS when user try to access purged page
Date: Tue, 12 Mar 2013 16:38:30 +0900	[thread overview]
Message-ID: <1363073915-25000-7-git-send-email-minchan@kernel.org> (raw)
In-Reply-To: <1363073915-25000-1-git-send-email-minchan@kernel.org>

By vrange(2) semantics, a user should see SIGBUS if they try to access
a purged page without first calling vrange(...VRANGE_NOVOLATILE).

This patch implements it.

I reused the PSE bit for a quick prototype without enough consideration,
so I need time to find out which bit is actually free, and I am surely
missing many places that must handle the vrange pte bit. I should
investigate all of the pte handling places, especially the pte_none
case. TODO

Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 arch/x86/include/asm/pgtable_types.h |  2 ++
 include/asm-generic/pgtable.h        | 11 +++++++++++
 include/linux/vrange.h               |  2 ++
 mm/memory.c                          | 23 +++++++++++++++++++++--
 mm/vrange.c                          | 26 ++++++++++++++++++++++++--
 5 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 567b5d0..8c5163f 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -64,6 +64,8 @@
 #define _PAGE_FILE	(_AT(pteval_t, 1) << _PAGE_BIT_FILE)
 #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
 
+#define _PAGE_VRANGE	_PAGE_BIT_PSE
+
 /*
  * _PAGE_NUMA indicates that this page will trigger a numa hinting
  * minor page fault to gather numa placement statistics (see
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index bfd8768..1486d42 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -469,6 +469,17 @@ static inline unsigned long my_zero_pfn(unsigned long addr)
 
 #ifdef CONFIG_MMU
 
+static inline pte_t pte_mkvrange(pte_t pte)
+{
+	pte = pte_set_flags(pte, _PAGE_VRANGE);
+	return pte_clear_flags(pte, _PAGE_PRESENT);
+}
+
+static inline int pte_vrange(pte_t pte)
+{
+	return ((pte_flags(pte) | _PAGE_PRESENT) == _PAGE_VRANGE);
+}
+
 #ifndef CONFIG_TRANSPARENT_HUGEPAGE
 static inline int pmd_trans_huge(pmd_t pmd)
 {
diff --git a/include/linux/vrange.h b/include/linux/vrange.h
index eb3f941..24ed4c1 100644
--- a/include/linux/vrange.h
+++ b/include/linux/vrange.h
@@ -41,6 +41,8 @@ int discard_vpage(struct page *page);
 bool vrange_address(struct mm_struct *mm, unsigned long start,
 			unsigned long end);
 
+extern bool is_purged_vrange(struct mm_struct *mm, unsigned long address);
+
 #else
 
 static inline void vrange_init(void) {};
diff --git a/mm/memory.c b/mm/memory.c
index 494526a..cc369ab 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -59,6 +59,7 @@
 #include <linux/gfp.h>
 #include <linux/migrate.h>
 #include <linux/string.h>
+#include <linux/vrange.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -840,7 +841,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
 	/* pte contains position in swap or file, so copy. */
 	if (unlikely(!pte_present(pte))) {
-		if (!pte_file(pte)) {
+		if (!pte_file(pte) && !pte_vrange(pte)) {
 			swp_entry_t entry = pte_to_swp_entry(pte);
 
 			if (swap_duplicate(entry) < 0)
@@ -1180,7 +1181,7 @@ again:
 		if (pte_file(ptent)) {
 			if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
 				print_bad_pte(vma, addr, ptent, NULL);
-		} else {
+		} else if (!pte_vrange(ptent)) {
 			swp_entry_t entry = pte_to_swp_entry(ptent);
 
 			if (!non_swap_entry(entry))
@@ -3663,9 +3664,27 @@ int handle_pte_fault(struct mm_struct *mm,
 					return do_linear_fault(mm, vma, address,
 						pte, pmd, flags, entry);
 			}
+anon:
 			return do_anonymous_page(mm, vma, address,
 						 pte, pmd, flags);
 		}
+
+		if (unlikely(pte_vrange(entry))) {
+			if (!is_purged_vrange(mm, address)) {
+				/* zap pte */
+				ptl = pte_lockptr(mm, pmd);
+				spin_lock(ptl);
+				if (unlikely(!pte_same(*pte, entry)))
+					goto unlock;
+				flush_cache_page(vma, address, pte_pfn(*pte));
+				ptep_clear_flush(vma, address, pte);
+				pte_unmap_unlock(pte, ptl);
+				goto anon;
+			}
+
+			return VM_FAULT_SIGBUS;
+		}
+
 		if (pte_file(entry))
 			return do_nonlinear_fault(mm, vma, address,
 					pte, pmd, flags, entry);
diff --git a/mm/vrange.c b/mm/vrange.c
index 78aa252..89fcae4 100644
--- a/mm/vrange.c
+++ b/mm/vrange.c
@@ -343,7 +343,9 @@ int try_to_discard_one(struct page *page, struct vm_area_struct *vma,
 
 	present = pte_present(*pte);
 	flush_cache_page(vma, address, page_to_pfn(page));
-	pteval = ptep_clear_flush(vma, address, pte);
+
+	ptep_clear_flush(vma, address, pte);
+	pteval = pte_mkvrange(*pte);
 
 	update_hiwater_rss(mm);
 	dec_mm_counter(mm, MM_ANONPAGES);
@@ -357,10 +359,12 @@ int try_to_discard_one(struct page *page, struct vm_area_struct *vma,
 			BUG_ON(1);
 	}
 
+	set_pte_at(mm, address, pte, pteval);
+	__vrange_purge(mm, address, address + PAGE_SIZE -1);
 	pte_unmap_unlock(pte, ptl);
 	mmu_notifier_invalidate_page(mm, address);
+	vrange_unlock(mm);
 	ret = 1;
-	__vrange_purge(mm, address, address + PAGE_SIZE -1);
 out:
 	return ret;
 }
@@ -448,3 +452,21 @@ int discard_vpage(struct page *page)
 
 	return 0;
 }
+
+bool is_purged_vrange(struct mm_struct *mm, unsigned long address)
+{
+	struct rb_root *root = &mm->v_rb;
+	struct interval_tree_node *node;
+	struct vrange *range;
+	bool ret = false;
+
+	vrange_lock(mm);
+	node = interval_tree_iter_first(root, address, address + PAGE_SIZE - 1);
+	if (node) {
+		range = container_of(node, struct vrange, node);
+		if (range->purged)
+			ret = true;
+	}
+	vrange_unlock(mm);
+	return ret;
+}
-- 
1.8.1.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2013-03-12  7:38 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-12  7:38 [RFC v7 00/11] Support vrange for anonymous page Minchan Kim
2013-03-12  7:38 ` [RFC v7 01/11] vrange: enable generic interval tree Minchan Kim
2013-03-12  7:38 ` [RFC v7 02/11] add vrange basic data structure and functions Minchan Kim
2013-03-12  7:38 ` [RFC v7 03/11] add new system call vrange(2) Minchan Kim
2013-03-12  7:38 ` [RFC v7 04/11] add proc/pid/vrange information Minchan Kim
2013-03-12  7:38 ` [RFC v7 05/11] Add purge operation Minchan Kim
2013-03-12  7:38 ` Minchan Kim [this message]
2013-03-12  7:38 ` [RFC v7 07/11] keep mm_struct to vrange when system call context Minchan Kim
2013-03-12  7:38 ` [RFC v7 08/11] add LRU handling for victim vrange Minchan Kim
2013-03-12  7:38 ` [RFC v7 09/11] Get rid of depenceny that all pages is from a zone in shrink_page_list Minchan Kim
2013-03-12  7:38 ` [RFC v7 10/11] Purging vrange pages without swap Minchan Kim
2013-03-12  7:38 ` [RFC v7 11/11] add purged page information in vmstat Minchan Kim
2013-03-12 23:16 ` [RFC v7 00/11] Support vrange for anonymous page Paul Turner
2013-03-13  6:44   ` Minchan Kim
2013-03-21  1:29 ` John Stultz
2013-03-22  6:01   ` Minchan Kim
2013-03-22 17:06     ` John Stultz
2013-03-25  8:42       ` Minchan Kim
2013-03-27  0:26         ` John Stultz
2013-03-27  8:03           ` Minchan Kim
2013-03-30  0:05             ` John Stultz
2013-04-01  7:57               ` Minchan Kim
2013-03-25 17:16 ` Bartlomiej Zolnierkiewicz
2013-03-27  7:18   ` Minchan Kim
2013-04-10 20:22 ` KOSAKI Motohiro
2013-04-11  6:55   ` Minchan Kim
2013-04-11  7:20     ` KOSAKI Motohiro
2013-04-11  8:02       ` Minchan Kim
2013-04-11  8:15         ` KOSAKI Motohiro
2013-04-11  8:31           ` Minchan Kim
2013-04-11 15:01             ` KOSAKI Motohiro
2013-04-14  7:42               ` Minchan Kim
2013-04-16  3:33                 ` John Stultz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1363073915-25000-7-git-send-email-minchan@kernel.org \
    --to=minchan@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=asharma@fb.com \
    --cc=dave@linux.vnet.ibm.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=je@fb.com \
    --cc=john.stultz@linaro.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    --cc=mh@glandium.org \
    --cc=mtk.manpages@gmail.com \
    --cc=neilb@suse.de \
    --cc=pjt@google.com \
    --cc=riel@redhat.com \
    --cc=sanjay@google.com \
    --cc=tglek@mozilla.com \
    --cc=walken@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).