All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20130619043419.GA10961@bbox>

diff --git a/a/1.txt b/N1/1.txt
index c83a20e..d89c1e7 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -57,3 +57,729 @@ On Tue, Jun 11, 2013 at 09:22:50PM -0700, John Stultz wrote:
 
 This patch has some bugs so below patch should fix them and pass my
 simple cases.
+
+>From 13c458388a4784a785d93f285b0c54156c3b04aa Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Tue, 11 Jun 2013 21:22:50 -0700
+Subject: [PATCH 1/2] vrange: Add method to purge volatile ranges
+
+This patch adds a discard function that purges volatile ranges under
+memory pressure. The logic is as follows:
+
+1. Memory pressure happens.
+2. The VM starts to reclaim pages.
+3. Check whether the page is in a volatile range.
+4. If so, zap the page from the process's page table.
+   (By the semantics of vrange(2), we should mark the entry with a
+    special value so that a later access to the address faults. That
+    will be introduced in a later patch.)
+5. If the page is unmapped from all processes, discard it instead of swapping.
+
+This patch does not address the case where there is no swap, which
+keeps anonymous pages from being aged off the LRUs. Minchan has
+additional patches that add support for purging anonymous pages.
+
+XXX: First pass at file purging. Seems to work, but is likely broken
+and needs close review.
+
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Android Kernel Team <kernel-team@android.com>
+Cc: Robert Love <rlove@google.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Dave Hansen <dave@linux.vnet.ibm.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Dmitry Adamushko <dmitry.adamushko@gmail.com>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: Neil Brown <neilb@suse.de>
+Cc: Andrea Righi <andrea@betterlinux.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Cc: Mike Hommey <mh@glandium.org>
+Cc: Taras Glek <tglek@mozilla.com>
+Cc: Dhaval Giani <dgiani@mozilla.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: linux-mm@kvack.org <linux-mm@kvack.org>
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+[jstultz: Reworked to add purging of file pages, commit log tweaks]
+Signed-off-by: John Stultz <john.stultz@linaro.org>
+---
+ include/linux/rmap.h   |   12 +-
+ include/linux/swap.h   |    1 +
+ include/linux/vrange.h |    7 +
+ mm/ksm.c               |    2 +-
+ mm/rmap.c              |   30 +++--
+ mm/swapfile.c          |   36 ++++++
+ mm/vmscan.c            |   16 ++-
+ mm/vrange.c            |  332 ++++++++++++++++++++++++++++++++++++++++++++++++
+ 8 files changed, 420 insertions(+), 16 deletions(-)
+
+diff --git a/include/linux/rmap.h b/include/linux/rmap.h
+index 6dacb93..6432dfb 100644
+--- a/include/linux/rmap.h
++++ b/include/linux/rmap.h
+@@ -83,6 +83,8 @@ enum ttu_flags {
+ };
+ 
+ #ifdef CONFIG_MMU
++unsigned long vma_address(struct page *page, struct vm_area_struct *vma);
++
+ static inline void get_anon_vma(struct anon_vma *anon_vma)
+ {
+ 	atomic_inc(&anon_vma->refcount);
+@@ -182,9 +184,11 @@ static inline void page_dup_rmap(struct page *page)
+  * Called from mm/vmscan.c to handle paging out
+  */
+ int page_referenced(struct page *, int is_locked,
+-			struct mem_cgroup *memcg, unsigned long *vm_flags);
++			struct mem_cgroup *memcg, unsigned long *vm_flags,
++			int *is_vrange);
+ int page_referenced_one(struct page *, struct vm_area_struct *,
+-	unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
++	unsigned long address, unsigned int *mapcount, unsigned long *vm_flags,
++	int *is_vrange);
+ 
+ #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
+ 
+@@ -249,9 +253,12 @@ int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
+ 
+ static inline int page_referenced(struct page *page, int is_locked,
+ 				  struct mem_cgroup *memcg,
+-				  unsigned long *vm_flags)
++				  unsigned long *vm_flags,
++				  int *is_vrange)
+ {
+ 	*vm_flags = 0;
++	if (is_vrange)
++		*is_vrange = 0;
+ 	return 0;
+ }
+ 
+diff --git a/include/linux/swap.h b/include/linux/swap.h
+index 1701ce4..5907936 100644
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -383,6 +383,7 @@ extern int swap_duplicate(swp_entry_t);
+ extern int swapcache_prepare(swp_entry_t);
+ extern void swap_free(swp_entry_t);
+ extern void swapcache_free(swp_entry_t, struct page *page);
++extern int __free_swap_and_cache(swp_entry_t);
+ extern int free_swap_and_cache(swp_entry_t);
+ extern int swap_type_of(dev_t, sector_t, struct block_device **);
+ extern unsigned int count_swap_pages(int, int);
+diff --git a/include/linux/vrange.h b/include/linux/vrange.h
+index a97ac25..cbb609a 100644
+--- a/include/linux/vrange.h
++++ b/include/linux/vrange.h
+@@ -37,6 +37,10 @@ extern int vrange_clear(struct vrange_root *vroot,
+ extern void vrange_root_cleanup(struct vrange_root *vroot);
+ extern int vrange_fork(struct mm_struct *new,
+ 					struct mm_struct *old);
++int discard_vpage(struct page *page);
++bool vrange_address(struct mm_struct *mm, unsigned long start,
++			unsigned long end);
++
+ #else
+ 
+ static inline void vrange_init(void) {};
+@@ -47,5 +51,8 @@ static inline int vrange_fork(struct mm_struct *new, struct mm_struct *old)
+ 	return 0;
+ }
+ 
++static inline bool vrange_address(struct mm_struct *mm, unsigned long start,
++		unsigned long end) { return false; };
++static inline int discard_vpage(struct page *page) { return 0; };
+ #endif
+ #endif /* _LINIUX_VRANGE_H */
+diff --git a/mm/ksm.c b/mm/ksm.c
+index b6afe0c..debc20c 100644
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -1932,7 +1932,7 @@ again:
+ 				continue;
+ 
+ 			referenced += page_referenced_one(page, vma,
+-				rmap_item->address, &mapcount, vm_flags);
++				rmap_item->address, &mapcount, vm_flags, NULL);
+ 			if (!search_new_forks || !mapcount)
+ 				break;
+ 		}
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 6280da8..5522522 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -57,6 +57,8 @@
+ #include <linux/migrate.h>
+ #include <linux/hugetlb.h>
+ #include <linux/backing-dev.h>
++#include <linux/vrange.h>
++#include <linux/rmap.h>
+ 
+ #include <asm/tlbflush.h>
+ 
+@@ -523,8 +525,7 @@ __vma_address(struct page *page, struct vm_area_struct *vma)
+ 	return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ }
+ 
+-inline unsigned long
+-vma_address(struct page *page, struct vm_area_struct *vma)
++unsigned long vma_address(struct page *page, struct vm_area_struct *vma)
+ {
+ 	unsigned long address = __vma_address(page, vma);
+ 
+@@ -662,7 +663,7 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+  */
+ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+ 			unsigned long address, unsigned int *mapcount,
+-			unsigned long *vm_flags)
++			unsigned long *vm_flags, int *is_vrange)
+ {
+ 	struct mm_struct *mm = vma->vm_mm;
+ 	int referenced = 0;
+@@ -724,6 +725,9 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+ 				referenced++;
+ 		}
+ 		pte_unmap_unlock(pte, ptl);
++		if (is_vrange &&
++			vrange_address(mm, address, address + PAGE_SIZE - 1))
++			*is_vrange = 1;
+ 	}
+ 
+ 	(*mapcount)--;
+@@ -736,7 +740,8 @@ out:
+ 
+ static int page_referenced_anon(struct page *page,
+ 				struct mem_cgroup *memcg,
+-				unsigned long *vm_flags)
++				unsigned long *vm_flags,
++				int *is_vrange)
+ {
+ 	unsigned int mapcount;
+ 	struct anon_vma *anon_vma;
+@@ -761,7 +766,7 @@ static int page_referenced_anon(struct page *page,
+ 		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
+ 			continue;
+ 		referenced += page_referenced_one(page, vma, address,
+-						  &mapcount, vm_flags);
++					&mapcount, vm_flags, is_vrange);
+ 		if (!mapcount)
+ 			break;
+ 	}
+@@ -785,7 +790,9 @@ static int page_referenced_anon(struct page *page,
+  */
+ static int page_referenced_file(struct page *page,
+ 				struct mem_cgroup *memcg,
+-				unsigned long *vm_flags)
++				unsigned long *vm_flags,
++				int *is_vrange)
++
+ {
+ 	unsigned int mapcount;
+ 	struct address_space *mapping = page->mapping;
+@@ -826,7 +833,8 @@ static int page_referenced_file(struct page *page,
+ 		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
+ 			continue;
+ 		referenced += page_referenced_one(page, vma, address,
+-						  &mapcount, vm_flags);
++							&mapcount, vm_flags,
++							is_vrange);
+ 		if (!mapcount)
+ 			break;
+ 	}
+@@ -841,6 +849,7 @@ static int page_referenced_file(struct page *page,
+  * @is_locked: caller holds lock on the page
+  * @memcg: target memory cgroup
+  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
++ * @is_vrange: the page in vrange of some process
+  *
+  * Quick test_and_clear_referenced for all mappings to a page,
+  * returns the number of ptes which referenced the page.
+@@ -848,7 +857,8 @@ static int page_referenced_file(struct page *page,
+ int page_referenced(struct page *page,
+ 		    int is_locked,
+ 		    struct mem_cgroup *memcg,
+-		    unsigned long *vm_flags)
++		    unsigned long *vm_flags,
++		    int *is_vrange)
+ {
+ 	int referenced = 0;
+ 	int we_locked = 0;
+@@ -867,10 +877,10 @@ int page_referenced(struct page *page,
+ 								vm_flags);
+ 		else if (PageAnon(page))
+ 			referenced += page_referenced_anon(page, memcg,
+-								vm_flags);
++							vm_flags, is_vrange);
+ 		else if (page->mapping)
+ 			referenced += page_referenced_file(page, memcg,
+-								vm_flags);
++							vm_flags, is_vrange);
+ 		if (we_locked)
+ 			unlock_page(page);
+ 
+diff --git a/mm/swapfile.c b/mm/swapfile.c
+index 6c340d9..1f6c80e 100644
+--- a/mm/swapfile.c
++++ b/mm/swapfile.c
+@@ -734,6 +734,42 @@ int try_to_free_swap(struct page *page)
+ }
+ 
+ /*
++ * It's almost same with free_swap_and_cache except page is already
++ * locked.
++ */
++int __free_swap_and_cache(swp_entry_t entry)
++{
++	struct swap_info_struct *p;
++	struct page *page = NULL;
++
++	if (non_swap_entry(entry))
++		return 1;
++
++	p = swap_info_get(entry);
++	if (p) {
++		if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
++			page = find_get_page(swap_address_space(entry),
++						entry.val);
++		}
++		spin_unlock(&p->lock);
++	}
++
++	if (page) {
++		/*
++		 * Not mapped elsewhere, or swap space full? Free it!
++		 * The caller already holds the page lock, so none is taken here.
++		 */
++		if (PageSwapCache(page) && !PageWriteback(page) &&
++				(!page_mapped(page) || vm_swap_full())) {
++			delete_from_swap_cache(page);
++			SetPageDirty(page);
++		}
++		page_cache_release(page);
++	}
++	return p != NULL;
++}
++
++/*
+  * Free the swap entry like above, but also try to
+  * free the page cache entry if it is the last user.
+  */
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index fa6a853..c75e0ac 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -43,6 +43,7 @@
+ #include <linux/sysctl.h>
+ #include <linux/oom.h>
+ #include <linux/prefetch.h>
++#include <linux/vrange.h>
+ 
+ #include <asm/tlbflush.h>
+ #include <asm/div64.h>
+@@ -611,6 +612,7 @@ enum page_references {
+ 	PAGEREF_RECLAIM,
+ 	PAGEREF_RECLAIM_CLEAN,
+ 	PAGEREF_KEEP,
++	PAGEREF_DISCARD,
+ 	PAGEREF_ACTIVATE,
+ };
+ 
+@@ -619,9 +621,10 @@ static enum page_references page_check_references(struct page *page,
+ {
+ 	int referenced_ptes, referenced_page;
+ 	unsigned long vm_flags;
++	int is_vrange = 0;
+ 
+ 	referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
+-					  &vm_flags);
++					  &vm_flags, &is_vrange);
+ 	referenced_page = TestClearPageReferenced(page);
+ 
+ 	/*
+@@ -631,6 +634,12 @@ static enum page_references page_check_references(struct page *page,
+ 	if (vm_flags & VM_LOCKED)
+ 		return PAGEREF_RECLAIM;
+ 
++	/*
++	 * Bail out if the page is in vrange and try to discard.
++	 */
++	if (is_vrange)
++		return PAGEREF_DISCARD;
++
+ 	if (referenced_ptes) {
+ 		if (PageSwapBacked(page))
+ 			return PAGEREF_ACTIVATE;
+@@ -769,6 +778,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
+ 			goto activate_locked;
+ 		case PAGEREF_KEEP:
+ 			goto keep_locked;
++		case PAGEREF_DISCARD:
++			if (discard_vpage(page))
++				goto free_it;
+ 		case PAGEREF_RECLAIM:
+ 		case PAGEREF_RECLAIM_CLEAN:
+ 			; /* try to reclaim the page below */
+@@ -1497,7 +1509,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
+ 		}
+ 
+ 		if (page_referenced(page, 0, sc->target_mem_cgroup,
+-				    &vm_flags)) {
++				    &vm_flags, NULL)) {
+ 			nr_rotated += hpage_nr_pages(page);
+ 			/*
+ 			 * Identify referenced, file-backed active pages and
+diff --git a/mm/vrange.c b/mm/vrange.c
+index 5278939..d57cb38 100644
+--- a/mm/vrange.c
++++ b/mm/vrange.c
+@@ -6,6 +6,13 @@
+ #include <linux/slab.h>
+ #include <linux/mman.h>
+ #include <linux/syscalls.h>
++#include <linux/pagemap.h>
++#include <linux/rmap.h>
++#include <linux/hugetlb.h>
++#include "internal.h"
++#include <linux/swap.h>
++#include <linux/swapops.h>
++#include <linux/mmu_notifier.h>
+ 
+ static struct kmem_cache *vrange_cachep;
+ 
+@@ -364,3 +371,328 @@ SYSCALL_DEFINE4(vrange, unsigned long, start,
+ out:
+ 	return ret;
+ }
++
++
++/* Report whether any vrange in @vroot overlaps the range @start..@end. */
++static bool __vrange_address(struct vrange_root *vroot,
++			unsigned long start, unsigned long end)
++{
++	/* a single overlapping interval-tree node is enough */
++	return interval_tree_iter_first(&vroot->v_rb, start, end) != NULL;
++}
++
++
++bool vrange_address(struct mm_struct *mm,
++			unsigned long start, unsigned long end)
++{
++	struct vrange_root *vroot;
++	unsigned long vstart_idx, vend_idx;
++	struct vm_area_struct *vma = find_vma(mm, start);
++	bool ret;
++
++	/* find_vma() can return NULL; then fall back to the mm's own vroot */
++	if (vma && vma->vm_file && (vma->vm_flags & VM_SHARED)) {
++		vroot = &vma->vm_file->f_mapping->vroot;
++		vstart_idx = vma->vm_pgoff + start - vma->vm_start;
++		vend_idx = vma->vm_pgoff + end - vma->vm_start;
++	} else {
++		vroot = &mm->vroot;
++		vstart_idx = start;
++		vend_idx = end;
++	}
++
++	vrange_lock(vroot);
++	ret = __vrange_address(vroot, vstart_idx, vend_idx);
++	vrange_unlock(vroot);
++	return ret;
++}
++
++/* Return the locked pte at @address if it maps @page or its swap entry. */
++static pte_t *__vpage_check_address(struct page *page,
++		struct mm_struct *mm, unsigned long address, spinlock_t **ptlp)
++{
++	pmd_t *pmd;
++	pte_t *pte;
++	spinlock_t *ptl;
++	bool present;
++
++	/* TODO : look into hugetlbfs */
++	if (unlikely(PageHuge(page)))
++		return NULL;
++
++	pmd = mm_find_pmd(mm, address);
++	if (!pmd)
++		return NULL;
++	/* TODO : Support THP */
++	if (pmd_trans_huge(*pmd))
++		return NULL;
++
++	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
++	if (pte_none(*pte))
++		goto out;
++
++	present = pte_present(*pte);
++	if (present && page_to_pfn(page) != pte_pfn(*pte))
++		goto out;
++	else if (present) {
++		*ptlp = ptl;
++		return pte;
++	} else {
++		/* pte not present: compare it with the entry in page_private() */
++		swp_entry_t entry = { .val = page_private(page) };
++
++		VM_BUG_ON(non_swap_entry(entry));
++		if (entry.val != pte_to_swp_entry(*pte).val)
++			goto out;
++		*ptlp = ptl;
++		return pte;
++	}
++out:
++	pte_unmap_unlock(pte, ptl);
++	return NULL;
++}
++
++/*
++ * Check that the pte at @address in @mm matches @page, where the pte
++ * may encode either the page itself or the page's swap slot.
++ */
++static inline pte_t *vpage_check_address(struct page *page,
++		struct mm_struct *mm, unsigned long address,
++		spinlock_t **ptlp)
++{
++	pte_t *ptep;
++	__cond_lock(*ptlp, ptep = __vpage_check_address(page,
++				mm, address, ptlp));
++	return ptep;
++}
++
++/* Flag every vrange of @vroot that overlaps @start..@end as purged. */
++static void __vrange_purge(struct vrange_root *vroot,
++		unsigned long start, unsigned long end)
++{
++	struct interval_tree_node *it;
++
++	for (it = interval_tree_iter_first(&vroot->v_rb, start, end); it;
++	     it = interval_tree_iter_next(it, start, end)) {
++		struct vrange *hit = container_of(it, struct vrange, node);
++
++		hit->purged = true;
++	}
++}
++
++/* Zap the pte for @page at @addr and mark the covering vranges purged. */
++int try_to_discard_one(struct vrange_root *vroot, struct page *page,
++			struct vm_area_struct *vma, unsigned long addr)
++{
++	struct mm_struct *mm = vma->vm_mm;
++	pte_t *pte, pteval;
++	spinlock_t *ptl;
++	int ret = 0;
++	bool present;
++
++	VM_BUG_ON(!PageLocked(page));
++
++	vrange_lock(vroot);
++	pte = vpage_check_address(page, mm, addr, &ptl);
++	if (!pte)
++		goto out;
++
++	if (vma->vm_flags & VM_LOCKED) {
++		pte_unmap_unlock(pte, ptl);
++		goto out;
++	}
++
++	present = pte_present(*pte);
++	flush_cache_page(vma, addr, page_to_pfn(page));
++	pteval = ptep_clear_flush(vma, addr, pte);
++
++	update_hiwater_rss(mm);
++	if (present) {
++		if (PageAnon(page))
++			dec_mm_counter(mm, MM_ANONPAGES);
++		else
++			dec_mm_counter(mm, MM_FILEPAGES);
++		page_remove_rmap(page);
++		page_cache_release(page);
++	} else {
++		swp_entry_t entry = pte_to_swp_entry(pteval);
++		dec_mm_counter(mm, MM_SWAPENTS);
++		if (unlikely(!__free_swap_and_cache(entry)))
++			BUG_ON(1);
++	}
++
++	pte_unmap_unlock(pte, ptl);
++	mmu_notifier_invalidate_page(mm, addr);
++	ret = 1;
++
++	if (!PageAnon(page)) /* switch to file offset */
++		addr = vma->vm_pgoff + addr - vma->vm_start;
++
++	__vrange_purge(vroot, addr, addr + PAGE_SIZE - 1);
++
++out:
++	vrange_unlock(vroot);
++	return ret;
++}
++
++/* Discard an anon @page only if every mapping of it lies in a vrange. */
++static int try_to_discard_anon_vpage(struct page *page)
++{
++	struct anon_vma *anon_vma;
++	struct anon_vma_chain *avc;
++	pgoff_t pgoff;
++	struct vm_area_struct *vma;
++	struct mm_struct *mm;
++	struct vrange_root *vroot;
++	unsigned long address;
++	bool ret = 0;
++
++	anon_vma = page_lock_anon_vma_read(page);
++	if (!anon_vma)
++		return ret;
++
++	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
++	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
++		pte_t *pte;
++		spinlock_t *ptl;
++
++		vma = avc->vma;
++		mm = vma->vm_mm;
++		vroot = &mm->vroot;
++		address = vma_address(page, vma);
++
++		vrange_lock(vroot);
++		/*
++		 * page_check_address() can't be used here because it does
++		 * not match swap entries in the page table. That match is
++		 * needed to keep purging of a shared page atomic: if the
++		 * page is purged in one process, every vrange that shares
++		 * the page has to be purged as well.
++		 */
++		pte = vpage_check_address(page, mm, address, &ptl);
++		if (!pte) {
++			vrange_unlock(vroot);
++			continue;
++		}
++
++		if (vma->vm_flags & VM_LOCKED) {
++			pte_unmap_unlock(pte, ptl);
++			vrange_unlock(vroot);
++			goto out;
++		}
++
++		pte_unmap_unlock(pte, ptl);
++		if (!__vrange_address(vroot, address,
++					address + PAGE_SIZE - 1)) {
++			vrange_unlock(vroot);
++			goto out;
++		}
++
++		vrange_unlock(vroot);
++	}
++
++	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
++		vma = avc->vma;
++		mm = vma->vm_mm;
++		vroot = &mm->vroot;
++		address = vma_address(page, vma);
++		if (!try_to_discard_one(vroot, page, vma, address))
++			goto out;
++	}
++
++	ret = 1;
++out:
++	page_unlock_anon_vma_read(anon_vma);
++	return ret;
++}
++
++
++
++/* Discard a file @page only if every mapping of it lies in a vrange. */
++static int try_to_discard_file_vpage(struct page *page)
++{
++	struct address_space *mapping = page->mapping;
++	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
++	struct vm_area_struct *vma;
++	bool ret = 0;
++
++	mutex_lock(&mapping->i_mmap_mutex);
++	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
++		unsigned long address = vma_address(page, vma);
++		struct mm_struct *mm = vma->vm_mm;
++		struct vrange_root *vroot = &mapping->vroot;
++		pte_t *pte;
++		spinlock_t *ptl;
++		long vstart_idx;
++
++		vstart_idx = vma->vm_pgoff + address - vma->vm_start;
++
++		vrange_lock(vroot);
++		/*
++		 * page_check_address() can't be used here because it does
++		 * not match swap entries in the page table. That match is
++		 * needed to keep purging of a shared page atomic: if the
++		 * page is purged in one process, every vrange that shares
++		 * the page has to be purged as well.
++		 */
++		pte = vpage_check_address(page, mm, address, &ptl);
++		if (!pte) {
++			vrange_unlock(vroot);
++			continue;
++		}
++
++		if (vma->vm_flags & VM_LOCKED) {
++			pte_unmap_unlock(pte, ptl);
++			vrange_unlock(vroot);
++			goto out;
++		}
++
++		pte_unmap_unlock(pte, ptl);
++		if (!__vrange_address(vroot, vstart_idx,
++					vstart_idx + PAGE_SIZE - 1)) {
++			vrange_unlock(vroot);
++			goto out;
++		}
++
++		vrange_unlock(vroot);
++	}
++
++	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
++		unsigned long address = vma_address(page, vma);
++		struct vrange_root *vroot = &mapping->vroot;
++
++		if (!try_to_discard_one(vroot, page, vma, address))
++			goto out;
++	}
++
++	ret = 1;
++out:
++	mutex_unlock(&mapping->i_mmap_mutex);
++	return ret;
++}
++
++/* Dispatch to the anon or file discard path according to the page type. */
++static int try_to_discard_vpage(struct page *page)
++{
++	return PageAnon(page) ? try_to_discard_anon_vpage(page) :
++				try_to_discard_file_vpage(page);
++}
++
++/* Discard a locked, off-LRU vrange page; returns 1 (page unlocked) or 0. */
++int discard_vpage(struct page *page)
++{
++	VM_BUG_ON(!PageLocked(page));
++	VM_BUG_ON(PageLRU(page));
++
++	if (try_to_discard_vpage(page)) {
++		if (PageSwapCache(page))
++			try_to_free_swap(page);
++
++		if (page_freeze_refs(page, 1)) {
++			unlock_page(page);
++			return 1;
++		}
++	}
++	return 0;
++}
++
+-- 
+1.7.9.5
+
+-- 
+Kind regards,
+Minchan Kim
diff --git a/a/content_digest b/N1/content_digest
index 24bc3c2..a3fed9e 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -85,6 +85,732 @@
  ">  8 files changed, 420 insertions(+), 16 deletions(-)\n"
  "\n"
  "This patch has some bugs so below patch should fix them and pass my\n"
- simple cases.
+ "simple cases.\n"
+ "\n"
+ ">From 13c458388a4784a785d93f285b0c54156c3b04aa Mon Sep 17 00:00:00 2001\n"
+ "From: Minchan Kim <minchan@kernel.org>\n"
+ "Date: Tue, 11 Jun 2013 21:22:50 -0700\n"
+ "Subject: [PATCH 1/2] vrange: Add method to purge volatile ranges\n"
+ "\n"
+ "This patch adds discarding function to purge volatile ranges under\n"
+ "memory pressure. Logic is as following:\n"
+ "\n"
+ "1. Memory pressure happens\n"
+ "2. VM start to reclaim pages\n"
+ "3. Check the page is in volatile range.\n"
+ "4. If so, zap the page from the process's page table.\n"
+ "   (By semantic vrange(2), we should mark it with another one to\n"
+ "    make page fault when you try to access the address. It will\n"
+ "    be introduced later patch)\n"
+ "5. If page is unmapped from all processes, discard it instead of swapping.\n"
+ "\n"
+ "This patch does not address the case where there is no swap, which\n"
+ "keeps anonymous pages from being aged off the LRUs. Minchan has\n"
+ "additional patches that add support for purging anonymous pages\n"
+ "\n"
+ "XXX: First pass at file purging. Seems to work, but is likely broken\n"
+ "and needs close review.\n"
+ "\n"
+ "Cc: Andrew Morton <akpm@linux-foundation.org>\n"
+ "Cc: Android Kernel Team <kernel-team@android.com>\n"
+ "Cc: Robert Love <rlove@google.com>\n"
+ "Cc: Mel Gorman <mel@csn.ul.ie>\n"
+ "Cc: Hugh Dickins <hughd@google.com>\n"
+ "Cc: Dave Hansen <dave@linux.vnet.ibm.com>\n"
+ "Cc: Rik van Riel <riel@redhat.com>\n"
+ "Cc: Dmitry Adamushko <dmitry.adamushko@gmail.com>\n"
+ "Cc: Dave Chinner <david@fromorbit.com>\n"
+ "Cc: Neil Brown <neilb@suse.de>\n"
+ "Cc: Andrea Righi <andrea@betterlinux.com>\n"
+ "Cc: Andrea Arcangeli <aarcange@redhat.com>\n"
+ "Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>\n"
+ "Cc: Mike Hommey <mh@glandium.org>\n"
+ "Cc: Taras Glek <tglek@mozilla.com>\n"
+ "Cc: Dhaval Giani <dgiani@mozilla.com>\n"
+ "Cc: Jan Kara <jack@suse.cz>\n"
+ "Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>\n"
+ "Cc: Michel Lespinasse <walken@google.com>\n"
+ "Cc: Minchan Kim <minchan@kernel.org>\n"
+ "Cc: linux-mm@kvack.org <linux-mm@kvack.org>\n"
+ "Signed-off-by: Minchan Kim <minchan@kernel.org>\n"
+ "[jstultz: Reworked to add purging of file pages, commit log tweaks]\n"
+ "Signed-off-by: John Stultz <john.stultz@linaro.org>\n"
+ "---\n"
+ " include/linux/rmap.h   |   12 +-\n"
+ " include/linux/swap.h   |    1 +\n"
+ " include/linux/vrange.h |    7 +\n"
+ " mm/ksm.c               |    2 +-\n"
+ " mm/rmap.c              |   30 +++--\n"
+ " mm/swapfile.c          |   36 ++++++\n"
+ " mm/vmscan.c            |   16 ++-\n"
+ " mm/vrange.c            |  332 ++++++++++++++++++++++++++++++++++++++++++++++++\n"
+ " 8 files changed, 420 insertions(+), 16 deletions(-)\n"
+ "\n"
+ "diff --git a/include/linux/rmap.h b/include/linux/rmap.h\n"
+ "index 6dacb93..6432dfb 100644\n"
+ "--- a/include/linux/rmap.h\n"
+ "+++ b/include/linux/rmap.h\n"
+ "@@ -83,6 +83,8 @@ enum ttu_flags {\n"
+ " };\n"
+ " \n"
+ " #ifdef CONFIG_MMU\n"
+ "+unsigned long vma_address(struct page *page, struct vm_area_struct *vma);\n"
+ "+\n"
+ " static inline void get_anon_vma(struct anon_vma *anon_vma)\n"
+ " {\n"
+ " \tatomic_inc(&anon_vma->refcount);\n"
+ "@@ -182,9 +184,11 @@ static inline void page_dup_rmap(struct page *page)\n"
+ "  * Called from mm/vmscan.c to handle paging out\n"
+ "  */\n"
+ " int page_referenced(struct page *, int is_locked,\n"
+ "-\t\t\tstruct mem_cgroup *memcg, unsigned long *vm_flags);\n"
+ "+\t\t\tstruct mem_cgroup *memcg, unsigned long *vm_flags,\n"
+ "+\t\t\tint *is_vrange);\n"
+ " int page_referenced_one(struct page *, struct vm_area_struct *,\n"
+ "-\tunsigned long address, unsigned int *mapcount, unsigned long *vm_flags);\n"
+ "+\tunsigned long address, unsigned int *mapcount, unsigned long *vm_flags,\n"
+ "+\tint *is_vrange);\n"
+ " \n"
+ " #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)\n"
+ " \n"
+ "@@ -249,9 +253,11 @@ int rmap_walk(struct page *page, int (*rmap_one)(struct page *,\n"
+ " \n"
+ " static inline int page_referenced(struct page *page, int is_locked,\n"
+ " \t\t\t\t  struct mem_cgroup *memcg,\n"
+ "-\t\t\t\t  unsigned long *vm_flags)\n"
+ "+\t\t\t\t  unsigned long *vm_flags,\n"
+ "+\t\t\t\t  int *is_vrange)\n"
+ " {\n"
+ " \t*vm_flags = 0;\n"
+ "+\t*is_vrange = 0;\n"
+ " \treturn 0;\n"
+ " }\n"
+ " \n"
+ "diff --git a/include/linux/swap.h b/include/linux/swap.h\n"
+ "index 1701ce4..5907936 100644\n"
+ "--- a/include/linux/swap.h\n"
+ "+++ b/include/linux/swap.h\n"
+ "@@ -383,6 +383,7 @@ extern int swap_duplicate(swp_entry_t);\n"
+ " extern int swapcache_prepare(swp_entry_t);\n"
+ " extern void swap_free(swp_entry_t);\n"
+ " extern void swapcache_free(swp_entry_t, struct page *page);\n"
+ "+extern int __free_swap_and_cache(swp_entry_t);\n"
+ " extern int free_swap_and_cache(swp_entry_t);\n"
+ " extern int swap_type_of(dev_t, sector_t, struct block_device **);\n"
+ " extern unsigned int count_swap_pages(int, int);\n"
+ "diff --git a/include/linux/vrange.h b/include/linux/vrange.h\n"
+ "index a97ac25..cbb609a 100644\n"
+ "--- a/include/linux/vrange.h\n"
+ "+++ b/include/linux/vrange.h\n"
+ "@@ -37,6 +37,10 @@ extern int vrange_clear(struct vrange_root *vroot,\n"
+ " extern void vrange_root_cleanup(struct vrange_root *vroot);\n"
+ " extern int vrange_fork(struct mm_struct *new,\n"
+ " \t\t\t\t\tstruct mm_struct *old);\n"
+ "+int discard_vpage(struct page *page);\n"
+ "+bool vrange_address(struct mm_struct *mm, unsigned long start,\n"
+ "+\t\t\tunsigned long end);\n"
+ "+\n"
+ " #else\n"
+ " \n"
+ " static inline void vrange_init(void) {};\n"
+ "@@ -47,5 +51,8 @@ static inline int vrange_fork(struct mm_struct *new, struct mm_struct *old)\n"
+ " \treturn 0;\n"
+ " }\n"
+ " \n"
+ "+static inline bool vrange_address(struct mm_struct *mm, unsigned long start,\n"
+ "+\t\tunsigned long end) { return false; };\n"
+ "+static inline int discard_vpage(struct page *page) { return 0 };\n"
+ " #endif\n"
+ " #endif /* _LINIUX_VRANGE_H */\n"
+ "diff --git a/mm/ksm.c b/mm/ksm.c\n"
+ "index b6afe0c..debc20c 100644\n"
+ "--- a/mm/ksm.c\n"
+ "+++ b/mm/ksm.c\n"
+ "@@ -1932,7 +1932,7 @@ again:\n"
+ " \t\t\t\tcontinue;\n"
+ " \n"
+ " \t\t\treferenced += page_referenced_one(page, vma,\n"
+ "-\t\t\t\trmap_item->address, &mapcount, vm_flags);\n"
+ "+\t\t\t\trmap_item->address, &mapcount, vm_flags, NULL);\n"
+ " \t\t\tif (!search_new_forks || !mapcount)\n"
+ " \t\t\t\tbreak;\n"
+ " \t\t}\n"
+ "diff --git a/mm/rmap.c b/mm/rmap.c\n"
+ "index 6280da8..5522522 100644\n"
+ "--- a/mm/rmap.c\n"
+ "+++ b/mm/rmap.c\n"
+ "@@ -57,6 +57,8 @@\n"
+ " #include <linux/migrate.h>\n"
+ " #include <linux/hugetlb.h>\n"
+ " #include <linux/backing-dev.h>\n"
+ "+#include <linux/vrange.h>\n"
+ "+#include <linux/rmap.h>\n"
+ " \n"
+ " #include <asm/tlbflush.h>\n"
+ " \n"
+ "@@ -523,8 +525,7 @@ __vma_address(struct page *page, struct vm_area_struct *vma)\n"
+ " \treturn vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);\n"
+ " }\n"
+ " \n"
+ "-inline unsigned long\n"
+ "-vma_address(struct page *page, struct vm_area_struct *vma)\n"
+ "+unsigned long vma_address(struct page *page, struct vm_area_struct *vma)\n"
+ " {\n"
+ " \tunsigned long address = __vma_address(page, vma);\n"
+ " \n"
+ "@@ -662,7 +663,7 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)\n"
+ "  */\n"
+ " int page_referenced_one(struct page *page, struct vm_area_struct *vma,\n"
+ " \t\t\tunsigned long address, unsigned int *mapcount,\n"
+ "-\t\t\tunsigned long *vm_flags)\n"
+ "+\t\t\tunsigned long *vm_flags, int *is_vrange)\n"
+ " {\n"
+ " \tstruct mm_struct *mm = vma->vm_mm;\n"
+ " \tint referenced = 0;\n"
+ "@@ -724,6 +725,9 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,\n"
+ " \t\t\t\treferenced++;\n"
+ " \t\t}\n"
+ " \t\tpte_unmap_unlock(pte, ptl);\n"
+ "+\t\tif (is_vrange &&\n"
+ "+\t\t\tvrange_address(mm, address, address + PAGE_SIZE - 1))\n"
+ "+\t\t\t*is_vrange = 1;\n"
+ " \t}\n"
+ " \n"
+ " \t(*mapcount)--;\n"
+ "@@ -736,7 +740,8 @@ out:\n"
+ " \n"
+ " static int page_referenced_anon(struct page *page,\n"
+ " \t\t\t\tstruct mem_cgroup *memcg,\n"
+ "-\t\t\t\tunsigned long *vm_flags)\n"
+ "+\t\t\t\tunsigned long *vm_flags,\n"
+ "+\t\t\t\tint *is_vrange)\n"
+ " {\n"
+ " \tunsigned int mapcount;\n"
+ " \tstruct anon_vma *anon_vma;\n"
+ "@@ -761,7 +766,7 @@ static int page_referenced_anon(struct page *page,\n"
+ " \t\tif (memcg && !mm_match_cgroup(vma->vm_mm, memcg))\n"
+ " \t\t\tcontinue;\n"
+ " \t\treferenced += page_referenced_one(page, vma, address,\n"
+ "-\t\t\t\t\t\t  &mapcount, vm_flags);\n"
+ "+\t\t\t\t\t&mapcount, vm_flags, is_vrange);\n"
+ " \t\tif (!mapcount)\n"
+ " \t\t\tbreak;\n"
+ " \t}\n"
+ "@@ -785,7 +790,9 @@ static int page_referenced_anon(struct page *page,\n"
+ "  */\n"
+ " static int page_referenced_file(struct page *page,\n"
+ " \t\t\t\tstruct mem_cgroup *memcg,\n"
+ "-\t\t\t\tunsigned long *vm_flags)\n"
+ "+\t\t\t\tunsigned long *vm_flags,\n"
+ "+\t\t\t\tint *is_vrange)\n"
+ "+\n"
+ " {\n"
+ " \tunsigned int mapcount;\n"
+ " \tstruct address_space *mapping = page->mapping;\n"
+ "@@ -826,7 +833,8 @@ static int page_referenced_file(struct page *page,\n"
+ " \t\tif (memcg && !mm_match_cgroup(vma->vm_mm, memcg))\n"
+ " \t\t\tcontinue;\n"
+ " \t\treferenced += page_referenced_one(page, vma, address,\n"
+ "-\t\t\t\t\t\t  &mapcount, vm_flags);\n"
+ "+\t\t\t\t\t\t\t&mapcount, vm_flags,\n"
+ "+\t\t\t\t\t\t\tis_vrange);\n"
+ " \t\tif (!mapcount)\n"
+ " \t\t\tbreak;\n"
+ " \t}\n"
+ "@@ -841,6 +849,7 @@ static int page_referenced_file(struct page *page,\n"
+ "  * @is_locked: caller holds lock on the page\n"
+ "  * @memcg: target memory cgroup\n"
+ "  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page\n"
+ "+ * @is_vrange: the page in vrange of some process\n"
+ "  *\n"
+ "  * Quick test_and_clear_referenced for all mappings to a page,\n"
+ "  * returns the number of ptes which referenced the page.\n"
+ "@@ -848,7 +857,8 @@ static int page_referenced_file(struct page *page,\n"
+ " int page_referenced(struct page *page,\n"
+ " \t\t    int is_locked,\n"
+ " \t\t    struct mem_cgroup *memcg,\n"
+ "-\t\t    unsigned long *vm_flags)\n"
+ "+\t\t    unsigned long *vm_flags,\n"
+ "+\t\t    int *is_vrange)\n"
+ " {\n"
+ " \tint referenced = 0;\n"
+ " \tint we_locked = 0;\n"
+ "@@ -867,10 +877,10 @@ int page_referenced(struct page *page,\n"
+ " \t\t\t\t\t\t\t\tvm_flags);\n"
+ " \t\telse if (PageAnon(page))\n"
+ " \t\t\treferenced += page_referenced_anon(page, memcg,\n"
+ "-\t\t\t\t\t\t\t\tvm_flags);\n"
+ "+\t\t\t\t\t\t\tvm_flags, is_vrange);\n"
+ " \t\telse if (page->mapping)\n"
+ " \t\t\treferenced += page_referenced_file(page, memcg,\n"
+ "-\t\t\t\t\t\t\t\tvm_flags);\n"
+ "+\t\t\t\t\t\t\tvm_flags, is_vrange);\n"
+ " \t\tif (we_locked)\n"
+ " \t\t\tunlock_page(page);\n"
+ " \n"
+ "diff --git a/mm/swapfile.c b/mm/swapfile.c\n"
+ "index 6c340d9..1f6c80e 100644\n"
+ "--- a/mm/swapfile.c\n"
+ "+++ b/mm/swapfile.c\n"
+ "@@ -734,6 +734,42 @@ int try_to_free_swap(struct page *page)\n"
+ " }\n"
+ " \n"
+ " /*\n"
+ "+ * Almost the same as free_swap_and_cache(), except that the page is\n"
+ "+ * already locked.\n"
+ "+ */\n"
+ "+int __free_swap_and_cache(swp_entry_t entry)\n"
+ "+{\n"
+ "+\tstruct swap_info_struct *p;\n"
+ "+\tstruct page *page = NULL;\n"
+ "+\n"
+ "+\tif (non_swap_entry(entry))\n"
+ "+\t\treturn 1;\n"
+ "+\n"
+ "+\tp = swap_info_get(entry);\n"
+ "+\tif (p) {\n"
+ "+\t\tif (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {\n"
+ "+\t\t\tpage = find_get_page(swap_address_space(entry),\n"
+ "+\t\t\t\t\t\tentry.val);\n"
+ "+\t\t}\n"
+ "+\t\tspin_unlock(&p->lock);\n"
+ "+\t}\n"
+ "+\n"
+ "+\tif (page) {\n"
+ "+\t\t/*\n"
+ "+\t\t * Not mapped elsewhere, or swap space full? Free it!\n"
+ "+\t\t * Also recheck PageSwapCache now page is locked (above).\n"
+ "+\t\t */\n"
+ "+\t\tif (PageSwapCache(page) && !PageWriteback(page) &&\n"
+ "+\t\t\t\t(!page_mapped(page) || vm_swap_full())) {\n"
+ "+\t\t\tdelete_from_swap_cache(page);\n"
+ "+\t\t\tSetPageDirty(page);\n"
+ "+\t\t}\n"
+ "+\t\tpage_cache_release(page);\n"
+ "+\t}\n"
+ "+\treturn p != NULL;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "  * Free the swap entry like above, but also try to\n"
+ "  * free the page cache entry if it is the last user.\n"
+ "  */\n"
+ "diff --git a/mm/vmscan.c b/mm/vmscan.c\n"
+ "index fa6a853..c75e0ac 100644\n"
+ "--- a/mm/vmscan.c\n"
+ "+++ b/mm/vmscan.c\n"
+ "@@ -43,6 +43,7 @@\n"
+ " #include <linux/sysctl.h>\n"
+ " #include <linux/oom.h>\n"
+ " #include <linux/prefetch.h>\n"
+ "+#include <linux/vrange.h>\n"
+ " \n"
+ " #include <asm/tlbflush.h>\n"
+ " #include <asm/div64.h>\n"
+ "@@ -611,6 +612,7 @@ enum page_references {\n"
+ " \tPAGEREF_RECLAIM,\n"
+ " \tPAGEREF_RECLAIM_CLEAN,\n"
+ " \tPAGEREF_KEEP,\n"
+ "+\tPAGEREF_DISCARD,\n"
+ " \tPAGEREF_ACTIVATE,\n"
+ " };\n"
+ " \n"
+ "@@ -619,9 +621,10 @@ static enum page_references page_check_references(struct page *page,\n"
+ " {\n"
+ " \tint referenced_ptes, referenced_page;\n"
+ " \tunsigned long vm_flags;\n"
+ "+\tint is_vrange = 0;\n"
+ " \n"
+ " \treferenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,\n"
+ "-\t\t\t\t\t  &vm_flags);\n"
+ "+\t\t\t\t\t  &vm_flags, &is_vrange);\n"
+ " \treferenced_page = TestClearPageReferenced(page);\n"
+ " \n"
+ " \t/*\n"
+ "@@ -631,6 +634,12 @@ static enum page_references page_check_references(struct page *page,\n"
+ " \tif (vm_flags & VM_LOCKED)\n"
+ " \t\treturn PAGEREF_RECLAIM;\n"
+ " \n"
+ "+\t/*\n"
+ "+\t * If the page is in a vrange, bail out and try to discard it.\n"
+ "+\t */\n"
+ "+\tif (is_vrange)\n"
+ "+\t\treturn PAGEREF_DISCARD;\n"
+ "+\n"
+ " \tif (referenced_ptes) {\n"
+ " \t\tif (PageSwapBacked(page))\n"
+ " \t\t\treturn PAGEREF_ACTIVATE;\n"
+ "@@ -769,6 +778,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,\n"
+ " \t\t\tgoto activate_locked;\n"
+ " \t\tcase PAGEREF_KEEP:\n"
+ " \t\t\tgoto keep_locked;\n"
+ "+\t\tcase PAGEREF_DISCARD:\n"
+ "+\t\t\tif (discard_vpage(page))\n"
+ "+\t\t\t\tgoto free_it;\n"
+ " \t\tcase PAGEREF_RECLAIM:\n"
+ " \t\tcase PAGEREF_RECLAIM_CLEAN:\n"
+ " \t\t\t; /* try to reclaim the page below */\n"
+ "@@ -1497,7 +1509,7 @@ static void shrink_active_list(unsigned long nr_to_scan,\n"
+ " \t\t}\n"
+ " \n"
+ " \t\tif (page_referenced(page, 0, sc->target_mem_cgroup,\n"
+ "-\t\t\t\t    &vm_flags)) {\n"
+ "+\t\t\t\t    &vm_flags, NULL)) {\n"
+ " \t\t\tnr_rotated += hpage_nr_pages(page);\n"
+ " \t\t\t/*\n"
+ " \t\t\t * Identify referenced, file-backed active pages and\n"
+ "diff --git a/mm/vrange.c b/mm/vrange.c\n"
+ "index 5278939..d57cb38 100644\n"
+ "--- a/mm/vrange.c\n"
+ "+++ b/mm/vrange.c\n"
+ "@@ -6,6 +6,13 @@\n"
+ " #include <linux/slab.h>\n"
+ " #include <linux/mman.h>\n"
+ " #include <linux/syscalls.h>\n"
+ "+#include <linux/pagemap.h>\n"
+ "+#include <linux/rmap.h>\n"
+ "+#include <linux/hugetlb.h>\n"
+ "+#include \"internal.h\"\n"
+ "+#include <linux/swap.h>\n"
+ "+#include <linux/swapops.h>\n"
+ "+#include <linux/mmu_notifier.h>\n"
+ " \n"
+ " static struct kmem_cache *vrange_cachep;\n"
+ " \n"
+ "@@ -364,3 +371,328 @@ SYSCALL_DEFINE4(vrange, unsigned long, start,\n"
+ " out:\n"
+ " \treturn ret;\n"
+ " }\n"
+ "+\n"
+ "+\n"
+ "+static bool __vrange_address(struct vrange_root *vroot,\n"
+ "+\t\t\tunsigned long start, unsigned long end)\n"
+ "+{\n"
+ "+\tstruct interval_tree_node *node;\n"
+ "+\n"
+ "+\tnode = interval_tree_iter_first(&vroot->v_rb, start, end);\n"
+ "+\treturn node ? true : false;\n"
+ "+}\n"
+ "+\n"
+ "+bool vrange_address(struct mm_struct *mm,\n"
+ "+\t\t\tunsigned long start, unsigned long end)\n"
+ "+{\n"
+ "+\tstruct vrange_root *vroot;\n"
+ "+\tunsigned long vstart_idx, vend_idx;\n"
+ "+\tstruct vm_area_struct *vma;\n"
+ "+\tbool ret;\n"
+ "+\n"
+ "+\tvma = find_vma(mm, start);\n"
+ "+\tif (vma->vm_file && (vma->vm_flags & VM_SHARED)) {\n"
+ "+\t\tvroot = &vma->vm_file->f_mapping->vroot;\n"
+ "+\t\tvstart_idx = vma->vm_pgoff + start - vma->vm_start;\n"
+ "+\t\tvend_idx = vma->vm_pgoff + end - vma->vm_start;\n"
+ "+\t} else {\n"
+ "+\t\tvroot = &mm->vroot;\n"
+ "+\t\tvstart_idx = start;\n"
+ "+\t\tvend_idx = end;\n"
+ "+\t}\n"
+ "+\n"
+ "+\tvrange_lock(vroot);\n"
+ "+\tret = __vrange_address(vroot, vstart_idx, vend_idx);\n"
+ "+\tvrange_unlock(vroot);\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ "+static pte_t *__vpage_check_address(struct page *page,\n"
+ "+\t\tstruct mm_struct *mm, unsigned long address, spinlock_t **ptlp)\n"
+ "+{\n"
+ "+\tpmd_t *pmd;\n"
+ "+\tpte_t *pte;\n"
+ "+\tspinlock_t *ptl;\n"
+ "+\tbool present;\n"
+ "+\n"
+ "+\t/* TODO : look into tlbfs */\n"
+ "+\tif (unlikely(PageHuge(page)))\n"
+ "+\t\treturn NULL;\n"
+ "+\n"
+ "+\tpmd = mm_find_pmd(mm, address);\n"
+ "+\tif (!pmd)\n"
+ "+\t\treturn NULL;\n"
+ "+\t/*\n"
+ "+\t * TODO : Support THP\n"
+ "+\t */\n"
+ "+\tif (pmd_trans_huge(*pmd))\n"
+ "+\t\treturn NULL;\n"
+ "+\n"
+ "+\tpte = pte_offset_map_lock(mm, pmd, address, &ptl);\n"
+ "+\tif (pte_none(*pte))\n"
+ "+\t\tgoto out;\n"
+ "+\n"
+ "+\tpresent = pte_present(*pte);\n"
+ "+\tif (present && page_to_pfn(page) != pte_pfn(*pte))\n"
+ "+\t\tgoto out;\n"
+ "+\telse if (present) {\n"
+ "+\t\t*ptlp = ptl;\n"
+ "+\t\treturn pte;\n"
+ "+\t} else {\n"
+ "+\t\tswp_entry_t entry = { .val = page_private(page) };\n"
+ "+\n"
+ "+\t\tVM_BUG_ON(non_swap_entry(entry));\n"
+ "+\t\tif (entry.val != pte_to_swp_entry(*pte).val)\n"
+ "+\t\t\tgoto out;\n"
+ "+\t\t*ptlp = ptl;\n"
+ "+\t\treturn pte;\n"
+ "+\t}\n"
+ "+out:\n"
+ "+\tpte_unmap_unlock(pte, ptl);\n"
+ "+\treturn NULL;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * This function checks whether @page matches what the pte encodes,\n"
+ "+ * which could be a page or a swap slot.\n"
+ "+ */\n"
+ "+static inline pte_t *vpage_check_address(struct page *page,\n"
+ "+\t\tstruct mm_struct *mm, unsigned long address,\n"
+ "+\t\tspinlock_t **ptlp)\n"
+ "+{\n"
+ "+\tpte_t *ptep;\n"
+ "+\t__cond_lock(*ptlp, ptep = __vpage_check_address(page,\n"
+ "+\t\t\t\tmm, address, ptlp));\n"
+ "+\treturn ptep;\n"
+ "+}\n"
+ "+\n"
+ "+static void __vrange_purge(struct vrange_root *vroot,\n"
+ "+\t\tunsigned long start, unsigned long end)\n"
+ "+{\n"
+ "+\tstruct vrange *range;\n"
+ "+\tstruct interval_tree_node *node;\n"
+ "+\n"
+ "+\tnode = interval_tree_iter_first(&vroot->v_rb, start, end);\n"
+ "+\twhile (node) {\n"
+ "+\t\trange = container_of(node, struct vrange, node);\n"
+ "+\t\trange->purged = true;\n"
+ "+\t\tnode = interval_tree_iter_next(node, start, end);\n"
+ "+\t}\n"
+ "+}\n"
+ "+\n"
+ "+int try_to_discard_one(struct vrange_root *vroot, struct page *page,\n"
+ "+\t\t\tstruct vm_area_struct *vma, unsigned long addr)\n"
+ "+{\n"
+ "+\tstruct mm_struct *mm = vma->vm_mm;\n"
+ "+\tpte_t *pte;\n"
+ "+\tpte_t pteval;\n"
+ "+\tspinlock_t *ptl;\n"
+ "+\tint ret = 0;\n"
+ "+\tbool present;\n"
+ "+\n"
+ "+\tVM_BUG_ON(!PageLocked(page));\n"
+ "+\n"
+ "+\tvrange_lock(vroot);\n"
+ "+\tpte = vpage_check_address(page, mm, addr, &ptl);\n"
+ "+\tif (!pte)\n"
+ "+\t\tgoto out;\n"
+ "+\n"
+ "+\tif (vma->vm_flags & VM_LOCKED) {\n"
+ "+\t\tpte_unmap_unlock(pte, ptl);\n"
+ "+\t\tgoto out;\n"
+ "+\t}\n"
+ "+\n"
+ "+\tpresent = pte_present(*pte);\n"
+ "+\tflush_cache_page(vma, address, page_to_pfn(page));\n"
+ "+\tpteval = ptep_clear_flush(vma, addr, pte);\n"
+ "+\n"
+ "+\tupdate_hiwater_rss(mm);\n"
+ "+\tif (present) {\n"
+ "+\t\tif (PageAnon(page))\n"
+ "+\t\t\tdec_mm_counter(mm, MM_ANONPAGES);\n"
+ "+\t\telse\n"
+ "+\t\t\tdec_mm_counter(mm, MM_FILEPAGES);\n"
+ "+\t\tpage_remove_rmap(page);\n"
+ "+\t\tpage_cache_release(page);\n"
+ "+\t} else {\n"
+ "+\t\tswp_entry_t entry = pte_to_swp_entry(pteval);\n"
+ "+\t\tdec_mm_counter(mm, MM_SWAPENTS);\n"
+ "+\t\tif (unlikely(!__free_swap_and_cache(entry)))\n"
+ "+\t\t\tBUG_ON(1);\n"
+ "+\t}\n"
+ "+\n"
+ "+\tpte_unmap_unlock(pte, ptl);\n"
+ "+\tmmu_notifier_invalidate_page(mm, addr);\n"
+ "+\tret = 1;\n"
+ "+\n"
+ "+\tif (!PageAnon(page)) /* switch to file offset */\n"
+ "+\t\taddr = vma->vm_pgoff + addr - vma->vm_start;\n"
+ "+\n"
+ "+\t__vrange_purge(vroot, addr, addr + PAGE_SIZE - 1);\n"
+ "+\n"
+ "+out:\n"
+ "+\tvrange_unlock(vroot);\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ "+static int try_to_discard_anon_vpage(struct page *page)\n"
+ "+{\n"
+ "+\tstruct anon_vma *anon_vma;\n"
+ "+\tstruct anon_vma_chain *avc;\n"
+ "+\tpgoff_t pgoff;\n"
+ "+\tstruct vm_area_struct *vma;\n"
+ "+\tstruct mm_struct *mm;\n"
+ "+\tstruct vrange_root *vroot;\n"
+ "+\n"
+ "+\tunsigned long address;\n"
+ "+\tbool ret = 0;\n"
+ "+\n"
+ "+\tanon_vma = page_lock_anon_vma_read(page);\n"
+ "+\tif (!anon_vma)\n"
+ "+\t\treturn ret;\n"
+ "+\n"
+ "+\tpgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);\n"
+ "+\tanon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {\n"
+ "+\t\tpte_t *pte;\n"
+ "+\t\tspinlock_t *ptl;\n"
+ "+\n"
+ "+\t\tvma = avc->vma;\n"
+ "+\t\tmm = vma->vm_mm;\n"
+ "+\t\tvroot = &mm->vroot;\n"
+ "+\t\taddress = vma_address(page, vma);\n"
+ "+\n"
+ "+\t\tvrange_lock(vroot);\n"
+ "+\t\t/*\n"
+ "+\t\t * We can't use page_check_address because it doesn't check\n"
+ "+\t\t * swap entries in the page table. We need that check because\n"
+ "+\t\t * we have to ensure atomicity of shared vranges: all vranges\n"
+ "+\t\t * that share a page should be purged if the page is purged\n"
+ "+\t\t * in any one process.\n"
+ "+\t\t */\n"
+ "+\t\tpte = vpage_check_address(page, mm, address, &ptl);\n"
+ "+\t\tif (!pte) {\n"
+ "+\t\t\tvrange_unlock(vroot);\n"
+ "+\t\t\tcontinue;\n"
+ "+\t\t}\n"
+ "+\n"
+ "+\t\tif (vma->vm_flags & VM_LOCKED) {\n"
+ "+\t\t\tpte_unmap_unlock(pte, ptl);\n"
+ "+\t\t\tvrange_unlock(vroot);\n"
+ "+\t\t\tgoto out;\n"
+ "+\t\t}\n"
+ "+\n"
+ "+\t\tpte_unmap_unlock(pte, ptl);\n"
+ "+\t\tif (!__vrange_address(vroot, address,\n"
+ "+\t\t\t\t\taddress + PAGE_SIZE - 1)) {\n"
+ "+\t\t\tvrange_unlock(vroot);\n"
+ "+\t\t\tgoto out;\n"
+ "+\t\t}\n"
+ "+\n"
+ "+\t\tvrange_unlock(vroot);\n"
+ "+\t}\n"
+ "+\n"
+ "+\tanon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {\n"
+ "+\t\tvma = avc->vma;\n"
+ "+\t\tmm = vma->vm_mm;\n"
+ "+\t\tvroot = &mm->vroot;\n"
+ "+\t\taddress = vma_address(page, vma);\n"
+ "+\t\tif (!try_to_discard_one(vroot, page, vma, address))\n"
+ "+\t\t\tgoto out;\n"
+ "+\t}\n"
+ "+\n"
+ "+\tret = 1;\n"
+ "+out:\n"
+ "+\tpage_unlock_anon_vma_read(anon_vma);\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ "+\n"
+ "+\n"
+ "+static int try_to_discard_file_vpage(struct page *page)\n"
+ "+{\n"
+ "+\tstruct address_space *mapping = page->mapping;\n"
+ "+\tpgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);\n"
+ "+\tstruct vm_area_struct *vma;\n"
+ "+\tbool ret = 0;\n"
+ "+\n"
+ "+\tmutex_lock(&mapping->i_mmap_mutex);\n"
+ "+\tvma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {\n"
+ "+\t\tunsigned long address = vma_address(page, vma);\n"
+ "+\t\tstruct mm_struct *mm = vma->vm_mm;\n"
+ "+\t\tstruct vrange_root *vroot = &mapping->vroot;\n"
+ "+\t\tpte_t *pte;\n"
+ "+\t\tspinlock_t *ptl;\n"
+ "+\t\tlong vstart_idx;\n"
+ "+\n"
+ "+\n"
+ "+\t\tvstart_idx = vma->vm_pgoff + address - vma->vm_start;\n"
+ "+\n"
+ "+\t\tvrange_lock(vroot);\n"
+ "+\t\t/*\n"
+ "+\t\t * We can't use page_check_address because it doesn't check\n"
+ "+\t\t * swap entries in the page table. We need that check because\n"
+ "+\t\t * we have to ensure atomicity of shared vranges: all vranges\n"
+ "+\t\t * that share a page should be purged if the page is purged\n"
+ "+\t\t * in any one process.\n"
+ "+\t\t */\n"
+ "+\t\tpte = vpage_check_address(page, mm, address, &ptl);\n"
+ "+\t\tif (!pte) {\n"
+ "+\t\t\tvrange_unlock(vroot);\n"
+ "+\t\t\tcontinue;\n"
+ "+\t\t}\n"
+ "+\n"
+ "+\t\tif (vma->vm_flags & VM_LOCKED) {\n"
+ "+\t\t\tpte_unmap_unlock(pte, ptl);\n"
+ "+\t\t\tvrange_unlock(vroot);\n"
+ "+\t\t\tgoto out;\n"
+ "+\t\t}\n"
+ "+\n"
+ "+\t\tpte_unmap_unlock(pte, ptl);\n"
+ "+\t\tif (!__vrange_address(vroot, vstart_idx,\n"
+ "+\t\t\t\t\tvstart_idx + PAGE_SIZE - 1)) {\n"
+ "+\t\t\tvrange_unlock(vroot);\n"
+ "+\t\t\tgoto out;\n"
+ "+\t\t}\n"
+ "+\n"
+ "+\t\tvrange_unlock(vroot);\n"
+ "+\t}\n"
+ "+\n"
+ "+\tvma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {\n"
+ "+\t\tunsigned long address = vma_address(page, vma);\n"
+ "+\t\tstruct vrange_root *vroot = &mapping->vroot;\n"
+ "+\n"
+ "+\t\tif (!try_to_discard_one(vroot, page, vma, address))\n"
+ "+\t\t\tgoto out;\n"
+ "+\t}\n"
+ "+\n"
+ "+\tret = 1;\n"
+ "+out:\n"
+ "+\tmutex_unlock(&mapping->i_mmap_mutex);\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ "+static int try_to_discard_vpage(struct page *page)\n"
+ "+{\n"
+ "+\tif (PageAnon(page))\n"
+ "+\t\treturn try_to_discard_anon_vpage(page);\n"
+ "+\treturn try_to_discard_file_vpage(page);\n"
+ "+}\n"
+ "+\n"
+ "+int discard_vpage(struct page *page)\n"
+ "+{\n"
+ "+\tVM_BUG_ON(!PageLocked(page));\n"
+ "+\tVM_BUG_ON(PageLRU(page));\n"
+ "+\n"
+ "+\tif (try_to_discard_vpage(page)) {\n"
+ "+\t\tif (PageSwapCache(page))\n"
+ "+\t\t\ttry_to_free_swap(page);\n"
+ "+\n"
+ "+\t\tif (page_freeze_refs(page, 1)) {\n"
+ "+\t\t\tunlock_page(page);\n"
+ "+\t\t\treturn 1;\n"
+ "+\t\t}\n"
+ "+\t}\n"
+ "+\n"
+ "+\treturn 0;\n"
+ "+}\n"
+ "+\n"
+ "-- \n"
+ "1.7.9.5\n"
+ "\n"
+ "-- \n"
+ "Kind regards,\n"
+ Minchan Kim
 
-17754790192883c036c39ccfb16af6efedb15b58fad1e71a1c180467d701b10f
+a8eb9aab90e06e61be4847eef4f5f62d9d8d89b1e85ca067de2dbc7a91b71cd8

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.