diff for duplicates of <20130619043419.GA10961@bbox> diff --git a/a/1.txt b/N1/1.txt index c83a20e..d89c1e7 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -57,3 +57,729 @@ On Tue, Jun 11, 2013 at 09:22:50PM -0700, John Stultz wrote: This patch has some bugs so below patch should fix them and pass my simple cases. + +>From 13c458388a4784a785d93f285b0c54156c3b04aa Mon Sep 17 00:00:00 2001 +From: Minchan Kim <minchan@kernel.org> +Date: Tue, 11 Jun 2013 21:22:50 -0700 +Subject: [PATCH 1/2] vrange: Add method to purge volatile ranges + +This patch adds a discarding function to purge volatile ranges under +memory pressure. The logic is as follows: + +1. Memory pressure happens +2. VM starts to reclaim pages +3. Check whether the page is in a volatile range. +4. If so, zap the page from the process's page table. + (By the semantics of vrange(2), we should mark it with another one to + make a page fault happen when you try to access the address. This will + be introduced in a later patch) +5. If the page is unmapped from all processes, discard it instead of swapping. + +This patch does not address the case where there is no swap, which +keeps anonymous pages from being aged off the LRUs. Minchan has +additional patches that add support for purging anonymous pages. + +XXX: First pass at file purging. Seems to work, but is likely broken +and needs close review.
+ +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Android Kernel Team <kernel-team@android.com> +Cc: Robert Love <rlove@google.com> +Cc: Mel Gorman <mel@csn.ul.ie> +Cc: Hugh Dickins <hughd@google.com> +Cc: Dave Hansen <dave@linux.vnet.ibm.com> +Cc: Rik van Riel <riel@redhat.com> +Cc: Dmitry Adamushko <dmitry.adamushko@gmail.com> +Cc: Dave Chinner <david@fromorbit.com> +Cc: Neil Brown <neilb@suse.de> +Cc: Andrea Righi <andrea@betterlinux.com> +Cc: Andrea Arcangeli <aarcange@redhat.com> +Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> +Cc: Mike Hommey <mh@glandium.org> +Cc: Taras Glek <tglek@mozilla.com> +Cc: Dhaval Giani <dgiani@mozilla.com> +Cc: Jan Kara <jack@suse.cz> +Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com> +Cc: Michel Lespinasse <walken@google.com> +Cc: Minchan Kim <minchan@kernel.org> +Cc: linux-mm@kvack.org <linux-mm@kvack.org> +Signed-off-by: Minchan Kim <minchan@kernel.org> +[jstultz: Reworked to add purging of file pages, commit log tweaks] +Signed-off-by: John Stultz <john.stultz@linaro.org> +--- + include/linux/rmap.h | 12 +- + include/linux/swap.h | 1 + + include/linux/vrange.h | 7 + + mm/ksm.c | 2 +- + mm/rmap.c | 30 +++-- + mm/swapfile.c | 36 ++++++ + mm/vmscan.c | 16 ++- + mm/vrange.c | 332 ++++++++++++++++++++++++++++++++++++++++++++++++ + 8 files changed, 420 insertions(+), 16 deletions(-) + +diff --git a/include/linux/rmap.h b/include/linux/rmap.h +index 6dacb93..6432dfb 100644 +--- a/include/linux/rmap.h ++++ b/include/linux/rmap.h +@@ -83,6 +83,8 @@ enum ttu_flags { + }; + + #ifdef CONFIG_MMU ++unsigned long vma_address(struct page *page, struct vm_area_struct *vma); ++ + static inline void get_anon_vma(struct anon_vma *anon_vma) + { + atomic_inc(&anon_vma->refcount); +@@ -182,9 +184,11 @@ static inline void page_dup_rmap(struct page *page) + * Called from mm/vmscan.c to handle paging out + */ + int page_referenced(struct page *, int is_locked, +- struct mem_cgroup *memcg, unsigned long *vm_flags); ++ struct mem_cgroup *memcg, 
unsigned long *vm_flags, ++ int *is_vrange); + int page_referenced_one(struct page *, struct vm_area_struct *, +- unsigned long address, unsigned int *mapcount, unsigned long *vm_flags); ++ unsigned long address, unsigned int *mapcount, unsigned long *vm_flags, ++ int *is_vrange); + + #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) + +@@ -249,9 +253,11 @@ int rmap_walk(struct page *page, int (*rmap_one)(struct page *, + + static inline int page_referenced(struct page *page, int is_locked, + struct mem_cgroup *memcg, +- unsigned long *vm_flags) ++ unsigned long *vm_flags, ++ int *is_vrange) + { + *vm_flags = 0; ++ *is_vrange = 0; + return 0; + } + +diff --git a/include/linux/swap.h b/include/linux/swap.h +index 1701ce4..5907936 100644 +--- a/include/linux/swap.h ++++ b/include/linux/swap.h +@@ -383,6 +383,7 @@ extern int swap_duplicate(swp_entry_t); + extern int swapcache_prepare(swp_entry_t); + extern void swap_free(swp_entry_t); + extern void swapcache_free(swp_entry_t, struct page *page); ++extern int __free_swap_and_cache(swp_entry_t); + extern int free_swap_and_cache(swp_entry_t); + extern int swap_type_of(dev_t, sector_t, struct block_device **); + extern unsigned int count_swap_pages(int, int); +diff --git a/include/linux/vrange.h b/include/linux/vrange.h +index a97ac25..cbb609a 100644 +--- a/include/linux/vrange.h ++++ b/include/linux/vrange.h +@@ -37,6 +37,10 @@ extern int vrange_clear(struct vrange_root *vroot, + extern void vrange_root_cleanup(struct vrange_root *vroot); + extern int vrange_fork(struct mm_struct *new, + struct mm_struct *old); ++int discard_vpage(struct page *page); ++bool vrange_address(struct mm_struct *mm, unsigned long start, ++ unsigned long end); ++ + #else + + static inline void vrange_init(void) {}; +@@ -47,5 +51,8 @@ static inline int vrange_fork(struct mm_struct *new, struct mm_struct *old) + return 0; + } + ++static inline bool vrange_address(struct mm_struct *mm, unsigned long start, ++ unsigned long end) { return false; }; 
++static inline int discard_vpage(struct page *page) { return 0; } + #endif + #endif /* _LINIUX_VRANGE_H */ +diff --git a/mm/ksm.c b/mm/ksm.c +index b6afe0c..debc20c 100644 +--- a/mm/ksm.c ++++ b/mm/ksm.c +@@ -1932,7 +1932,7 @@ again: + continue; + + referenced += page_referenced_one(page, vma, +- rmap_item->address, &mapcount, vm_flags); ++ rmap_item->address, &mapcount, vm_flags, NULL); + if (!search_new_forks || !mapcount) + break; + } +diff --git a/mm/rmap.c b/mm/rmap.c +index 6280da8..5522522 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -57,6 +57,8 @@ + #include <linux/migrate.h> + #include <linux/hugetlb.h> + #include <linux/backing-dev.h> ++#include <linux/vrange.h> ++#include <linux/rmap.h> + + #include <asm/tlbflush.h> + +@@ -523,8 +525,7 @@ __vma_address(struct page *page, struct vm_area_struct *vma) + return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + } + +-inline unsigned long +-vma_address(struct page *page, struct vm_area_struct *vma) ++unsigned long vma_address(struct page *page, struct vm_area_struct *vma) + { + unsigned long address = __vma_address(page, vma); + +@@ -662,7 +663,7 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) + */ + int page_referenced_one(struct page *page, struct vm_area_struct *vma, + unsigned long address, unsigned int *mapcount, +- unsigned long *vm_flags) ++ unsigned long *vm_flags, int *is_vrange) + { + struct mm_struct *mm = vma->vm_mm; + int referenced = 0; +@@ -724,6 +725,9 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, + referenced++; + } + pte_unmap_unlock(pte, ptl); ++ if (is_vrange && ++ vrange_address(mm, address, address + PAGE_SIZE - 1)) ++ *is_vrange = 1; + } + + (*mapcount)--; +@@ -736,7 +740,8 @@ out: + + static int page_referenced_anon(struct page *page, + struct mem_cgroup *memcg, +- unsigned long *vm_flags) ++ unsigned long *vm_flags, ++ int *is_vrange) + { + unsigned int mapcount; + struct anon_vma *anon_vma; +@@ -761,7 +766,7 @@ static int
page_referenced_anon(struct page *page, + if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) + continue; + referenced += page_referenced_one(page, vma, address, +- &mapcount, vm_flags); ++ &mapcount, vm_flags, is_vrange); + if (!mapcount) + break; + } +@@ -785,7 +790,9 @@ static int page_referenced_anon(struct page *page, + */ + static int page_referenced_file(struct page *page, + struct mem_cgroup *memcg, +- unsigned long *vm_flags) ++ unsigned long *vm_flags, ++ int *is_vrange) ++ + { + unsigned int mapcount; + struct address_space *mapping = page->mapping; +@@ -826,7 +833,8 @@ static int page_referenced_file(struct page *page, + if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) + continue; + referenced += page_referenced_one(page, vma, address, +- &mapcount, vm_flags); ++ &mapcount, vm_flags, ++ is_vrange); + if (!mapcount) + break; + } +@@ -841,6 +849,7 @@ static int page_referenced_file(struct page *page, + * @is_locked: caller holds lock on the page + * @memcg: target memory cgroup + * @vm_flags: collect encountered vma->vm_flags who actually referenced the page ++ * @is_vrange: the page in vrange of some process + * + * Quick test_and_clear_referenced for all mappings to a page, + * returns the number of ptes which referenced the page. 
+@@ -848,7 +857,8 @@ static int page_referenced_file(struct page *page, + int page_referenced(struct page *page, + int is_locked, + struct mem_cgroup *memcg, +- unsigned long *vm_flags) ++ unsigned long *vm_flags, ++ int *is_vrange) + { + int referenced = 0; + int we_locked = 0; +@@ -867,10 +877,10 @@ int page_referenced(struct page *page, + vm_flags); + else if (PageAnon(page)) + referenced += page_referenced_anon(page, memcg, +- vm_flags); ++ vm_flags, is_vrange); + else if (page->mapping) + referenced += page_referenced_file(page, memcg, +- vm_flags); ++ vm_flags, is_vrange); + if (we_locked) + unlock_page(page); + +diff --git a/mm/swapfile.c b/mm/swapfile.c +index 6c340d9..1f6c80e 100644 +--- a/mm/swapfile.c ++++ b/mm/swapfile.c +@@ -734,6 +734,42 @@ int try_to_free_swap(struct page *page) + } + + /* ++ * It's almost same with free_swap_and_cache except page is already ++ * locked. ++ */ ++int __free_swap_and_cache(swp_entry_t entry) ++{ ++ struct swap_info_struct *p; ++ struct page *page = NULL; ++ ++ if (non_swap_entry(entry)) ++ return 1; ++ ++ p = swap_info_get(entry); ++ if (p) { ++ if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) { ++ page = find_get_page(swap_address_space(entry), ++ entry.val); ++ } ++ spin_unlock(&p->lock); ++ } ++ ++ if (page) { ++ /* ++ * Not mapped elsewhere, or swap space full? Free it! ++ * Also recheck PageSwapCache now page is locked (above). ++ */ ++ if (PageSwapCache(page) && !PageWriteback(page) && ++ (!page_mapped(page) || vm_swap_full())) { ++ delete_from_swap_cache(page); ++ SetPageDirty(page); ++ } ++ page_cache_release(page); ++ } ++ return p != NULL; ++} ++ ++/* + * Free the swap entry like above, but also try to + * free the page cache entry if it is the last user. 
+ */ +diff --git a/mm/vmscan.c b/mm/vmscan.c +index fa6a853..c75e0ac 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -43,6 +43,7 @@ + #include <linux/sysctl.h> + #include <linux/oom.h> + #include <linux/prefetch.h> ++#include <linux/vrange.h> + + #include <asm/tlbflush.h> + #include <asm/div64.h> +@@ -611,6 +612,7 @@ enum page_references { + PAGEREF_RECLAIM, + PAGEREF_RECLAIM_CLEAN, + PAGEREF_KEEP, ++ PAGEREF_DISCARD, + PAGEREF_ACTIVATE, + }; + +@@ -619,9 +621,10 @@ static enum page_references page_check_references(struct page *page, + { + int referenced_ptes, referenced_page; + unsigned long vm_flags; ++ int is_vrange = 0; + + referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup, +- &vm_flags); ++ &vm_flags, &is_vrange); + referenced_page = TestClearPageReferenced(page); + + /* +@@ -631,6 +634,12 @@ static enum page_references page_check_references(struct page *page, + if (vm_flags & VM_LOCKED) + return PAGEREF_RECLAIM; + ++ /* ++ * Bail out if the page is in vrange and try to discard. 
++ */ ++ if (is_vrange) ++ return PAGEREF_DISCARD; ++ + if (referenced_ptes) { + if (PageSwapBacked(page)) + return PAGEREF_ACTIVATE; +@@ -769,6 +778,9 @@ static unsigned long shrink_page_list(struct list_head *page_list, + goto activate_locked; + case PAGEREF_KEEP: + goto keep_locked; ++ case PAGEREF_DISCARD: ++ if (discard_vpage(page)) ++ goto free_it; + case PAGEREF_RECLAIM: + case PAGEREF_RECLAIM_CLEAN: + ; /* try to reclaim the page below */ +@@ -1497,7 +1509,7 @@ static void shrink_active_list(unsigned long nr_to_scan, + } + + if (page_referenced(page, 0, sc->target_mem_cgroup, +- &vm_flags)) { ++ &vm_flags, NULL)) { + nr_rotated += hpage_nr_pages(page); + /* + * Identify referenced, file-backed active pages and +diff --git a/mm/vrange.c b/mm/vrange.c +index 5278939..d57cb38 100644 +--- a/mm/vrange.c ++++ b/mm/vrange.c +@@ -6,6 +6,13 @@ + #include <linux/slab.h> + #include <linux/mman.h> + #include <linux/syscalls.h> ++#include <linux/pagemap.h> ++#include <linux/rmap.h> ++#include <linux/hugetlb.h> ++#include "internal.h" ++#include <linux/swap.h> ++#include <linux/swapops.h> ++#include <linux/mmu_notifier.h> + + static struct kmem_cache *vrange_cachep; + +@@ -364,3 +371,328 @@ SYSCALL_DEFINE4(vrange, unsigned long, start, + out: + return ret; + } ++ ++ ++static bool __vrange_address(struct vrange_root *vroot, ++ unsigned long start, unsigned long end) ++{ ++ struct interval_tree_node *node; ++ ++ node = interval_tree_iter_first(&vroot->v_rb, start, end); ++ return node ? 
true : false; ++} ++ ++bool vrange_address(struct mm_struct *mm, ++ unsigned long start, unsigned long end) ++{ ++ struct vrange_root *vroot; ++ unsigned long vstart_idx, vend_idx; ++ struct vm_area_struct *vma; ++ bool ret; ++ ++ vma = find_vma(mm, start); ++ if (vma && vma->vm_file && (vma->vm_flags & VM_SHARED)) { ++ vroot = &vma->vm_file->f_mapping->vroot; ++ vstart_idx = vma->vm_pgoff + start - vma->vm_start; ++ vend_idx = vma->vm_pgoff + end - vma->vm_start; ++ } else { ++ vroot = &mm->vroot; ++ vstart_idx = start; ++ vend_idx = end; ++ } ++ ++ vrange_lock(vroot); ++ ret = __vrange_address(vroot, vstart_idx, vend_idx); ++ vrange_unlock(vroot); ++ return ret; ++} ++ ++static pte_t *__vpage_check_address(struct page *page, ++ struct mm_struct *mm, unsigned long address, spinlock_t **ptlp) ++{ ++ pmd_t *pmd; ++ pte_t *pte; ++ spinlock_t *ptl; ++ bool present; ++ ++ /* TODO : look into tlbfs */ ++ if (unlikely(PageHuge(page))) ++ return NULL; ++ ++ pmd = mm_find_pmd(mm, address); ++ if (!pmd) ++ return NULL; ++ /* ++ * TODO : Support THP ++ */ ++ if (pmd_trans_huge(*pmd)) ++ return NULL; ++ ++ pte = pte_offset_map_lock(mm, pmd, address, &ptl); ++ if (pte_none(*pte)) ++ goto out; ++ ++ present = pte_present(*pte); ++ if (present && page_to_pfn(page) != pte_pfn(*pte)) ++ goto out; ++ else if (present) { ++ *ptlp = ptl; ++ return pte; ++ } else { ++ swp_entry_t entry = { .val = page_private(page) }; ++ ++ VM_BUG_ON(non_swap_entry(entry)); ++ if (entry.val != pte_to_swp_entry(*pte).val) ++ goto out; ++ *ptlp = ptl; ++ return pte; ++ } ++out: ++ pte_unmap_unlock(pte, ptl); ++ return NULL; ++} ++ ++/* ++ * This function checks whether @page matches the pte's encoded one, ++ * which could be a page or swap slot.
++ */ ++static inline pte_t *vpage_check_address(struct page *page, ++ struct mm_struct *mm, unsigned long address, ++ spinlock_t **ptlp) ++{ ++ pte_t *ptep; ++ __cond_lock(*ptlp, ptep = __vpage_check_address(page, ++ mm, address, ptlp)); ++ return ptep; ++} ++ ++static void __vrange_purge(struct vrange_root *vroot, ++ unsigned long start, unsigned long end) ++{ ++ struct vrange *range; ++ struct interval_tree_node *node; ++ ++ node = interval_tree_iter_first(&vroot->v_rb, start, end); ++ while (node) { ++ range = container_of(node, struct vrange, node); ++ range->purged = true; ++ node = interval_tree_iter_next(node, start, end); ++ } ++} ++ ++int try_to_discard_one(struct vrange_root *vroot, struct page *page, ++ struct vm_area_struct *vma, unsigned long addr) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pte_t *pte; ++ pte_t pteval; ++ spinlock_t *ptl; ++ int ret = 0; ++ bool present; ++ ++ VM_BUG_ON(!PageLocked(page)); ++ ++ vrange_lock(vroot); ++ pte = vpage_check_address(page, mm, addr, &ptl); ++ if (!pte) ++ goto out; ++ ++ if (vma->vm_flags & VM_LOCKED) { ++ pte_unmap_unlock(pte, ptl); ++ goto out; ++ } ++ ++ present = pte_present(*pte); ++ flush_cache_page(vma, addr, page_to_pfn(page)); ++ pteval = ptep_clear_flush(vma, addr, pte); ++ ++ update_hiwater_rss(mm); ++ if (present) { ++ if (PageAnon(page)) ++ dec_mm_counter(mm, MM_ANONPAGES); ++ else ++ dec_mm_counter(mm, MM_FILEPAGES); ++ page_remove_rmap(page); ++ page_cache_release(page); ++ } else { ++ swp_entry_t entry = pte_to_swp_entry(pteval); ++ dec_mm_counter(mm, MM_SWAPENTS); ++ if (unlikely(!__free_swap_and_cache(entry))) ++ BUG_ON(1); ++ } ++ ++ pte_unmap_unlock(pte, ptl); ++ mmu_notifier_invalidate_page(mm, addr); ++ ret = 1; ++ ++ if (!PageAnon(page)) /* switch to file offset */ ++ addr = vma->vm_pgoff + addr - vma->vm_start; ++ ++ __vrange_purge(vroot, addr, addr + PAGE_SIZE - 1); ++ ++out: ++ vrange_unlock(vroot); ++ return ret; ++} ++ ++static int try_to_discard_anon_vpage(struct page
*page) ++{ ++ struct anon_vma *anon_vma; ++ struct anon_vma_chain *avc; ++ pgoff_t pgoff; ++ struct vm_area_struct *vma; ++ struct mm_struct *mm; ++ struct vrange_root *vroot; ++ ++ unsigned long address; ++ bool ret = 0; ++ ++ anon_vma = page_lock_anon_vma_read(page); ++ if (!anon_vma) ++ return ret; ++ ++ pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); ++ anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { ++ pte_t *pte; ++ spinlock_t *ptl; ++ ++ vma = avc->vma; ++ mm = vma->vm_mm; ++ vroot = &mm->vroot; ++ address = vma_address(page, vma); ++ ++ vrange_lock(vroot); ++ /* ++ * We can't use page_check_address because it doesn't check ++ * swap entry of the page table. We need the check because ++ * we have to make sure atomicity of shared vrange. ++ * It means all vranges which are shared a page should be ++ * purged if a page in a process is purged. ++ */ ++ pte = vpage_check_address(page, mm, address, &ptl); ++ if (!pte) { ++ vrange_unlock(vroot); ++ continue; ++ } ++ ++ if (vma->vm_flags & VM_LOCKED) { ++ pte_unmap_unlock(pte, ptl); ++ vrange_unlock(vroot); ++ goto out; ++ } ++ ++ pte_unmap_unlock(pte, ptl); ++ if (!__vrange_address(vroot, address, ++ address + PAGE_SIZE - 1)) { ++ vrange_unlock(vroot); ++ goto out; ++ } ++ ++ vrange_unlock(vroot); ++ } ++ ++ anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { ++ vma = avc->vma; ++ mm = vma->vm_mm; ++ vroot = &mm->vroot; ++ address = vma_address(page, vma); ++ if (!try_to_discard_one(vroot, page, vma, address)) ++ goto out; ++ } ++ ++ ret = 1; ++out: ++ page_unlock_anon_vma_read(anon_vma); ++ return ret; ++} ++ ++ ++ ++static int try_to_discard_file_vpage(struct page *page) ++{ ++ struct address_space *mapping = page->mapping; ++ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); ++ struct vm_area_struct *vma; ++ bool ret = 0; ++ ++ mutex_lock(&mapping->i_mmap_mutex); ++ vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { ++ unsigned long 
address = vma_address(page, vma); ++ struct mm_struct *mm = vma->vm_mm; ++ struct vrange_root *vroot = &mapping->vroot; ++ pte_t *pte; ++ spinlock_t *ptl; ++ long vstart_idx; ++ ++ ++ vstart_idx = vma->vm_pgoff + address - vma->vm_start; ++ ++ vrange_lock(vroot); ++ /* ++ * We can't use page_check_address because it doesn't check ++ * swap entry of the page table. We need the check because ++ * we have to make sure atomicity of shared vrange. ++ * It means all vranges which are shared a page should be ++ * purged if a page in a process is purged. ++ */ ++ pte = vpage_check_address(page, mm, address, &ptl); ++ if (!pte) { ++ vrange_unlock(vroot); ++ continue; ++ } ++ ++ if (vma->vm_flags & VM_LOCKED) { ++ pte_unmap_unlock(pte, ptl); ++ vrange_unlock(vroot); ++ goto out; ++ } ++ ++ pte_unmap_unlock(pte, ptl); ++ if (!__vrange_address(vroot, vstart_idx, ++ vstart_idx + PAGE_SIZE - 1)) { ++ vrange_unlock(vroot); ++ goto out; ++ } ++ ++ vrange_unlock(vroot); ++ } ++ ++ vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { ++ unsigned long address = vma_address(page, vma); ++ struct vrange_root *vroot = &mapping->vroot; ++ ++ if (!try_to_discard_one(vroot, page, vma, address)) ++ goto out; ++ } ++ ++ ret = 1; ++out: ++ mutex_unlock(&mapping->i_mmap_mutex); ++ return ret; ++} ++ ++static int try_to_discard_vpage(struct page *page) ++{ ++ if (PageAnon(page)) ++ return try_to_discard_anon_vpage(page); ++ return try_to_discard_file_vpage(page); ++} ++ ++int discard_vpage(struct page *page) ++{ ++ VM_BUG_ON(!PageLocked(page)); ++ VM_BUG_ON(PageLRU(page)); ++ ++ if (try_to_discard_vpage(page)) { ++ if (PageSwapCache(page)) ++ try_to_free_swap(page); ++ ++ if (page_freeze_refs(page, 1)) { ++ unlock_page(page); ++ return 1; ++ } ++ } ++ ++ return 0; ++} ++ +-- +1.7.9.5 + +-- +Kind regards, +Minchan Kim diff --git a/a/content_digest b/N1/content_digest index 24bc3c2..a3fed9e 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -85,6 +85,732 @@ "> 8 files changed, 
420 insertions(+), 16 deletions(-)\n" "\n" "This patch has some bugs so below patch should fix them and pass my\n" - simple cases. + "simple cases.\n" + "\n" + ">From 13c458388a4784a785d93f285b0c54156c3b04aa Mon Sep 17 00:00:00 2001\n" + "From: Minchan Kim <minchan@kernel.org>\n" + "Date: Tue, 11 Jun 2013 21:22:50 -0700\n" + "Subject: [PATCH 1/2] vrange: Add method to purge volatile ranges\n" + "\n" + "This patch adds discarding function to purge volatile ranges under\n" + "memory pressure. Logic is as following:\n" + "\n" + "1. Memory pressure happens\n" + "2. VM start to reclaim pages\n" + "3. Check the page is in volatile range.\n" + "4. If so, zap the page from the process's page table.\n" + " (By semantic vrange(2), we should mark it with another one to\n" + " make page fault when you try to access the address. It will\n" + " be introduced later patch)\n" + "5. If page is unmapped from all processes, discard it instead of swapping.\n" + "\n" + "This patch does not address the case where there is no swap, which\n" + "keeps anonymous pages from being aged off the LRUs. Minchan has\n" + "additional patches that add support for purging anonymous pages\n" + "\n" + "XXX: First pass at file purging. 
Seems to work, but is likely broken\n" + "and needs close review.\n" + "\n" + "Cc: Andrew Morton <akpm@linux-foundation.org>\n" + "Cc: Android Kernel Team <kernel-team@android.com>\n" + "Cc: Robert Love <rlove@google.com>\n" + "Cc: Mel Gorman <mel@csn.ul.ie>\n" + "Cc: Hugh Dickins <hughd@google.com>\n" + "Cc: Dave Hansen <dave@linux.vnet.ibm.com>\n" + "Cc: Rik van Riel <riel@redhat.com>\n" + "Cc: Dmitry Adamushko <dmitry.adamushko@gmail.com>\n" + "Cc: Dave Chinner <david@fromorbit.com>\n" + "Cc: Neil Brown <neilb@suse.de>\n" + "Cc: Andrea Righi <andrea@betterlinux.com>\n" + "Cc: Andrea Arcangeli <aarcange@redhat.com>\n" + "Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>\n" + "Cc: Mike Hommey <mh@glandium.org>\n" + "Cc: Taras Glek <tglek@mozilla.com>\n" + "Cc: Dhaval Giani <dgiani@mozilla.com>\n" + "Cc: Jan Kara <jack@suse.cz>\n" + "Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>\n" + "Cc: Michel Lespinasse <walken@google.com>\n" + "Cc: Minchan Kim <minchan@kernel.org>\n" + "Cc: linux-mm@kvack.org <linux-mm@kvack.org>\n" + "Signed-off-by: Minchan Kim <minchan@kernel.org>\n" + "[jstultz: Reworked to add purging of file pages, commit log tweaks]\n" + "Signed-off-by: John Stultz <john.stultz@linaro.org>\n" + "---\n" + " include/linux/rmap.h | 12 +-\n" + " include/linux/swap.h | 1 +\n" + " include/linux/vrange.h | 7 +\n" + " mm/ksm.c | 2 +-\n" + " mm/rmap.c | 30 +++--\n" + " mm/swapfile.c | 36 ++++++\n" + " mm/vmscan.c | 16 ++-\n" + " mm/vrange.c | 332 ++++++++++++++++++++++++++++++++++++++++++++++++\n" + " 8 files changed, 420 insertions(+), 16 deletions(-)\n" + "\n" + "diff --git a/include/linux/rmap.h b/include/linux/rmap.h\n" + "index 6dacb93..6432dfb 100644\n" + "--- a/include/linux/rmap.h\n" + "+++ b/include/linux/rmap.h\n" + "@@ -83,6 +83,8 @@ enum ttu_flags {\n" + " };\n" + " \n" + " #ifdef CONFIG_MMU\n" + "+unsigned long vma_address(struct page *page, struct vm_area_struct *vma);\n" + "+\n" + " static inline void get_anon_vma(struct anon_vma 
*anon_vma)\n" + " {\n" + " \tatomic_inc(&anon_vma->refcount);\n" + "@@ -182,9 +184,11 @@ static inline void page_dup_rmap(struct page *page)\n" + " * Called from mm/vmscan.c to handle paging out\n" + " */\n" + " int page_referenced(struct page *, int is_locked,\n" + "-\t\t\tstruct mem_cgroup *memcg, unsigned long *vm_flags);\n" + "+\t\t\tstruct mem_cgroup *memcg, unsigned long *vm_flags,\n" + "+\t\t\tint *is_vrange);\n" + " int page_referenced_one(struct page *, struct vm_area_struct *,\n" + "-\tunsigned long address, unsigned int *mapcount, unsigned long *vm_flags);\n" + "+\tunsigned long address, unsigned int *mapcount, unsigned long *vm_flags,\n" + "+\tint *is_vrange);\n" + " \n" + " #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)\n" + " \n" + "@@ -249,9 +253,11 @@ int rmap_walk(struct page *page, int (*rmap_one)(struct page *,\n" + " \n" + " static inline int page_referenced(struct page *page, int is_locked,\n" + " \t\t\t\t struct mem_cgroup *memcg,\n" + "-\t\t\t\t unsigned long *vm_flags)\n" + "+\t\t\t\t unsigned long *vm_flags,\n" + "+\t\t\t\t int *is_vrange)\n" + " {\n" + " \t*vm_flags = 0;\n" + "+\t*is_vrange = 0;\n" + " \treturn 0;\n" + " }\n" + " \n" + "diff --git a/include/linux/swap.h b/include/linux/swap.h\n" + "index 1701ce4..5907936 100644\n" + "--- a/include/linux/swap.h\n" + "+++ b/include/linux/swap.h\n" + "@@ -383,6 +383,7 @@ extern int swap_duplicate(swp_entry_t);\n" + " extern int swapcache_prepare(swp_entry_t);\n" + " extern void swap_free(swp_entry_t);\n" + " extern void swapcache_free(swp_entry_t, struct page *page);\n" + "+extern int __free_swap_and_cache(swp_entry_t);\n" + " extern int free_swap_and_cache(swp_entry_t);\n" + " extern int swap_type_of(dev_t, sector_t, struct block_device **);\n" + " extern unsigned int count_swap_pages(int, int);\n" + "diff --git a/include/linux/vrange.h b/include/linux/vrange.h\n" + "index a97ac25..cbb609a 100644\n" + "--- a/include/linux/vrange.h\n" + "+++ b/include/linux/vrange.h\n" + "@@ -37,6 +37,10 @@ 
extern int vrange_clear(struct vrange_root *vroot,\n" + " extern void vrange_root_cleanup(struct vrange_root *vroot);\n" + " extern int vrange_fork(struct mm_struct *new,\n" + " \t\t\t\t\tstruct mm_struct *old);\n" + "+int discard_vpage(struct page *page);\n" + "+bool vrange_address(struct mm_struct *mm, unsigned long start,\n" + "+\t\t\tunsigned long end);\n" + "+\n" + " #else\n" + " \n" + " static inline void vrange_init(void) {};\n" + "@@ -47,5 +51,8 @@ static inline int vrange_fork(struct mm_struct *new, struct mm_struct *old)\n" + " \treturn 0;\n" + " }\n" + " \n" + "+static inline bool vrange_address(struct mm_struct *mm, unsigned long start,\n" + "+\t\tunsigned long end) { return false; };\n" + "+static inline int discard_vpage(struct page *page) { return 0 };\n" + " #endif\n" + " #endif /* _LINIUX_VRANGE_H */\n" + "diff --git a/mm/ksm.c b/mm/ksm.c\n" + "index b6afe0c..debc20c 100644\n" + "--- a/mm/ksm.c\n" + "+++ b/mm/ksm.c\n" + "@@ -1932,7 +1932,7 @@ again:\n" + " \t\t\t\tcontinue;\n" + " \n" + " \t\t\treferenced += page_referenced_one(page, vma,\n" + "-\t\t\t\trmap_item->address, &mapcount, vm_flags);\n" + "+\t\t\t\trmap_item->address, &mapcount, vm_flags, NULL);\n" + " \t\t\tif (!search_new_forks || !mapcount)\n" + " \t\t\t\tbreak;\n" + " \t\t}\n" + "diff --git a/mm/rmap.c b/mm/rmap.c\n" + "index 6280da8..5522522 100644\n" + "--- a/mm/rmap.c\n" + "+++ b/mm/rmap.c\n" + "@@ -57,6 +57,8 @@\n" + " #include <linux/migrate.h>\n" + " #include <linux/hugetlb.h>\n" + " #include <linux/backing-dev.h>\n" + "+#include <linux/vrange.h>\n" + "+#include <linux/rmap.h>\n" + " \n" + " #include <asm/tlbflush.h>\n" + " \n" + "@@ -523,8 +525,7 @@ __vma_address(struct page *page, struct vm_area_struct *vma)\n" + " \treturn vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);\n" + " }\n" + " \n" + "-inline unsigned long\n" + "-vma_address(struct page *page, struct vm_area_struct *vma)\n" + "+unsigned long vma_address(struct page *page, struct vm_area_struct *vma)\n" + " 
{\n" + " \tunsigned long address = __vma_address(page, vma);\n" + " \n" + "@@ -662,7 +663,7 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)\n" + " */\n" + " int page_referenced_one(struct page *page, struct vm_area_struct *vma,\n" + " \t\t\tunsigned long address, unsigned int *mapcount,\n" + "-\t\t\tunsigned long *vm_flags)\n" + "+\t\t\tunsigned long *vm_flags, int *is_vrange)\n" + " {\n" + " \tstruct mm_struct *mm = vma->vm_mm;\n" + " \tint referenced = 0;\n" + "@@ -724,6 +725,9 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,\n" + " \t\t\t\treferenced++;\n" + " \t\t}\n" + " \t\tpte_unmap_unlock(pte, ptl);\n" + "+\t\tif (is_vrange &&\n" + "+\t\t\tvrange_address(mm, address, address + PAGE_SIZE - 1))\n" + "+\t\t\t*is_vrange = 1;\n" + " \t}\n" + " \n" + " \t(*mapcount)--;\n" + "@@ -736,7 +740,8 @@ out:\n" + " \n" + " static int page_referenced_anon(struct page *page,\n" + " \t\t\t\tstruct mem_cgroup *memcg,\n" + "-\t\t\t\tunsigned long *vm_flags)\n" + "+\t\t\t\tunsigned long *vm_flags,\n" + "+\t\t\t\tint *is_vrange)\n" + " {\n" + " \tunsigned int mapcount;\n" + " \tstruct anon_vma *anon_vma;\n" + "@@ -761,7 +766,7 @@ static int page_referenced_anon(struct page *page,\n" + " \t\tif (memcg && !mm_match_cgroup(vma->vm_mm, memcg))\n" + " \t\t\tcontinue;\n" + " \t\treferenced += page_referenced_one(page, vma, address,\n" + "-\t\t\t\t\t\t &mapcount, vm_flags);\n" + "+\t\t\t\t\t&mapcount, vm_flags, is_vrange);\n" + " \t\tif (!mapcount)\n" + " \t\t\tbreak;\n" + " \t}\n" + "@@ -785,7 +790,9 @@ static int page_referenced_anon(struct page *page,\n" + " */\n" + " static int page_referenced_file(struct page *page,\n" + " \t\t\t\tstruct mem_cgroup *memcg,\n" + "-\t\t\t\tunsigned long *vm_flags)\n" + "+\t\t\t\tunsigned long *vm_flags,\n" + "+\t\t\t\tint *is_vrange)\n" + "+\n" + " {\n" + " \tunsigned int mapcount;\n" + " \tstruct address_space *mapping = page->mapping;\n" + "@@ -826,7 +833,8 @@ static int page_referenced_file(struct 
page *page,\n" + " \t\tif (memcg && !mm_match_cgroup(vma->vm_mm, memcg))\n" + " \t\t\tcontinue;\n" + " \t\treferenced += page_referenced_one(page, vma, address,\n" + "-\t\t\t\t\t\t &mapcount, vm_flags);\n" + "+\t\t\t\t\t\t\t&mapcount, vm_flags,\n" + "+\t\t\t\t\t\t\tis_vrange);\n" + " \t\tif (!mapcount)\n" + " \t\t\tbreak;\n" + " \t}\n" + "@@ -841,6 +849,7 @@ static int page_referenced_file(struct page *page,\n" + " * @is_locked: caller holds lock on the page\n" + " * @memcg: target memory cgroup\n" + " * @vm_flags: collect encountered vma->vm_flags who actually referenced the page\n" + "+ * @is_vrange: the page in vrange of some process\n" + " *\n" + " * Quick test_and_clear_referenced for all mappings to a page,\n" + " * returns the number of ptes which referenced the page.\n" + "@@ -848,7 +857,8 @@ static int page_referenced_file(struct page *page,\n" + " int page_referenced(struct page *page,\n" + " \t\t int is_locked,\n" + " \t\t struct mem_cgroup *memcg,\n" + "-\t\t unsigned long *vm_flags)\n" + "+\t\t unsigned long *vm_flags,\n" + "+\t\t int *is_vrange)\n" + " {\n" + " \tint referenced = 0;\n" + " \tint we_locked = 0;\n" + "@@ -867,10 +877,10 @@ int page_referenced(struct page *page,\n" + " \t\t\t\t\t\t\t\tvm_flags);\n" + " \t\telse if (PageAnon(page))\n" + " \t\t\treferenced += page_referenced_anon(page, memcg,\n" + "-\t\t\t\t\t\t\t\tvm_flags);\n" + "+\t\t\t\t\t\t\tvm_flags, is_vrange);\n" + " \t\telse if (page->mapping)\n" + " \t\t\treferenced += page_referenced_file(page, memcg,\n" + "-\t\t\t\t\t\t\t\tvm_flags);\n" + "+\t\t\t\t\t\t\tvm_flags, is_vrange);\n" + " \t\tif (we_locked)\n" + " \t\t\tunlock_page(page);\n" + " \n" + "diff --git a/mm/swapfile.c b/mm/swapfile.c\n" + "index 6c340d9..1f6c80e 100644\n" + "--- a/mm/swapfile.c\n" + "+++ b/mm/swapfile.c\n" + "@@ -734,6 +734,42 @@ int try_to_free_swap(struct page *page)\n" + " }\n" + " \n" + " /*\n" + "+ * It's almost same with free_swap_and_cache except page is already\n" + "+ * locked.\n" + "+ */\n" + 
"+int __free_swap_and_cache(swp_entry_t entry)\n" + "+{\n" + "+\tstruct swap_info_struct *p;\n" + "+\tstruct page *page = NULL;\n" + "+\n" + "+\tif (non_swap_entry(entry))\n" + "+\t\treturn 1;\n" + "+\n" + "+\tp = swap_info_get(entry);\n" + "+\tif (p) {\n" + "+\t\tif (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {\n" + "+\t\t\tpage = find_get_page(swap_address_space(entry),\n" + "+\t\t\t\t\t\tentry.val);\n" + "+\t\t}\n" + "+\t\tspin_unlock(&p->lock);\n" + "+\t}\n" + "+\n" + "+\tif (page) {\n" + "+\t\t/*\n" + "+\t\t * Not mapped elsewhere, or swap space full? Free it!\n" + "+\t\t * Also recheck PageSwapCache now page is locked (above).\n" + "+\t\t */\n" + "+\t\tif (PageSwapCache(page) && !PageWriteback(page) &&\n" + "+\t\t\t\t(!page_mapped(page) || vm_swap_full())) {\n" + "+\t\t\tdelete_from_swap_cache(page);\n" + "+\t\t\tSetPageDirty(page);\n" + "+\t\t}\n" + "+\t\tpage_cache_release(page);\n" + "+\t}\n" + "+\treturn p != NULL;\n" + "+}\n" + "+\n" + "+/*\n" + " * Free the swap entry like above, but also try to\n" + " * free the page cache entry if it is the last user.\n" + " */\n" + "diff --git a/mm/vmscan.c b/mm/vmscan.c\n" + "index fa6a853..c75e0ac 100644\n" + "--- a/mm/vmscan.c\n" + "+++ b/mm/vmscan.c\n" + "@@ -43,6 +43,7 @@\n" + " #include <linux/sysctl.h>\n" + " #include <linux/oom.h>\n" + " #include <linux/prefetch.h>\n" + "+#include <linux/vrange.h>\n" + " \n" + " #include <asm/tlbflush.h>\n" + " #include <asm/div64.h>\n" + "@@ -611,6 +612,7 @@ enum page_references {\n" + " \tPAGEREF_RECLAIM,\n" + " \tPAGEREF_RECLAIM_CLEAN,\n" + " \tPAGEREF_KEEP,\n" + "+\tPAGEREF_DISCARD,\n" + " \tPAGEREF_ACTIVATE,\n" + " };\n" + " \n" + "@@ -619,9 +621,10 @@ static enum page_references page_check_references(struct page *page,\n" + " {\n" + " \tint referenced_ptes, referenced_page;\n" + " \tunsigned long vm_flags;\n" + "+\tint is_vrange = 0;\n" + " \n" + " \treferenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,\n" + "-\t\t\t\t\t &vm_flags);\n" + "+\t\t\t\t\t 
&vm_flags, &is_vrange);\n" + " \treferenced_page = TestClearPageReferenced(page);\n" + " \n" + " \t/*\n" + "@@ -631,6 +634,12 @@ static enum page_references page_check_references(struct page *page,\n" + " \tif (vm_flags & VM_LOCKED)\n" + " \t\treturn PAGEREF_RECLAIM;\n" + " \n" + "+\t/*\n" + "+\t * Bail out if the page is in vrange and try to discard.\n" + "+\t */\n" + "+\tif (is_vrange)\n" + "+\t\treturn PAGEREF_DISCARD;\n" + "+\n" + " \tif (referenced_ptes) {\n" + " \t\tif (PageSwapBacked(page))\n" + " \t\t\treturn PAGEREF_ACTIVATE;\n" + "@@ -769,6 +778,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,\n" + " \t\t\tgoto activate_locked;\n" + " \t\tcase PAGEREF_KEEP:\n" + " \t\t\tgoto keep_locked;\n" + "+\t\tcase PAGEREF_DISCARD:\n" + "+\t\t\tif (discard_vpage(page))\n" + "+\t\t\t\tgoto free_it;\n" + " \t\tcase PAGEREF_RECLAIM:\n" + " \t\tcase PAGEREF_RECLAIM_CLEAN:\n" + " \t\t\t; /* try to reclaim the page below */\n" + "@@ -1497,7 +1509,7 @@ static void shrink_active_list(unsigned long nr_to_scan,\n" + " \t\t}\n" + " \n" + " \t\tif (page_referenced(page, 0, sc->target_mem_cgroup,\n" + "-\t\t\t\t &vm_flags)) {\n" + "+\t\t\t\t &vm_flags, NULL)) {\n" + " \t\t\tnr_rotated += hpage_nr_pages(page);\n" + " \t\t\t/*\n" + " \t\t\t * Identify referenced, file-backed active pages and\n" + "diff --git a/mm/vrange.c b/mm/vrange.c\n" + "index 5278939..d57cb38 100644\n" + "--- a/mm/vrange.c\n" + "+++ b/mm/vrange.c\n" + "@@ -6,6 +6,13 @@\n" + " #include <linux/slab.h>\n" + " #include <linux/mman.h>\n" + " #include <linux/syscalls.h>\n" + "+#include <linux/pagemap.h>\n" + "+#include <linux/rmap.h>\n" + "+#include <linux/hugetlb.h>\n" + "+#include \"internal.h\"\n" + "+#include <linux/swap.h>\n" + "+#include <linux/swapops.h>\n" + "+#include <linux/mmu_notifier.h>\n" + " \n" + " static struct kmem_cache *vrange_cachep;\n" + " \n" + "@@ -364,3 +371,328 @@ SYSCALL_DEFINE4(vrange, unsigned long, start,\n" + " out:\n" + " \treturn ret;\n" + " }\n" + "+\n" + 
"+\n" + "+static bool __vrange_address(struct vrange_root *vroot,\n" + "+\t\t\tunsigned long start, unsigned long end)\n" + "+{\n" + "+\tstruct interval_tree_node *node;\n" + "+\n" + "+\tnode = interval_tree_iter_first(&vroot->v_rb, start, end);\n" + "+\treturn node ? true : false;\n" + "+}\n" + "+\n" + "+bool vrange_address(struct mm_struct *mm,\n" + "+\t\t\tunsigned long start, unsigned long end)\n" + "+{\n" + "+\tstruct vrange_root *vroot;\n" + "+\tunsigned long vstart_idx, vend_idx;\n" + "+\tstruct vm_area_struct *vma;\n" + "+\tbool ret;\n" + "+\n" + "+\tvma = find_vma(mm, start);\n" + "+\tif (vma->vm_file && (vma->vm_flags & VM_SHARED)) {\n" + "+\t\tvroot = &vma->vm_file->f_mapping->vroot;\n" + "+\t\tvstart_idx = vma->vm_pgoff + start - vma->vm_start;\n" + "+\t\tvend_idx = vma->vm_pgoff + end - vma->vm_start;\n" + "+\t} else {\n" + "+\t\tvroot = &mm->vroot;\n" + "+\t\tvstart_idx = start;\n" + "+\t\tvend_idx = end;\n" + "+\t}\n" + "+\n" + "+\tvrange_lock(vroot);\n" + "+\tret = __vrange_address(vroot, vstart_idx, vend_idx);\n" + "+\tvrange_unlock(vroot);\n" + "+\treturn ret;\n" + "+}\n" + "+\n" + "+static pte_t *__vpage_check_address(struct page *page,\n" + "+\t\tstruct mm_struct *mm, unsigned long address, spinlock_t **ptlp)\n" + "+{\n" + "+\tpmd_t *pmd;\n" + "+\tpte_t *pte;\n" + "+\tspinlock_t *ptl;\n" + "+\tbool present;\n" + "+\n" + "+\t/* TODO : look into tlbfs */\n" + "+\tif (unlikely(PageHuge(page)))\n" + "+\t\treturn NULL;\n" + "+\n" + "+\tpmd = mm_find_pmd(mm, address);\n" + "+\tif (!pmd)\n" + "+\t\treturn NULL;\n" + "+\t/*\n" + "+\t * TODO : Support THP\n" + "+\t */\n" + "+\tif (pmd_trans_huge(*pmd))\n" + "+\t\treturn NULL;\n" + "+\n" + "+\tpte = pte_offset_map_lock(mm, pmd, address, &ptl);\n" + "+\tif (pte_none(*pte))\n" + "+\t\tgoto out;\n" + "+\n" + "+\tpresent = pte_present(*pte);\n" + "+\tif (present && page_to_pfn(page) != pte_pfn(*pte))\n" + "+\t\tgoto out;\n" + "+\telse if (present) {\n" + "+\t\t*ptlp = ptl;\n" + "+\t\treturn pte;\n" + "+\t} 
else {\n" + "+\t\tswp_entry_t entry = { .val = page_private(page) };\n" + "+\n" + "+\t\tVM_BUG_ON(non_swap_entry(entry));\n" + "+\t\tif (entry.val != pte_to_swp_entry(*pte).val)\n" + "+\t\t\tgoto out;\n" + "+\t\t*ptlp = ptl;\n" + "+\t\treturn pte;\n" + "+\t}\n" + "+out:\n" + "+\tpte_unmap_unlock(pte, ptl);\n" + "+\treturn NULL;\n" + "+}\n" + "+\n" + "+/*\n" + "+ * This functions checks @page is matched with pte's encoded one\n" + "+ * which could be a page or swap slot.\n" + "+ */\n" + "+static inline pte_t *vpage_check_address(struct page *page,\n" + "+\t\tstruct mm_struct *mm, unsigned long address,\n" + "+\t\tspinlock_t **ptlp)\n" + "+{\n" + "+\tpte_t *ptep;\n" + "+\t__cond_lock(*ptlp, ptep = __vpage_check_address(page,\n" + "+\t\t\t\tmm, address, ptlp));\n" + "+\treturn ptep;\n" + "+}\n" + "+\n" + "+static void __vrange_purge(struct vrange_root *vroot,\n" + "+\t\tunsigned long start, unsigned long end)\n" + "+{\n" + "+\tstruct vrange *range;\n" + "+\tstruct interval_tree_node *node;\n" + "+\n" + "+\tnode = interval_tree_iter_first(&vroot->v_rb, start, end);\n" + "+\twhile (node) {\n" + "+\t\trange = container_of(node, struct vrange, node);\n" + "+\t\trange->purged = true;\n" + "+\t\tnode = interval_tree_iter_next(node, start, end);\n" + "+\t}\n" + "+}\n" + "+\n" + "+int try_to_discard_one(struct vrange_root *vroot, struct page *page,\n" + "+\t\t\tstruct vm_area_struct *vma, unsigned long addr)\n" + "+{\n" + "+\tstruct mm_struct *mm = vma->vm_mm;\n" + "+\tpte_t *pte;\n" + "+\tpte_t pteval;\n" + "+\tspinlock_t *ptl;\n" + "+\tint ret = 0;\n" + "+\tbool present;\n" + "+\n" + "+\tVM_BUG_ON(!PageLocked(page));\n" + "+\n" + "+\tvrange_lock(vroot);\n" + "+\tpte = vpage_check_address(page, mm, addr, &ptl);\n" + "+\tif (!pte)\n" + "+\t\tgoto out;\n" + "+\n" + "+\tif (vma->vm_flags & VM_LOCKED) {\n" + "+\t\tpte_unmap_unlock(pte, ptl);\n" + "+\t\tgoto out;\n" + "+\t}\n" + "+\n" + "+\tpresent = pte_present(*pte);\n" + "+\tflush_cache_page(vma, address, 
page_to_pfn(page));\n" + "+\tpteval = ptep_clear_flush(vma, addr, pte);\n" + "+\n" + "+\tupdate_hiwater_rss(mm);\n" + "+\tif (present) {\n" + "+\t\tif (PageAnon(page))\n" + "+\t\t\tdec_mm_counter(mm, MM_ANONPAGES);\n" + "+\t\telse\n" + "+\t\t\tdec_mm_counter(mm, MM_FILEPAGES);\n" + "+\t\tpage_remove_rmap(page);\n" + "+\t\tpage_cache_release(page);\n" + "+\t} else {\n" + "+\t\tswp_entry_t entry = pte_to_swp_entry(pteval);\n" + "+\t\tdec_mm_counter(mm, MM_SWAPENTS);\n" + "+\t\tif (unlikely(!__free_swap_and_cache(entry)))\n" + "+\t\t\tBUG_ON(1);\n" + "+\t}\n" + "+\n" + "+\tpte_unmap_unlock(pte, ptl);\n" + "+\tmmu_notifier_invalidate_page(mm, addr);\n" + "+\tret = 1;\n" + "+\n" + "+\tif (!PageAnon(page)) /* switch to file offset) */\n" + "+\t\taddr = vma->vm_pgoff + addr - vma->vm_start;\n" + "+\n" + "+\t__vrange_purge(vroot, addr, addr + PAGE_SIZE - 1);\n" + "+\n" + "+out:\n" + "+\tvrange_unlock(vroot);\n" + "+\treturn ret;\n" + "+}\n" + "+\n" + "+static int try_to_discard_anon_vpage(struct page *page)\n" + "+{\n" + "+\tstruct anon_vma *anon_vma;\n" + "+\tstruct anon_vma_chain *avc;\n" + "+\tpgoff_t pgoff;\n" + "+\tstruct vm_area_struct *vma;\n" + "+\tstruct mm_struct *mm;\n" + "+\tstruct vrange_root *vroot;\n" + "+\n" + "+\tunsigned long address;\n" + "+\tbool ret = 0;\n" + "+\n" + "+\tanon_vma = page_lock_anon_vma_read(page);\n" + "+\tif (!anon_vma)\n" + "+\t\treturn ret;\n" + "+\n" + "+\tpgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);\n" + "+\tanon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {\n" + "+\t\tpte_t *pte;\n" + "+\t\tspinlock_t *ptl;\n" + "+\n" + "+\t\tvma = avc->vma;\n" + "+\t\tmm = vma->vm_mm;\n" + "+\t\tvroot = &mm->vroot;\n" + "+\t\taddress = vma_address(page, vma);\n" + "+\n" + "+\t\tvrange_lock(vroot);\n" + "+\t\t/*\n" + "+\t\t * We can't use page_check_address because it doesn't check\n" + "+\t\t * swap entry of the page table. 
We need the check because\n" + "+\t\t * we have to make sure atomicity of shared vrange.\n" + "+\t\t * It means all vranges which are shared a page should be\n" + "+\t\t * purged if a page in a process is purged.\n" + "+\t\t */\n" + "+\t\tpte = vpage_check_address(page, mm, address, &ptl);\n" + "+\t\tif (!pte) {\n" + "+\t\t\tvrange_unlock(vroot);\n" + "+\t\t\tcontinue;\n" + "+\t\t}\n" + "+\n" + "+\t\tif (vma->vm_flags & VM_LOCKED) {\n" + "+\t\t\tpte_unmap_unlock(pte, ptl);\n" + "+\t\t\tvrange_unlock(vroot);\n" + "+\t\t\tgoto out;\n" + "+\t\t}\n" + "+\n" + "+\t\tpte_unmap_unlock(pte, ptl);\n" + "+\t\tif (!__vrange_address(vroot, address,\n" + "+\t\t\t\t\taddress + PAGE_SIZE - 1)) {\n" + "+\t\t\tvrange_unlock(vroot);\n" + "+\t\t\tgoto out;\n" + "+\t\t}\n" + "+\n" + "+\t\tvrange_unlock(vroot);\n" + "+\t}\n" + "+\n" + "+\tanon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {\n" + "+\t\tvma = avc->vma;\n" + "+\t\tmm = vma->vm_mm;\n" + "+\t\tvroot = &mm->vroot;\n" + "+\t\taddress = vma_address(page, vma);\n" + "+\t\tif (!try_to_discard_one(vroot, page, vma, address))\n" + "+\t\t\tgoto out;\n" + "+\t}\n" + "+\n" + "+\tret = 1;\n" + "+out:\n" + "+\tpage_unlock_anon_vma_read(anon_vma);\n" + "+\treturn ret;\n" + "+}\n" + "+\n" + "+\n" + "+\n" + "+static int try_to_discard_file_vpage(struct page *page)\n" + "+{\n" + "+\tstruct address_space *mapping = page->mapping;\n" + "+\tpgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);\n" + "+\tstruct vm_area_struct *vma;\n" + "+\tbool ret = 0;\n" + "+\n" + "+\tmutex_lock(&mapping->i_mmap_mutex);\n" + "+\tvma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {\n" + "+\t\tunsigned long address = vma_address(page, vma);\n" + "+\t\tstruct mm_struct *mm = vma->vm_mm;\n" + "+\t\tstruct vrange_root *vroot = &mapping->vroot;\n" + "+\t\tpte_t *pte;\n" + "+\t\tspinlock_t *ptl;\n" + "+\t\tlong vstart_idx;\n" + "+\n" + "+\n" + "+\t\tvstart_idx = vma->vm_pgoff + address - vma->vm_start;\n" + "+\n" + 
"+\t\tvrange_lock(vroot);\n" + "+\t\t/*\n" + "+\t\t * We can't use page_check_address because it doesn't check\n" + "+\t\t * swap entry of the page table. We need the check because\n" + "+\t\t * we have to make sure atomicity of shared vrange.\n" + "+\t\t * It means all vranges which are shared a page should be\n" + "+\t\t * purged if a page in a process is purged.\n" + "+\t\t */\n" + "+\t\tpte = vpage_check_address(page, mm, address, &ptl);\n" + "+\t\tif (!pte) {\n" + "+\t\t\tvrange_unlock(vroot);\n" + "+\t\t\tcontinue;\n" + "+\t\t}\n" + "+\n" + "+\t\tif (vma->vm_flags & VM_LOCKED) {\n" + "+\t\t\tpte_unmap_unlock(pte, ptl);\n" + "+\t\t\tvrange_unlock(vroot);\n" + "+\t\t\tgoto out;\n" + "+\t\t}\n" + "+\n" + "+\t\tpte_unmap_unlock(pte, ptl);\n" + "+\t\tif (!__vrange_address(vroot, vstart_idx,\n" + "+\t\t\t\t\tvstart_idx + PAGE_SIZE - 1)) {\n" + "+\t\t\tvrange_unlock(vroot);\n" + "+\t\t\tgoto out;\n" + "+\t\t}\n" + "+\n" + "+\t\tvrange_unlock(vroot);\n" + "+\t}\n" + "+\n" + "+\tvma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {\n" + "+\t\tunsigned long address = vma_address(page, vma);\n" + "+\t\tstruct vrange_root *vroot = &mapping->vroot;\n" + "+\n" + "+\t\tif (!try_to_discard_one(vroot, page, vma, address))\n" + "+\t\t\tgoto out;\n" + "+\t}\n" + "+\n" + "+\tret = 1;\n" + "+out:\n" + "+\tmutex_unlock(&mapping->i_mmap_mutex);\n" + "+\treturn ret;\n" + "+}\n" + "+\n" + "+static int try_to_discard_vpage(struct page *page)\n" + "+{\n" + "+\tif (PageAnon(page))\n" + "+\t\treturn try_to_discard_anon_vpage(page);\n" + "+\treturn try_to_discard_file_vpage(page);\n" + "+}\n" + "+\n" + "+int discard_vpage(struct page *page)\n" + "+{\n" + "+\tVM_BUG_ON(!PageLocked(page));\n" + "+\tVM_BUG_ON(PageLRU(page));\n" + "+\n" + "+\tif (try_to_discard_vpage(page)) {\n" + "+\t\tif (PageSwapCache(page))\n" + "+\t\t\ttry_to_free_swap(page);\n" + "+\n" + "+\t\tif (page_freeze_refs(page, 1)) {\n" + "+\t\t\tunlock_page(page);\n" + "+\t\t\treturn 1;\n" + "+\t\t}\n" + "+\t}\n" + 
"+\n" + "+\treturn 0;\n" + "+}\n" + "+\n" + "-- \n" + "1.7.9.5\n" + "\n" + "-- \n" + "Kind regards,\n" + Minchan Kim -17754790192883c036c39ccfb16af6efedb15b58fad1e71a1c180467d701b10f +a8eb9aab90e06e61be4847eef4f5f62d9d8d89b1e85ca067de2dbc7a91b71cd8
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.