diff for duplicates of <20190520035254.57579-2-minchan@kernel.org> diff --git a/a/1.txt b/N1/1.txt index 12d2a8a..317b4ca 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -1,338 +1,133 @@ -When a process expects no accesses to a certain memory range -it could hint kernel that the pages can be reclaimed -when memory pressure happens but data should be preserved -for future use. This could reduce workingset eviction so it -ends up increasing performance. -This patch introduces the new MADV_COOL hint to madvise(2) -syscall. MADV_COOL can be used by a process to mark a memory range -as not expected to be used in the near future. The hint can help -kernel in deciding which pages to evict early during memory -pressure. +On Mon, 20 May 2019 12:52:48 +0900 Minchan Kim wrote: +> +static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr, +> + unsigned long end, struct mm_walk *walk) +> +{ +> + pte_t *orig_pte, *pte, ptent; +> + spinlock_t *ptl; +> + struct page *page; +> + struct vm_area_struct *vma = walk->vma; +> + unsigned long next; +> + +> + next = pmd_addr_end(addr, end); +> + if (pmd_trans_huge(*pmd)) { +> + spinlock_t *ptl; -Internally, it works via deactivating memory from active list to -inactive's head so when the memory pressure happens, they will be -reclaimed earlier than other active pages unless there is no -access until the time. +Seems not needed with another ptl declared above. +> + +> + ptl = pmd_trans_huge_lock(pmd, vma); +> + if (!ptl) +> + return 0; +> + +> + if (is_huge_zero_pmd(*pmd)) +> + goto huge_unlock; +> + +> + page = pmd_page(*pmd); +> + if (page_mapcount(page) > 1) +> + goto huge_unlock; +> + +> + if (next - addr != HPAGE_PMD_SIZE) { +> + int err; -* v1r2 - * use clear_page_young in deactivate_page - joelaf +Alternately, we deactivate thp only if the address range from userspace +is sane enough, in order to avoid complex works we have to do here. +> + +> + get_page(page); +> + spin_unlock(ptl); +> + lock_page(page); +> + err = split_huge_page(page); +> + unlock_page(page); +> + put_page(page); +> + if (!err) +> + goto regular_page; +> + return 0; +> + } +> + +> + pmdp_test_and_clear_young(vma, addr, pmd); +> + deactivate_page(page); +> +huge_unlock: +> + spin_unlock(ptl); +> + return 0; +> + } +> + +> + if (pmd_trans_unstable(pmd)) +> + return 0; +> + +> +regular_page: -* v1r1 - * Revise the description - surenb - * Renaming from MADV_WARM to MADV_COOL - surenb +Take a look at pending signal? -Signed-off-by: Minchan Kim <minchan@kernel.org> ---- - include/linux/page-flags.h | 1 + - include/linux/page_idle.h | 15 ++++ - include/linux/swap.h | 1 + - include/uapi/asm-generic/mman-common.h | 1 + - mm/madvise.c | 112 +++++++++++++++++++++++++ - mm/swap.c | 43 ++++++++++ - 6 files changed, 173 insertions(+) +> + orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); +> + for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) { -diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h -index 9f8712a4b1a5..58b06654c8dd 100644 ---- a/include/linux/page-flags.h -+++ b/include/linux/page-flags.h -@@ -424,6 +424,7 @@ static inline bool set_hwpoison_free_buddy_page(struct page *page) - TESTPAGEFLAG(Young, young, PF_ANY) - SETPAGEFLAG(Young, young, PF_ANY) - TESTCLEARFLAG(Young, young, PF_ANY) -+CLEARPAGEFLAG(Young, young, PF_ANY) - PAGEFLAG(Idle, idle, PF_ANY) - #endif - -diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h -index 1e894d34bdce..f3f43b317150 100644 ---- a/include/linux/page_idle.h -+++ b/include/linux/page_idle.h -@@ -19,6 +19,11 @@ static inline void set_page_young(struct page *page) - SetPageYoung(page); - } - -+static inline void clear_page_young(struct page *page) -+{ -+ ClearPageYoung(page); -+} -+ - static inline bool test_and_clear_page_young(struct page *page) - { - return TestClearPageYoung(page); -@@ -65,6 +70,16 @@ static inline void set_page_young(struct page *page) - set_bit(PAGE_EXT_YOUNG, &page_ext->flags); - } - -+static void clear_page_young(struct page *page) -+{ -+ struct page_ext *page_ext = lookup_page_ext(page); -+ -+ if (unlikely(!page_ext)) -+ return; -+ -+ clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); -+} -+ - static inline bool test_and_clear_page_young(struct page *page) - { - struct page_ext *page_ext = lookup_page_ext(page); -diff --git a/include/linux/swap.h b/include/linux/swap.h -index 4bfb5c4ac108..64795abea003 100644 ---- a/include/linux/swap.h -+++ b/include/linux/swap.h -@@ -340,6 +340,7 @@ extern void lru_add_drain_cpu(int cpu); - extern void lru_add_drain_all(void); - extern void rotate_reclaimable_page(struct page *page); - extern void deactivate_file_page(struct page *page); -+extern void deactivate_page(struct page *page); - extern void mark_page_lazyfree(struct page *page); - extern void swap_setup(void); - -diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h -index abd238d0f7a4..f7a4a5d4b642 100644 ---- a/include/uapi/asm-generic/mman-common.h -+++ b/include/uapi/asm-generic/mman-common.h -@@ -42,6 +42,7 @@ - #define MADV_SEQUENTIAL 2 /* expect sequential page references */ - #define MADV_WILLNEED 3 /* will need these pages */ - #define MADV_DONTNEED 4 /* don't need these pages */ -+#define MADV_COOL 5 /* deactivatie these pages */ - - /* common parameters: try to keep these consistent across architectures */ - #define MADV_FREE 8 /* free pages only if memory pressure */ -diff --git a/mm/madvise.c b/mm/madvise.c -index 628022e674a7..c05817fb570d 100644 ---- a/mm/madvise.c -+++ b/mm/madvise.c -@@ -8,6 +8,7 @@ - - #include <linux/mman.h> - #include <linux/pagemap.h> -+#include <linux/page_idle.h> - #include <linux/syscalls.h> - #include <linux/mempolicy.h> - #include <linux/page-isolation.h> -@@ -40,6 +41,7 @@ static int madvise_need_mmap_write(int behavior) - case MADV_REMOVE: - case MADV_WILLNEED: - case MADV_DONTNEED: -+ case MADV_COOL: - case MADV_FREE: - return 0; - default: -@@ -307,6 +309,113 @@ static long madvise_willneed(struct vm_area_struct *vma, - return 0; - } - -+static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr, -+ unsigned long end, struct mm_walk *walk) -+{ -+ pte_t *orig_pte, *pte, ptent; -+ spinlock_t *ptl; -+ struct page *page; -+ struct vm_area_struct *vma = walk->vma; -+ unsigned long next; -+ -+ next = pmd_addr_end(addr, end); -+ if (pmd_trans_huge(*pmd)) { -+ spinlock_t *ptl; -+ -+ ptl = pmd_trans_huge_lock(pmd, vma); -+ if (!ptl) -+ return 0; -+ -+ if (is_huge_zero_pmd(*pmd)) -+ goto huge_unlock; -+ -+ page = pmd_page(*pmd); -+ if (page_mapcount(page) > 1) -+ goto huge_unlock; -+ -+ if (next - addr != HPAGE_PMD_SIZE) { -+ int err; -+ -+ get_page(page); -+ spin_unlock(ptl); -+ lock_page(page); -+ err = split_huge_page(page); -+ unlock_page(page); -+ put_page(page); -+ if (!err) -+ goto regular_page; -+ return 0; -+ } -+ -+ pmdp_test_and_clear_young(vma, addr, pmd); -+ deactivate_page(page); -+huge_unlock: -+ spin_unlock(ptl); -+ return 0; -+ } -+ -+ if (pmd_trans_unstable(pmd)) -+ return 0; -+ -+regular_page: -+ orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); -+ for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) { -+ ptent = *pte; -+ -+ if (pte_none(ptent)) -+ continue; -+ -+ if (!pte_present(ptent)) -+ continue; -+ -+ page = vm_normal_page(vma, addr, ptent); -+ if (!page) -+ continue; -+ -+ if (page_mapcount(page) > 1) -+ continue; -+ -+ ptep_test_and_clear_young(vma, addr, pte); -+ deactivate_page(page); -+ } -+ -+ pte_unmap_unlock(orig_pte, ptl); -+ cond_resched(); -+ -+ return 0; -+} -+ -+static void madvise_cool_page_range(struct mmu_gather *tlb, -+ struct vm_area_struct *vma, -+ unsigned long addr, unsigned long end) -+{ -+ struct mm_walk cool_walk = { -+ .pmd_entry = madvise_cool_pte_range, -+ .mm = vma->vm_mm, -+ }; -+ -+ tlb_start_vma(tlb, vma); -+ walk_page_range(addr, end, &cool_walk); -+ tlb_end_vma(tlb, vma); -+} -+ -+static long madvise_cool(struct vm_area_struct *vma, -+ unsigned long start_addr, unsigned long end_addr) -+{ -+ struct mm_struct *mm = vma->vm_mm; -+ struct mmu_gather tlb; -+ -+ if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) -+ return -EINVAL; -+ -+ lru_add_drain(); -+ tlb_gather_mmu(&tlb, mm, start_addr, end_addr); -+ madvise_cool_page_range(&tlb, vma, start_addr, end_addr); -+ tlb_finish_mmu(&tlb, start_addr, end_addr); -+ -+ return 0; -+} -+ - static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, struct mm_walk *walk) - -@@ -695,6 +804,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, - return madvise_remove(vma, prev, start, end); - case MADV_WILLNEED: - return madvise_willneed(vma, prev, start, end); -+ case MADV_COOL: -+ return madvise_cool(vma, start, end); - case MADV_FREE: - case MADV_DONTNEED: - return madvise_dontneed_free(vma, prev, start, end, behavior); -@@ -716,6 +827,7 @@ madvise_behavior_valid(int behavior) - case MADV_WILLNEED: - case MADV_DONTNEED: - case MADV_FREE: -+ case MADV_COOL: - #ifdef CONFIG_KSM - case MADV_MERGEABLE: - case MADV_UNMERGEABLE: -diff --git a/mm/swap.c b/mm/swap.c -index 3a75722e68a9..0f94c3b5397d 100644 ---- a/mm/swap.c -+++ b/mm/swap.c -@@ -46,6 +46,7 @@ int page_cluster; - static DEFINE_PER_CPU(struct pagevec, lru_add_pvec); - static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); - static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs); -+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); - static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs); - #ifdef CONFIG_SMP - static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); -@@ -537,6 +538,23 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, - update_page_reclaim_stat(lruvec, file, 0); - } - -+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, -+ void *arg) -+{ -+ if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { -+ int file = page_is_file_cache(page); -+ int lru = page_lru_base_type(page); -+ -+ del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE); -+ ClearPageActive(page); -+ ClearPageReferenced(page); -+ clear_page_young(page); -+ add_page_to_lru_list(page, lruvec, lru); -+ -+ __count_vm_events(PGDEACTIVATE, hpage_nr_pages(page)); -+ update_page_reclaim_stat(lruvec, file, 0); -+ } -+} - - static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, - void *arg) -@@ -589,6 +607,10 @@ void lru_add_drain_cpu(int cpu) - if (pagevec_count(pvec)) - pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); - -+ pvec = &per_cpu(lru_deactivate_pvecs, cpu); -+ if (pagevec_count(pvec)) -+ pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); -+ - pvec = &per_cpu(lru_lazyfree_pvecs, cpu); - if (pagevec_count(pvec)) - pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL); -@@ -622,6 +644,26 @@ void deactivate_file_page(struct page *page) - } - } - -+/* -+ * deactivate_page - deactivate a page -+ * @page: page to deactivate -+ * -+ * deactivate_page() moves @page to the inactive list if @page was on the active -+ * list and was not an unevictable page. This is done to accelerate the reclaim -+ * of @page. -+ */ -+void deactivate_page(struct page *page) -+{ -+ if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { -+ struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); -+ -+ get_page(page); -+ if (!pagevec_add(pvec, page) || PageCompound(page)) -+ pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); -+ put_cpu_var(lru_deactivate_pvecs); -+ } -+} -+ - /** - * mark_page_lazyfree - make an anon page lazyfree - * @page: page to deactivate -@@ -686,6 +728,7 @@ void lru_add_drain_all(void) - if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || - pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || - pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || -+ pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || - pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) || - need_activate_page_drain(cpu)) { - INIT_WORK(work, lru_add_drain_per_cpu); --- -2.21.0.1020.gf2820cf01a-goog +s/end/next/ ? +> + ptent = *pte; +> + +> + if (pte_none(ptent)) +> + continue; +> + +> + if (!pte_present(ptent)) +> + continue; +> + +> + page = vm_normal_page(vma, addr, ptent); +> + if (!page) +> + continue; +> + +> + if (page_mapcount(page) > 1) +> + continue; +> + +> + ptep_test_and_clear_young(vma, addr, pte); +> + deactivate_page(page); +> + } +> + +> + pte_unmap_unlock(orig_pte, ptl); +> + cond_resched(); +> + +> + return 0; +> +} +> + +> +static long madvise_cool(struct vm_area_struct *vma, +> + unsigned long start_addr, unsigned long end_addr) +> +{ +> + struct mm_struct *mm = vma->vm_mm; +> + struct mmu_gather tlb; +> + +> + if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) +> + return -EINVAL; + +No service in case of VM_IO? +> + +> + lru_add_drain(); +> + tlb_gather_mmu(&tlb, mm, start_addr, end_addr); +> + madvise_cool_page_range(&tlb, vma, start_addr, end_addr); +> + tlb_finish_mmu(&tlb, start_addr, end_addr); +> + +> + return 0; +> +} +> + +> +/* +> + * deactivate_page - deactivate a page +> + * @page: page to deactivate +> + * +> + * deactivate_page() moves @page to the inactive list if @page was on the active +> + * list and was not an unevictable page. This is done to accelerate the reclaim +> + * of @page. +> + */ +> +void deactivate_page(struct page *page) +> +{ +> + if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { +> + struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); +> + +> + get_page(page); + +A line of comment seems needed for pinning the page. + +> + if (!pagevec_add(pvec, page) || PageCompound(page)) +> + pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); +> + put_cpu_var(lru_deactivate_pvecs); +> + } +> +} +> + + +-- +Hillf diff --git a/a/content_digest b/N1/content_digest index c3e13e8..37fbc5e 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -1,9 +1,10 @@ "ref\020190520035254.57579-1-minchan@kernel.org\0" - "From\0Minchan Kim <minchan@kernel.org>\0" - "Subject\0[RFC 1/7] mm: introduce MADV_COOL\0" - "Date\0Mon, 20 May 2019 12:52:48 +0900\0" - "To\0Andrew Morton <akpm@linux-foundation.org>\0" - "Cc\0LKML <linux-kernel@vger.kernel.org>" + "From\0Hillf Danton <hdanton@sina.com>\0" + "Subject\0Re: [RFC 1/7] mm: introduce MADV_COOL\0" + "Date\0Tue, 28 May 2019 16:53:01 +0800\0" + "To\0Minchan Kim <minchan@kernel.org>\0" + "Cc\0Andrew Morton <akpm@linux-foundation.org>" + LKML <linux-kernel@vger.kernel.org> linux-mm <linux-mm@kvack.org> Michal Hocko <mhocko@suse.com> Johannes Weiner <hannes@cmpxchg.org> @@ -13,347 +14,141 @@ Daniel Colascione <dancol@google.com> Shakeel Butt <shakeelb@google.com> Sonny Rao <sonnyrao@google.com> - Brian Geffon <bgeffon@google.com> - " Minchan Kim <minchan@kernel.org>\0" + " Brian Geffon <bgeffon@google.com>\0" "\00:1\0" "b\0" - "When a process expects no accesses to a certain memory range\n" - "it could hint kernel that the pages can be reclaimed\n" - "when memory pressure happens but data should be preserved\n" - "for future use. This could reduce workingset eviction so it\n" - "ends up increasing performance.\n" "\n" - "This patch introduces the new MADV_COOL hint to madvise(2)\n" - "syscall. MADV_COOL can be used by a process to mark a memory range\n" - "as not expected to be used in the near future. The hint can help\n" - "kernel in deciding which pages to evict early during memory\n" - "pressure.\n" + "On Mon, 20 May 2019 12:52:48 +0900 Minchan Kim wrote:\n" + "> +static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,\n" + "> +\t\t\t\tunsigned long end, struct mm_walk *walk)\n" + "> +{\n" + "> +\tpte_t *orig_pte, *pte, ptent;\n" + "> +\tspinlock_t *ptl;\n" + "> +\tstruct page *page;\n" + "> +\tstruct vm_area_struct *vma = walk->vma;\n" + "> +\tunsigned long next;\n" + "> +\n" + "> +\tnext = pmd_addr_end(addr, end);\n" + "> +\tif (pmd_trans_huge(*pmd)) {\n" + "> +\t\tspinlock_t *ptl;\n" "\n" - "Internally, it works via deactivating memory from active list to\n" - "inactive's head so when the memory pressure happens, they will be\n" - "reclaimed earlier than other active pages unless there is no\n" - "access until the time.\n" + "Seems not needed with another ptl declared above.\n" + "> +\n" + "> +\t\tptl = pmd_trans_huge_lock(pmd, vma);\n" + "> +\t\tif (!ptl)\n" + "> +\t\t\treturn 0;\n" + "> +\n" + "> +\t\tif (is_huge_zero_pmd(*pmd))\n" + "> +\t\t\tgoto huge_unlock;\n" + "> +\n" + "> +\t\tpage = pmd_page(*pmd);\n" + "> +\t\tif (page_mapcount(page) > 1)\n" + "> +\t\t\tgoto huge_unlock;\n" + "> +\n" + "> +\t\tif (next - addr != HPAGE_PMD_SIZE) {\n" + "> +\t\t\tint err;\n" "\n" - "* v1r2\n" - " * use clear_page_young in deactivate_page - joelaf\n" + "Alternately, we deactivate thp only if the address range from userspace\n" + "is sane enough, in order to avoid complex works we have to do here.\n" + "> +\n" + "> +\t\t\tget_page(page);\n" + "> +\t\t\tspin_unlock(ptl);\n" + "> +\t\t\tlock_page(page);\n" + "> +\t\t\terr = split_huge_page(page);\n" + "> +\t\t\tunlock_page(page);\n" + "> +\t\t\tput_page(page);\n" + "> +\t\t\tif (!err)\n" + "> +\t\t\t\tgoto regular_page;\n" + "> +\t\t\treturn 0;\n" + "> +\t\t}\n" + "> +\n" + "> +\t\tpmdp_test_and_clear_young(vma, addr, pmd);\n" + "> +\t\tdeactivate_page(page);\n" + "> +huge_unlock:\n" + "> +\t\tspin_unlock(ptl);\n" + "> +\t\treturn 0;\n" + "> +\t}\n" + "> +\n" + "> +\tif (pmd_trans_unstable(pmd))\n" + "> +\t\treturn 0;\n" + "> +\n" + "> +regular_page:\n" "\n" - "* v1r1\n" - " * Revise the description - surenb\n" - " * Renaming from MADV_WARM to MADV_COOL - surenb\n" + "Take a look at pending signal?\n" "\n" - "Signed-off-by: Minchan Kim <minchan@kernel.org>\n" - "---\n" - " include/linux/page-flags.h | 1 +\n" - " include/linux/page_idle.h | 15 ++++\n" - " include/linux/swap.h | 1 +\n" - " include/uapi/asm-generic/mman-common.h | 1 +\n" - " mm/madvise.c | 112 +++++++++++++++++++++++++\n" - " mm/swap.c | 43 ++++++++++\n" - " 6 files changed, 173 insertions(+)\n" + "> +\torig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);\n" + "> +\tfor (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {\n" "\n" - "diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h\n" - "index 9f8712a4b1a5..58b06654c8dd 100644\n" - "--- a/include/linux/page-flags.h\n" - "+++ b/include/linux/page-flags.h\n" - "@@ -424,6 +424,7 @@ static inline bool set_hwpoison_free_buddy_page(struct page *page)\n" - " TESTPAGEFLAG(Young, young, PF_ANY)\n" - " SETPAGEFLAG(Young, young, PF_ANY)\n" - " TESTCLEARFLAG(Young, young, PF_ANY)\n" - "+CLEARPAGEFLAG(Young, young, PF_ANY)\n" - " PAGEFLAG(Idle, idle, PF_ANY)\n" - " #endif\n" - " \n" - "diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h\n" - "index 1e894d34bdce..f3f43b317150 100644\n" - "--- a/include/linux/page_idle.h\n" - "+++ b/include/linux/page_idle.h\n" - "@@ -19,6 +19,11 @@ static inline void set_page_young(struct page *page)\n" - " \tSetPageYoung(page);\n" - " }\n" - " \n" - "+static inline void clear_page_young(struct page *page)\n" - "+{\n" - "+\tClearPageYoung(page);\n" - "+}\n" - "+\n" - " static inline bool test_and_clear_page_young(struct page *page)\n" - " {\n" - " \treturn TestClearPageYoung(page);\n" - "@@ -65,6 +70,16 @@ static inline void set_page_young(struct page *page)\n" - " \tset_bit(PAGE_EXT_YOUNG, &page_ext->flags);\n" - " }\n" - " \n" - "+static void clear_page_young(struct page *page)\n" - "+{\n" - "+\tstruct page_ext *page_ext = lookup_page_ext(page);\n" - "+\n" - "+\tif (unlikely(!page_ext))\n" - "+\t\treturn;\n" - "+\n" - "+\tclear_bit(PAGE_EXT_YOUNG, &page_ext->flags);\n" - "+}\n" - "+\n" - " static inline bool test_and_clear_page_young(struct page *page)\n" - " {\n" - " \tstruct page_ext *page_ext = lookup_page_ext(page);\n" - "diff --git a/include/linux/swap.h b/include/linux/swap.h\n" - "index 4bfb5c4ac108..64795abea003 100644\n" - "--- a/include/linux/swap.h\n" - "+++ b/include/linux/swap.h\n" - "@@ -340,6 +340,7 @@ extern void lru_add_drain_cpu(int cpu);\n" - " extern void lru_add_drain_all(void);\n" - " extern void rotate_reclaimable_page(struct page *page);\n" - " extern void deactivate_file_page(struct page *page);\n" - "+extern void deactivate_page(struct page *page);\n" - " extern void mark_page_lazyfree(struct page *page);\n" - " extern void swap_setup(void);\n" - " \n" - "diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h\n" - "index abd238d0f7a4..f7a4a5d4b642 100644\n" - "--- a/include/uapi/asm-generic/mman-common.h\n" - "+++ b/include/uapi/asm-generic/mman-common.h\n" - "@@ -42,6 +42,7 @@\n" - " #define MADV_SEQUENTIAL\t2\t\t/* expect sequential page references */\n" - " #define MADV_WILLNEED\t3\t\t/* will need these pages */\n" - " #define MADV_DONTNEED\t4\t\t/* don't need these pages */\n" - "+#define MADV_COOL\t5\t\t/* deactivatie these pages */\n" - " \n" - " /* common parameters: try to keep these consistent across architectures */\n" - " #define MADV_FREE\t8\t\t/* free pages only if memory pressure */\n" - "diff --git a/mm/madvise.c b/mm/madvise.c\n" - "index 628022e674a7..c05817fb570d 100644\n" - "--- a/mm/madvise.c\n" - "+++ b/mm/madvise.c\n" - "@@ -8,6 +8,7 @@\n" - " \n" - " #include <linux/mman.h>\n" - " #include <linux/pagemap.h>\n" - "+#include <linux/page_idle.h>\n" - " #include <linux/syscalls.h>\n" - " #include <linux/mempolicy.h>\n" - " #include <linux/page-isolation.h>\n" - "@@ -40,6 +41,7 @@ static int madvise_need_mmap_write(int behavior)\n" - " \tcase MADV_REMOVE:\n" - " \tcase MADV_WILLNEED:\n" - " \tcase MADV_DONTNEED:\n" - "+\tcase MADV_COOL:\n" - " \tcase MADV_FREE:\n" - " \t\treturn 0;\n" - " \tdefault:\n" - "@@ -307,6 +309,113 @@ static long madvise_willneed(struct vm_area_struct *vma,\n" - " \treturn 0;\n" - " }\n" - " \n" - "+static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,\n" - "+\t\t\t\tunsigned long end, struct mm_walk *walk)\n" - "+{\n" - "+\tpte_t *orig_pte, *pte, ptent;\n" - "+\tspinlock_t *ptl;\n" - "+\tstruct page *page;\n" - "+\tstruct vm_area_struct *vma = walk->vma;\n" - "+\tunsigned long next;\n" - "+\n" - "+\tnext = pmd_addr_end(addr, end);\n" - "+\tif (pmd_trans_huge(*pmd)) {\n" - "+\t\tspinlock_t *ptl;\n" - "+\n" - "+\t\tptl = pmd_trans_huge_lock(pmd, vma);\n" - "+\t\tif (!ptl)\n" - "+\t\t\treturn 0;\n" - "+\n" - "+\t\tif (is_huge_zero_pmd(*pmd))\n" - "+\t\t\tgoto huge_unlock;\n" - "+\n" - "+\t\tpage = pmd_page(*pmd);\n" - "+\t\tif (page_mapcount(page) > 1)\n" - "+\t\t\tgoto huge_unlock;\n" - "+\n" - "+\t\tif (next - addr != HPAGE_PMD_SIZE) {\n" - "+\t\t\tint err;\n" - "+\n" - "+\t\t\tget_page(page);\n" - "+\t\t\tspin_unlock(ptl);\n" - "+\t\t\tlock_page(page);\n" - "+\t\t\terr = split_huge_page(page);\n" - "+\t\t\tunlock_page(page);\n" - "+\t\t\tput_page(page);\n" - "+\t\t\tif (!err)\n" - "+\t\t\t\tgoto regular_page;\n" - "+\t\t\treturn 0;\n" - "+\t\t}\n" - "+\n" - "+\t\tpmdp_test_and_clear_young(vma, addr, pmd);\n" - "+\t\tdeactivate_page(page);\n" - "+huge_unlock:\n" - "+\t\tspin_unlock(ptl);\n" - "+\t\treturn 0;\n" - "+\t}\n" - "+\n" - "+\tif (pmd_trans_unstable(pmd))\n" - "+\t\treturn 0;\n" - "+\n" - "+regular_page:\n" - "+\torig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);\n" - "+\tfor (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {\n" - "+\t\tptent = *pte;\n" - "+\n" - "+\t\tif (pte_none(ptent))\n" - "+\t\t\tcontinue;\n" - "+\n" - "+\t\tif (!pte_present(ptent))\n" - "+\t\t\tcontinue;\n" - "+\n" - "+\t\tpage = vm_normal_page(vma, addr, ptent);\n" - "+\t\tif (!page)\n" - "+\t\t\tcontinue;\n" - "+\n" - "+\t\tif (page_mapcount(page) > 1)\n" - "+\t\t\tcontinue;\n" - "+\n" - "+\t\tptep_test_and_clear_young(vma, addr, pte);\n" - "+\t\tdeactivate_page(page);\n" - "+\t}\n" - "+\n" - "+\tpte_unmap_unlock(orig_pte, ptl);\n" - "+\tcond_resched();\n" - "+\n" - "+\treturn 0;\n" - "+}\n" - "+\n" - "+static void madvise_cool_page_range(struct mmu_gather *tlb,\n" - "+\t\t\t struct vm_area_struct *vma,\n" - "+\t\t\t unsigned long addr, unsigned long end)\n" - "+{\n" - "+\tstruct mm_walk cool_walk = {\n" - "+\t\t.pmd_entry = madvise_cool_pte_range,\n" - "+\t\t.mm = vma->vm_mm,\n" - "+\t};\n" - "+\n" - "+\ttlb_start_vma(tlb, vma);\n" - "+\twalk_page_range(addr, end, &cool_walk);\n" - "+\ttlb_end_vma(tlb, vma);\n" - "+}\n" - "+\n" - "+static long madvise_cool(struct vm_area_struct *vma,\n" - "+\t\t\tunsigned long start_addr, unsigned long end_addr)\n" - "+{\n" - "+\tstruct mm_struct *mm = vma->vm_mm;\n" - "+\tstruct mmu_gather tlb;\n" - "+\n" - "+\tif (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))\n" - "+\t\treturn -EINVAL;\n" - "+\n" - "+\tlru_add_drain();\n" - "+\ttlb_gather_mmu(&tlb, mm, start_addr, end_addr);\n" - "+\tmadvise_cool_page_range(&tlb, vma, start_addr, end_addr);\n" - "+\ttlb_finish_mmu(&tlb, start_addr, end_addr);\n" - "+\n" - "+\treturn 0;\n" - "+}\n" - "+\n" - " static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,\n" - " \t\t\t\tunsigned long end, struct mm_walk *walk)\n" - " \n" - "@@ -695,6 +804,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,\n" - " \t\treturn madvise_remove(vma, prev, start, end);\n" - " \tcase MADV_WILLNEED:\n" - " \t\treturn madvise_willneed(vma, prev, start, end);\n" - "+\tcase MADV_COOL:\n" - "+\t\treturn madvise_cool(vma, start, end);\n" - " \tcase MADV_FREE:\n" - " \tcase MADV_DONTNEED:\n" - " \t\treturn madvise_dontneed_free(vma, prev, start, end, behavior);\n" - "@@ -716,6 +827,7 @@ madvise_behavior_valid(int behavior)\n" - " \tcase MADV_WILLNEED:\n" - " \tcase MADV_DONTNEED:\n" - " \tcase MADV_FREE:\n" - "+\tcase MADV_COOL:\n" - " #ifdef CONFIG_KSM\n" - " \tcase MADV_MERGEABLE:\n" - " \tcase MADV_UNMERGEABLE:\n" - "diff --git a/mm/swap.c b/mm/swap.c\n" - "index 3a75722e68a9..0f94c3b5397d 100644\n" - "--- a/mm/swap.c\n" - "+++ b/mm/swap.c\n" - "@@ -46,6 +46,7 @@ int page_cluster;\n" - " static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);\n" - " static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);\n" - " static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);\n" - "+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);\n" - " static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);\n" - " #ifdef CONFIG_SMP\n" - " static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);\n" - "@@ -537,6 +538,23 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,\n" - " \tupdate_page_reclaim_stat(lruvec, file, 0);\n" - " }\n" - " \n" - "+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,\n" - "+\t\t\t void *arg)\n" - "+{\n" - "+\tif (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {\n" - "+\t\tint file = page_is_file_cache(page);\n" - "+\t\tint lru = page_lru_base_type(page);\n" - "+\n" - "+\t\tdel_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);\n" - "+\t\tClearPageActive(page);\n" - "+\t\tClearPageReferenced(page);\n" - "+\t\tclear_page_young(page);\n" - "+\t\tadd_page_to_lru_list(page, lruvec, lru);\n" - "+\n" - "+\t\t__count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));\n" - "+\t\tupdate_page_reclaim_stat(lruvec, file, 0);\n" - "+\t}\n" - "+}\n" - " \n" - " static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,\n" - " \t\t\t void *arg)\n" - "@@ -589,6 +607,10 @@ void lru_add_drain_cpu(int cpu)\n" - " \tif (pagevec_count(pvec))\n" - " \t\tpagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);\n" - " \n" - "+\tpvec = &per_cpu(lru_deactivate_pvecs, cpu);\n" - "+\tif (pagevec_count(pvec))\n" - "+\t\tpagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);\n" - "+\n" - " \tpvec = &per_cpu(lru_lazyfree_pvecs, cpu);\n" - " \tif (pagevec_count(pvec))\n" - " \t\tpagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);\n" - "@@ -622,6 +644,26 @@ void deactivate_file_page(struct page *page)\n" - " \t}\n" - " }\n" - " \n" - "+/*\n" - "+ * deactivate_page - deactivate a page\n" - "+ * @page: page to deactivate\n" - "+ *\n" - "+ * deactivate_page() moves @page to the inactive list if @page was on the active\n" - "+ * list and was not an unevictable page. This is done to accelerate the reclaim\n" - "+ * of @page.\n" - "+ */\n" - "+void deactivate_page(struct page *page)\n" - "+{\n" - "+\tif (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {\n" - "+\t\tstruct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);\n" - "+\n" - "+\t\tget_page(page);\n" - "+\t\tif (!pagevec_add(pvec, page) || PageCompound(page))\n" - "+\t\t\tpagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);\n" - "+\t\tput_cpu_var(lru_deactivate_pvecs);\n" - "+\t}\n" - "+}\n" - "+\n" - " /**\n" - " * mark_page_lazyfree - make an anon page lazyfree\n" - " * @page: page to deactivate\n" - "@@ -686,6 +728,7 @@ void lru_add_drain_all(void)\n" - " \t\tif (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||\n" - " \t\t pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||\n" - " \t\t pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||\n" - "+\t\t pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||\n" - " \t\t pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||\n" - " \t\t need_activate_page_drain(cpu)) {\n" - " \t\t\tINIT_WORK(work, lru_add_drain_per_cpu);\n" - "-- \n" - 2.21.0.1020.gf2820cf01a-goog + "s/end/next/ ?\n" + "> +\t\tptent = *pte;\n" + "> +\n" + "> +\t\tif (pte_none(ptent))\n" + "> +\t\t\tcontinue;\n" + "> +\n" + "> +\t\tif (!pte_present(ptent))\n" + "> +\t\t\tcontinue;\n" + "> +\n" + "> +\t\tpage = vm_normal_page(vma, addr, ptent);\n" + "> +\t\tif (!page)\n" + "> +\t\t\tcontinue;\n" + "> +\n" + "> +\t\tif (page_mapcount(page) > 1)\n" + "> +\t\t\tcontinue;\n" + "> +\n" + "> +\t\tptep_test_and_clear_young(vma, addr, pte);\n" + "> +\t\tdeactivate_page(page);\n" + "> +\t}\n" + "> +\n" + "> +\tpte_unmap_unlock(orig_pte, ptl);\n" + "> +\tcond_resched();\n" + "> +\n" + "> +\treturn 0;\n" + "> +}\n" + "> +\n" + "> +static long madvise_cool(struct vm_area_struct *vma,\n" + "> +\t\t\tunsigned long start_addr, unsigned long end_addr)\n" + "> +{\n" + "> +\tstruct mm_struct *mm = vma->vm_mm;\n" + "> +\tstruct mmu_gather tlb;\n" + "> +\n" + "> +\tif (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))\n" + "> +\t\treturn -EINVAL;\n" + "\n" + "No service in case of VM_IO?\n" + "> +\n" + "> +\tlru_add_drain();\n" + "> +\ttlb_gather_mmu(&tlb, mm, start_addr, end_addr);\n" + "> +\tmadvise_cool_page_range(&tlb, vma, start_addr, end_addr);\n" + "> +\ttlb_finish_mmu(&tlb, start_addr, end_addr);\n" + "> +\n" + "> +\treturn 0;\n" + "> +}\n" + "> +\n" + "> +/*\n" + "> + * deactivate_page - deactivate a page\n" + "> + * @page: page to deactivate\n" + "> + *\n" + "> + * deactivate_page() moves @page to the inactive list if @page was on the active\n" + "> + * list and was not an unevictable page. This is done to accelerate the reclaim\n" + "> + * of @page.\n" + "> + */\n" + "> +void deactivate_page(struct page *page)\n" + "> +{\n" + "> +\tif (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {\n" + "> +\t\tstruct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);\n" + "> +\n" + "> +\t\tget_page(page);\n" + "\n" + "A line of comment seems needed for pinning the page.\n" + "\n" + "> +\t\tif (!pagevec_add(pvec, page) || PageCompound(page))\n" + "> +\t\t\tpagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);\n" + "> +\t\tput_cpu_var(lru_deactivate_pvecs);\n" + "> +\t}\n" + "> +}\n" + "> +\n" + "\n" + "--\n" + Hillf -76714939b2b300d19638111661c4e1e3f756b169a376ced66bb0a12185ab9e8f +3f726f038c347818f2b246f151934fc80ce11befed99d28ed4eadc3835104774
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.