diff for duplicates of <20190520035254.57579-4-minchan@kernel.org> diff --git a/a/1.txt b/N1/1.txt index 124ba0e..f48cfe2 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -1,292 +1,73 @@ -When a process expects no accesses to a certain memory range -for a long time, it could hint kernel that the pages can be -reclaimed instantly but data should be preserved for future use. -This could reduce workingset eviction so it ends up increasing -performance. -This patch introduces the new MADV_COLD hint to madvise(2) -syscall. MADV_COLD can be used by a process to mark a memory range -as not expected to be used for a long time. The hint can help -kernel in deciding which pages to evict proactively. +On Mon, 20 May 2019 12:52:50 +0900 Minchan Kim wrote: +> +unsigned long reclaim_pages(struct list_head *page_list) +> +{ +> + int nid = -1; +> + unsigned long nr_isolated[2] = {0, }; +> + unsigned long nr_reclaimed = 0; +> + LIST_HEAD(node_page_list); +> + struct reclaim_stat dummy_stat; +> + struct scan_control sc = { +> + .gfp_mask = GFP_KERNEL, +> + .priority = DEF_PRIORITY, +> + .may_writepage = 1, +> + .may_unmap = 1, +> + .may_swap = 1, +> + }; +> + +> + while (!list_empty(page_list)) { +> + struct page *page; +> + +> + page = lru_to_page(page_list); +> + list_del(&page->lru); +> + +> + if (nid == -1) { +> + nid = page_to_nid(page); +> + INIT_LIST_HEAD(&node_page_list); +> + nr_isolated[0] = nr_isolated[1] = 0; +> + } +> + +> + if (nid == page_to_nid(page)) { +> + list_add(&page->lru, &node_page_list); +> + nr_isolated[!!page_is_file_cache(page)] += +> + hpage_nr_pages(page); +> + continue; +> + } +> + +Now, page's node != nid and any page on the node_page_list has +node == nid. +> + nid = page_to_nid(page); -Internally, it works via reclaiming memory in process context -the syscall is called. If the page is dirty but backing storage -is not synchronous device, the written page will be rotate back -into LRU's tail once the write is done so they will reclaim easily -when memory pressure happens. If backing storage is -synchrnous device(e.g., zram), hte page will be reclaimed instantly. +After updating nid, we get the node id of the isolated pages lost. -Signed-off-by: Minchan Kim <minchan@kernel.org> ---- - include/linux/swap.h | 1 + - include/uapi/asm-generic/mman-common.h | 1 + - mm/madvise.c | 123 +++++++++++++++++++++++++ - mm/vmscan.c | 74 +++++++++++++++ - 4 files changed, 199 insertions(+) +> + +> + mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON, +> + nr_isolated[0]); +> + mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE, +> + nr_isolated[1]); +> + nr_reclaimed += shrink_page_list(&node_page_list, +> + NODE_DATA(nid), &sc, TTU_IGNORE_ACCESS, -diff --git a/include/linux/swap.h b/include/linux/swap.h -index 64795abea003..7f32a948fc6a 100644 ---- a/include/linux/swap.h -+++ b/include/linux/swap.h -@@ -365,6 +365,7 @@ extern int vm_swappiness; - extern int remove_mapping(struct address_space *mapping, struct page *page); - extern unsigned long vm_total_pages; - -+extern unsigned long reclaim_pages(struct list_head *page_list); - #ifdef CONFIG_NUMA - extern int node_reclaim_mode; - extern int sysctl_min_unmapped_ratio; -diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h -index f7a4a5d4b642..b9b51eeb8e1a 100644 ---- a/include/uapi/asm-generic/mman-common.h -+++ b/include/uapi/asm-generic/mman-common.h -@@ -43,6 +43,7 @@ - #define MADV_WILLNEED 3 /* will need these pages */ - #define MADV_DONTNEED 4 /* don't need these pages */ - #define MADV_COOL 5 /* deactivatie these pages */ -+#define MADV_COLD 6 /* reclaim these pages */ - - /* common parameters: try to keep these consistent across architectures */ - #define MADV_FREE 8 /* free pages only if memory pressure */ -diff --git a/mm/madvise.c b/mm/madvise.c -index c05817fb570d..9a6698b56845 100644 ---- a/mm/madvise.c -+++ b/mm/madvise.c -@@ -42,6 +42,7 @@ static int madvise_need_mmap_write(int behavior) - case MADV_WILLNEED: - case MADV_DONTNEED: - case MADV_COOL: -+ case MADV_COLD: - case MADV_FREE: - return 0; - default: -@@ -416,6 +417,125 @@ static long madvise_cool(struct vm_area_struct *vma, - return 0; - } - -+static int madvise_cold_pte_range(pmd_t *pmd, unsigned long addr, -+ unsigned long end, struct mm_walk *walk) -+{ -+ pte_t *orig_pte, *pte, ptent; -+ spinlock_t *ptl; -+ LIST_HEAD(page_list); -+ struct page *page; -+ int isolated = 0; -+ struct vm_area_struct *vma = walk->vma; -+ unsigned long next; -+ -+ next = pmd_addr_end(addr, end); -+ if (pmd_trans_huge(*pmd)) { -+ spinlock_t *ptl; -+ -+ ptl = pmd_trans_huge_lock(pmd, vma); -+ if (!ptl) -+ return 0; -+ -+ if (is_huge_zero_pmd(*pmd)) -+ goto huge_unlock; -+ -+ page = pmd_page(*pmd); -+ if (page_mapcount(page) > 1) -+ goto huge_unlock; -+ -+ if (next - addr != HPAGE_PMD_SIZE) { -+ int err; -+ -+ get_page(page); -+ spin_unlock(ptl); -+ lock_page(page); -+ err = split_huge_page(page); -+ unlock_page(page); -+ put_page(page); -+ if (!err) -+ goto regular_page; -+ return 0; -+ } -+ -+ if (isolate_lru_page(page)) -+ goto huge_unlock; -+ -+ list_add(&page->lru, &page_list); -+huge_unlock: -+ spin_unlock(ptl); -+ reclaim_pages(&page_list); -+ return 0; -+ } -+ -+ if (pmd_trans_unstable(pmd)) -+ return 0; -+regular_page: -+ orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); -+ for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) { -+ ptent = *pte; -+ if (!pte_present(ptent)) -+ continue; -+ -+ page = vm_normal_page(vma, addr, ptent); -+ if (!page) -+ continue; -+ -+ if (page_mapcount(page) > 1) -+ continue; -+ -+ if (isolate_lru_page(page)) -+ continue; -+ -+ isolated++; -+ list_add(&page->lru, &page_list); -+ if (isolated >= SWAP_CLUSTER_MAX) { -+ pte_unmap_unlock(orig_pte, ptl); -+ reclaim_pages(&page_list); -+ isolated = 0; -+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); -+ orig_pte = pte; -+ } -+ } -+ -+ pte_unmap_unlock(orig_pte, ptl); -+ reclaim_pages(&page_list); -+ cond_resched(); -+ -+ return 0; -+} -+ -+static void madvise_cold_page_range(struct mmu_gather *tlb, -+ struct vm_area_struct *vma, -+ unsigned long addr, unsigned long end) -+{ -+ struct mm_walk warm_walk = { -+ .pmd_entry = madvise_cold_pte_range, -+ .mm = vma->vm_mm, -+ }; -+ -+ tlb_start_vma(tlb, vma); -+ walk_page_range(addr, end, &warm_walk); -+ tlb_end_vma(tlb, vma); -+} -+ -+ -+static long madvise_cold(struct vm_area_struct *vma, -+ unsigned long start_addr, unsigned long end_addr) -+{ -+ struct mm_struct *mm = vma->vm_mm; -+ struct mmu_gather tlb; -+ -+ if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) -+ return -EINVAL; -+ -+ lru_add_drain(); -+ tlb_gather_mmu(&tlb, mm, start_addr, end_addr); -+ madvise_cold_page_range(&tlb, vma, start_addr, end_addr); -+ tlb_finish_mmu(&tlb, start_addr, end_addr); -+ -+ return 0; -+} -+ - static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, struct mm_walk *walk) - -@@ -806,6 +926,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, - return madvise_willneed(vma, prev, start, end); - case MADV_COOL: - return madvise_cool(vma, start, end); -+ case MADV_COLD: -+ return madvise_cold(vma, start, end); - case MADV_FREE: - case MADV_DONTNEED: - return madvise_dontneed_free(vma, prev, start, end, behavior); -@@ -828,6 +950,7 @@ madvise_behavior_valid(int behavior) - case MADV_DONTNEED: - case MADV_FREE: - case MADV_COOL: -+ case MADV_COLD: - #ifdef CONFIG_KSM - case MADV_MERGEABLE: - case MADV_UNMERGEABLE: -diff --git a/mm/vmscan.c b/mm/vmscan.c -index a28e5d17b495..1701b31f70a8 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -2096,6 +2096,80 @@ static void shrink_active_list(unsigned long nr_to_scan, - nr_deactivate, nr_rotated, sc->priority, file); - } - -+unsigned long reclaim_pages(struct list_head *page_list) -+{ -+ int nid = -1; -+ unsigned long nr_isolated[2] = {0, }; -+ unsigned long nr_reclaimed = 0; -+ LIST_HEAD(node_page_list); -+ struct reclaim_stat dummy_stat; -+ struct scan_control sc = { -+ .gfp_mask = GFP_KERNEL, -+ .priority = DEF_PRIORITY, -+ .may_writepage = 1, -+ .may_unmap = 1, -+ .may_swap = 1, -+ }; -+ -+ while (!list_empty(page_list)) { -+ struct page *page; -+ -+ page = lru_to_page(page_list); -+ list_del(&page->lru); -+ -+ if (nid == -1) { -+ nid = page_to_nid(page); -+ INIT_LIST_HEAD(&node_page_list); -+ nr_isolated[0] = nr_isolated[1] = 0; -+ } -+ -+ if (nid == page_to_nid(page)) { -+ list_add(&page->lru, &node_page_list); -+ nr_isolated[!!page_is_file_cache(page)] += -+ hpage_nr_pages(page); -+ continue; -+ } -+ -+ nid = page_to_nid(page); -+ -+ mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON, -+ nr_isolated[0]); -+ mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE, -+ nr_isolated[1]); -+ nr_reclaimed += shrink_page_list(&node_page_list, -+ NODE_DATA(nid), &sc, TTU_IGNORE_ACCESS, -+ &dummy_stat, true); -+ while (!list_empty(&node_page_list)) { -+ struct page *page = lru_to_page(page_list); -+ -+ list_del(&page->lru); -+ putback_lru_page(page); -+ } -+ mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON, -+ -nr_isolated[0]); -+ mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE, -+ -nr_isolated[1]); -+ nr_isolated[0] = nr_isolated[1] = 0; -+ INIT_LIST_HEAD(&node_page_list); -+ } -+ -+ if (!list_empty(&node_page_list)) { -+ mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON, -+ nr_isolated[0]); -+ mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE, -+ nr_isolated[1]); -+ nr_reclaimed += shrink_page_list(&node_page_list, -+ NODE_DATA(nid), &sc, TTU_IGNORE_ACCESS, -+ &dummy_stat, true); -+ mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON, -+ -nr_isolated[0]); -+ mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE, -+ -nr_isolated[1]); -+ } -+ -+ return nr_reclaimed; -+} -+ - /* - * The inactive anon list should be small enough that the VM never has - * to do too much work. --- -2.21.0.1020.gf2820cf01a-goog +And nid no longer matches the node of the pages to be shrunk. + +> + &dummy_stat, true); +> + while (!list_empty(&node_page_list)) { +> + struct page *page = lru_to_page(page_list); + +Non-empty node_page_list will never become empty if pages are deleted +only from the page_list. +> + +> + list_del(&page->lru); +> + putback_lru_page(page); +> + } +> + mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON, +> + -nr_isolated[0]); +> + mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE, +> + -nr_isolated[1]); +> + nr_isolated[0] = nr_isolated[1] = 0; +> + INIT_LIST_HEAD(&node_page_list); +> + } +> + + +BR +Hillf diff --git a/a/content_digest b/N1/content_digest index af385d2..55aab37 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -1,9 +1,10 @@ "ref\020190520035254.57579-1-minchan@kernel.org\0" - "From\0Minchan Kim <minchan@kernel.org>\0" - "Subject\0[RFC 3/7] mm: introduce MADV_COLD\0" - "Date\0Mon, 20 May 2019 12:52:50 +0900\0" - "To\0Andrew Morton <akpm@linux-foundation.org>\0" - "Cc\0LKML <linux-kernel@vger.kernel.org>" + "From\0Hillf Danton <hdanton@sina.com>\0" + "Subject\0Re: [RFC 3/7] mm: introduce MADV_COLD\0" + "Date\0Tue, 28 May 2019 22:54:32 +0800\0" + "To\0Minchan Kim <minchan@kernel.org>\0" + "Cc\0Andrew Morton <akpm@linux-foundation.org>" + LKML <linux-kernel@vger.kernel.org> linux-mm <linux-mm@kvack.org> Michal Hocko <mhocko@suse.com> Johannes Weiner <hannes@cmpxchg.org> @@ -13,301 +14,81 @@ Daniel Colascione <dancol@google.com> Shakeel Butt <shakeelb@google.com> Sonny Rao <sonnyrao@google.com> - Brian Geffon <bgeffon@google.com> - " Minchan Kim <minchan@kernel.org>\0" + " Brian Geffon <bgeffon@google.com>\0" "\00:1\0" "b\0" - "When a process expects no accesses to a certain memory range\n" - "for a long time, it could hint kernel that the pages can be\n" - "reclaimed instantly but data should be preserved for future use.\n" - "This could reduce workingset eviction so it ends up increasing\n" - "performance.\n" "\n" - "This patch introduces the new MADV_COLD hint to madvise(2)\n" - "syscall. MADV_COLD can be used by a process to mark a memory range\n" - "as not expected to be used for a long time. The hint can help\n" - "kernel in deciding which pages to evict proactively.\n" + "On Mon, 20 May 2019 12:52:50 +0900 Minchan Kim wrote:\n" + "> +unsigned long reclaim_pages(struct list_head *page_list)\n" + "> +{\n" + "> +\tint nid = -1;\n" + "> +\tunsigned long nr_isolated[2] = {0, };\n" + "> +\tunsigned long nr_reclaimed = 0;\n" + "> +\tLIST_HEAD(node_page_list);\n" + "> +\tstruct reclaim_stat dummy_stat;\n" + "> +\tstruct scan_control sc = {\n" + "> +\t\t.gfp_mask = GFP_KERNEL,\n" + "> +\t\t.priority = DEF_PRIORITY,\n" + "> +\t\t.may_writepage = 1,\n" + "> +\t\t.may_unmap = 1,\n" + "> +\t\t.may_swap = 1,\n" + "> +\t};\n" + "> +\n" + "> +\twhile (!list_empty(page_list)) {\n" + "> +\t\tstruct page *page;\n" + "> +\n" + "> +\t\tpage = lru_to_page(page_list);\n" + "> +\t\tlist_del(&page->lru);\n" + "> +\n" + "> +\t\tif (nid == -1) {\n" + "> +\t\t\tnid = page_to_nid(page);\n" + "> +\t\t\tINIT_LIST_HEAD(&node_page_list);\n" + "> +\t\t\tnr_isolated[0] = nr_isolated[1] = 0;\n" + "> +\t\t}\n" + "> +\n" + "> +\t\tif (nid == page_to_nid(page)) {\n" + "> +\t\t\tlist_add(&page->lru, &node_page_list);\n" + "> +\t\t\tnr_isolated[!!page_is_file_cache(page)] +=\n" + "> +\t\t\t\t\t\thpage_nr_pages(page);\n" + "> +\t\t\tcontinue;\n" + "> +\t\t}\n" + "> +\n" + "Now, page's node != nid and any page on the node_page_list has\n" + "node == nid. \n" + "> +\t\tnid = page_to_nid(page);\n" "\n" - "Internally, it works via reclaiming memory in process context\n" - "the syscall is called. If the page is dirty but backing storage\n" - "is not synchronous device, the written page will be rotate back\n" - "into LRU's tail once the write is done so they will reclaim easily\n" - "when memory pressure happens. If backing storage is\n" - "synchrnous device(e.g., zram), hte page will be reclaimed instantly.\n" + "After updating nid, we get the node id of the isolated pages lost.\n" "\n" - "Signed-off-by: Minchan Kim <minchan@kernel.org>\n" - "---\n" - " include/linux/swap.h | 1 +\n" - " include/uapi/asm-generic/mman-common.h | 1 +\n" - " mm/madvise.c | 123 +++++++++++++++++++++++++\n" - " mm/vmscan.c | 74 +++++++++++++++\n" - " 4 files changed, 199 insertions(+)\n" + "> +\n" + "> +\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,\n" + "> +\t\t\t\t\tnr_isolated[0]);\n" + "> +\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,\n" + "> +\t\t\t\t\tnr_isolated[1]);\n" + "> +\t\tnr_reclaimed += shrink_page_list(&node_page_list,\n" + "> +\t\t\t\tNODE_DATA(nid), &sc, TTU_IGNORE_ACCESS,\n" "\n" - "diff --git a/include/linux/swap.h b/include/linux/swap.h\n" - "index 64795abea003..7f32a948fc6a 100644\n" - "--- a/include/linux/swap.h\n" - "+++ b/include/linux/swap.h\n" - "@@ -365,6 +365,7 @@ extern int vm_swappiness;\n" - " extern int remove_mapping(struct address_space *mapping, struct page *page);\n" - " extern unsigned long vm_total_pages;\n" - " \n" - "+extern unsigned long reclaim_pages(struct list_head *page_list);\n" - " #ifdef CONFIG_NUMA\n" - " extern int node_reclaim_mode;\n" - " extern int sysctl_min_unmapped_ratio;\n" - "diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h\n" - "index f7a4a5d4b642..b9b51eeb8e1a 100644\n" - "--- a/include/uapi/asm-generic/mman-common.h\n" - "+++ b/include/uapi/asm-generic/mman-common.h\n" - "@@ -43,6 +43,7 @@\n" - " #define MADV_WILLNEED\t3\t\t/* will need these pages */\n" - " #define MADV_DONTNEED\t4\t\t/* don't need these pages */\n" - " #define MADV_COOL\t5\t\t/* deactivatie these pages */\n" - "+#define MADV_COLD\t6\t\t/* reclaim these pages */\n" - " \n" - " /* common parameters: try to keep these consistent across architectures */\n" - " #define MADV_FREE\t8\t\t/* free pages only if memory pressure */\n" - "diff --git a/mm/madvise.c b/mm/madvise.c\n" - "index c05817fb570d..9a6698b56845 100644\n" - "--- a/mm/madvise.c\n" - "+++ b/mm/madvise.c\n" - "@@ -42,6 +42,7 @@ static int madvise_need_mmap_write(int behavior)\n" - " \tcase MADV_WILLNEED:\n" - " \tcase MADV_DONTNEED:\n" - " \tcase MADV_COOL:\n" - "+\tcase MADV_COLD:\n" - " \tcase MADV_FREE:\n" - " \t\treturn 0;\n" - " \tdefault:\n" - "@@ -416,6 +417,125 @@ static long madvise_cool(struct vm_area_struct *vma,\n" - " \treturn 0;\n" - " }\n" - " \n" - "+static int madvise_cold_pte_range(pmd_t *pmd, unsigned long addr,\n" - "+\t\t\t\tunsigned long end, struct mm_walk *walk)\n" - "+{\n" - "+\tpte_t *orig_pte, *pte, ptent;\n" - "+\tspinlock_t *ptl;\n" - "+\tLIST_HEAD(page_list);\n" - "+\tstruct page *page;\n" - "+\tint isolated = 0;\n" - "+\tstruct vm_area_struct *vma = walk->vma;\n" - "+\tunsigned long next;\n" - "+\n" - "+\tnext = pmd_addr_end(addr, end);\n" - "+\tif (pmd_trans_huge(*pmd)) {\n" - "+\t\tspinlock_t *ptl;\n" - "+\n" - "+\t\tptl = pmd_trans_huge_lock(pmd, vma);\n" - "+\t\tif (!ptl)\n" - "+\t\t\treturn 0;\n" - "+\n" - "+\t\tif (is_huge_zero_pmd(*pmd))\n" - "+\t\t\tgoto huge_unlock;\n" - "+\n" - "+\t\tpage = pmd_page(*pmd);\n" - "+\t\tif (page_mapcount(page) > 1)\n" - "+\t\t\tgoto huge_unlock;\n" - "+\n" - "+\t\tif (next - addr != HPAGE_PMD_SIZE) {\n" - "+\t\t\tint err;\n" - "+\n" - "+\t\t\tget_page(page);\n" - "+\t\t\tspin_unlock(ptl);\n" - "+\t\t\tlock_page(page);\n" - "+\t\t\terr = split_huge_page(page);\n" - "+\t\t\tunlock_page(page);\n" - "+\t\t\tput_page(page);\n" - "+\t\t\tif (!err)\n" - "+\t\t\t\tgoto regular_page;\n" - "+\t\t\treturn 0;\n" - "+\t\t}\n" - "+\n" - "+\t\tif (isolate_lru_page(page))\n" - "+\t\t\tgoto huge_unlock;\n" - "+\n" - "+\t\tlist_add(&page->lru, &page_list);\n" - "+huge_unlock:\n" - "+\t\tspin_unlock(ptl);\n" - "+\t\treclaim_pages(&page_list);\n" - "+\t\treturn 0;\n" - "+\t}\n" - "+\n" - "+\tif (pmd_trans_unstable(pmd))\n" - "+\t\treturn 0;\n" - "+regular_page:\n" - "+\torig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);\n" - "+\tfor (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {\n" - "+\t\tptent = *pte;\n" - "+\t\tif (!pte_present(ptent))\n" - "+\t\t\tcontinue;\n" - "+\n" - "+\t\tpage = vm_normal_page(vma, addr, ptent);\n" - "+\t\tif (!page)\n" - "+\t\t\tcontinue;\n" - "+\n" - "+\t\tif (page_mapcount(page) > 1)\n" - "+\t\t\tcontinue;\n" - "+\n" - "+\t\tif (isolate_lru_page(page))\n" - "+\t\t\tcontinue;\n" - "+\n" - "+\t\tisolated++;\n" - "+\t\tlist_add(&page->lru, &page_list);\n" - "+\t\tif (isolated >= SWAP_CLUSTER_MAX) {\n" - "+\t\t\tpte_unmap_unlock(orig_pte, ptl);\n" - "+\t\t\treclaim_pages(&page_list);\n" - "+\t\t\tisolated = 0;\n" - "+\t\t\tpte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);\n" - "+\t\t\torig_pte = pte;\n" - "+\t\t}\n" - "+\t}\n" - "+\n" - "+\tpte_unmap_unlock(orig_pte, ptl);\n" - "+\treclaim_pages(&page_list);\n" - "+\tcond_resched();\n" - "+\n" - "+\treturn 0;\n" - "+}\n" - "+\n" - "+static void madvise_cold_page_range(struct mmu_gather *tlb,\n" - "+\t\t\t struct vm_area_struct *vma,\n" - "+\t\t\t unsigned long addr, unsigned long end)\n" - "+{\n" - "+\tstruct mm_walk warm_walk = {\n" - "+\t\t.pmd_entry = madvise_cold_pte_range,\n" - "+\t\t.mm = vma->vm_mm,\n" - "+\t};\n" - "+\n" - "+\ttlb_start_vma(tlb, vma);\n" - "+\twalk_page_range(addr, end, &warm_walk);\n" - "+\ttlb_end_vma(tlb, vma);\n" - "+}\n" - "+\n" - "+\n" - "+static long madvise_cold(struct vm_area_struct *vma,\n" - "+\t\t\tunsigned long start_addr, unsigned long end_addr)\n" - "+{\n" - "+\tstruct mm_struct *mm = vma->vm_mm;\n" - "+\tstruct mmu_gather tlb;\n" - "+\n" - "+\tif (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))\n" - "+\t\treturn -EINVAL;\n" - "+\n" - "+\tlru_add_drain();\n" - "+\ttlb_gather_mmu(&tlb, mm, start_addr, end_addr);\n" - "+\tmadvise_cold_page_range(&tlb, vma, start_addr, end_addr);\n" - "+\ttlb_finish_mmu(&tlb, start_addr, end_addr);\n" - "+\n" - "+\treturn 0;\n" - "+}\n" - "+\n" - " static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,\n" - " \t\t\t\tunsigned long end, struct mm_walk *walk)\n" - " \n" - "@@ -806,6 +926,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,\n" - " \t\treturn madvise_willneed(vma, prev, start, end);\n" - " \tcase MADV_COOL:\n" - " \t\treturn madvise_cool(vma, start, end);\n" - "+\tcase MADV_COLD:\n" - "+\t\treturn madvise_cold(vma, start, end);\n" - " \tcase MADV_FREE:\n" - " \tcase MADV_DONTNEED:\n" - " \t\treturn madvise_dontneed_free(vma, prev, start, end, behavior);\n" - "@@ -828,6 +950,7 @@ madvise_behavior_valid(int behavior)\n" - " \tcase MADV_DONTNEED:\n" - " \tcase MADV_FREE:\n" - " \tcase MADV_COOL:\n" - "+\tcase MADV_COLD:\n" - " #ifdef CONFIG_KSM\n" - " \tcase MADV_MERGEABLE:\n" - " \tcase MADV_UNMERGEABLE:\n" - "diff --git a/mm/vmscan.c b/mm/vmscan.c\n" - "index a28e5d17b495..1701b31f70a8 100644\n" - "--- a/mm/vmscan.c\n" - "+++ b/mm/vmscan.c\n" - "@@ -2096,6 +2096,80 @@ static void shrink_active_list(unsigned long nr_to_scan,\n" - " \t\t\tnr_deactivate, nr_rotated, sc->priority, file);\n" - " }\n" - " \n" - "+unsigned long reclaim_pages(struct list_head *page_list)\n" - "+{\n" - "+\tint nid = -1;\n" - "+\tunsigned long nr_isolated[2] = {0, };\n" - "+\tunsigned long nr_reclaimed = 0;\n" - "+\tLIST_HEAD(node_page_list);\n" - "+\tstruct reclaim_stat dummy_stat;\n" - "+\tstruct scan_control sc = {\n" - "+\t\t.gfp_mask = GFP_KERNEL,\n" - "+\t\t.priority = DEF_PRIORITY,\n" - "+\t\t.may_writepage = 1,\n" - "+\t\t.may_unmap = 1,\n" - "+\t\t.may_swap = 1,\n" - "+\t};\n" - "+\n" - "+\twhile (!list_empty(page_list)) {\n" - "+\t\tstruct page *page;\n" - "+\n" - "+\t\tpage = lru_to_page(page_list);\n" - "+\t\tlist_del(&page->lru);\n" - "+\n" - "+\t\tif (nid == -1) {\n" - "+\t\t\tnid = page_to_nid(page);\n" - "+\t\t\tINIT_LIST_HEAD(&node_page_list);\n" - "+\t\t\tnr_isolated[0] = nr_isolated[1] = 0;\n" - "+\t\t}\n" - "+\n" - "+\t\tif (nid == page_to_nid(page)) {\n" - "+\t\t\tlist_add(&page->lru, &node_page_list);\n" - "+\t\t\tnr_isolated[!!page_is_file_cache(page)] +=\n" - "+\t\t\t\t\t\thpage_nr_pages(page);\n" - "+\t\t\tcontinue;\n" - "+\t\t}\n" - "+\n" - "+\t\tnid = page_to_nid(page);\n" - "+\n" - "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,\n" - "+\t\t\t\t\tnr_isolated[0]);\n" - "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,\n" - "+\t\t\t\t\tnr_isolated[1]);\n" - "+\t\tnr_reclaimed += shrink_page_list(&node_page_list,\n" - "+\t\t\t\tNODE_DATA(nid), &sc, TTU_IGNORE_ACCESS,\n" - "+\t\t\t\t&dummy_stat, true);\n" - "+\t\twhile (!list_empty(&node_page_list)) {\n" - "+\t\t\tstruct page *page = lru_to_page(page_list);\n" - "+\n" - "+\t\t\tlist_del(&page->lru);\n" - "+\t\t\tputback_lru_page(page);\n" - "+\t\t}\n" - "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,\n" - "+\t\t\t\t\t-nr_isolated[0]);\n" - "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,\n" - "+\t\t\t\t\t-nr_isolated[1]);\n" - "+\t\tnr_isolated[0] = nr_isolated[1] = 0;\n" - "+\t\tINIT_LIST_HEAD(&node_page_list);\n" - "+\t}\n" - "+\n" - "+\tif (!list_empty(&node_page_list)) {\n" - "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,\n" - "+\t\t\t\t\tnr_isolated[0]);\n" - "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,\n" - "+\t\t\t\t\tnr_isolated[1]);\n" - "+\t\tnr_reclaimed += shrink_page_list(&node_page_list,\n" - "+\t\t\t\tNODE_DATA(nid), &sc, TTU_IGNORE_ACCESS,\n" - "+\t\t\t\t&dummy_stat, true);\n" - "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,\n" - "+\t\t\t\t\t-nr_isolated[0]);\n" - "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,\n" - "+\t\t\t\t\t-nr_isolated[1]);\n" - "+\t}\n" - "+\n" - "+\treturn nr_reclaimed;\n" - "+}\n" - "+\n" - " /*\n" - " * The inactive anon list should be small enough that the VM never has\n" - " * to do too much work.\n" - "-- \n" - 2.21.0.1020.gf2820cf01a-goog + "And nid no longer matches the node of the pages to be shrunk.\n" + "\n" + "> +\t\t\t\t&dummy_stat, true);\n" + "> +\t\twhile (!list_empty(&node_page_list)) {\n" + "> +\t\t\tstruct page *page = lru_to_page(page_list);\n" + "\n" + "Non-empty node_page_list will never become empty if pages are deleted\n" + "only from the page_list.\n" + "> +\n" + "> +\t\t\tlist_del(&page->lru);\n" + "> +\t\t\tputback_lru_page(page);\n" + "> +\t\t}\n" + "> +\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,\n" + "> +\t\t\t\t\t-nr_isolated[0]);\n" + "> +\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,\n" + "> +\t\t\t\t\t-nr_isolated[1]);\n" + "> +\t\tnr_isolated[0] = nr_isolated[1] = 0;\n" + "> +\t\tINIT_LIST_HEAD(&node_page_list);\n" + "> +\t}\n" + "> +\n" + "\n" + "BR\n" + Hillf -ef6c3ab5184e3d813fc1368ded18e60ace39e0064c4d440f2fafd550597c3fc7 +46d85159f632b334bd012fa75453557df3abfe0e1b3edaa3d1e302b71079c202
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.