All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20190520035254.57579-4-minchan@kernel.org>

diff --git a/a/1.txt b/N1/1.txt
index 124ba0e..f48cfe2 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -1,292 +1,73 @@
-When a process expects no accesses to a certain memory range
-for a long time, it could hint kernel that the pages can be
-reclaimed instantly but data should be preserved for future use.
-This could reduce workingset eviction so it ends up increasing
-performance.
 
-This patch introduces the new MADV_COLD hint to madvise(2)
-syscall. MADV_COLD can be used by a process to mark a memory range
-as not expected to be used for a long time. The hint can help
-kernel in deciding which pages to evict proactively.
+On Mon, 20 May 2019 12:52:50 +0900 Minchan Kim wrote:
+> +unsigned long reclaim_pages(struct list_head *page_list)
+> +{
+> +	int nid = -1;
+> +	unsigned long nr_isolated[2] = {0, };
+> +	unsigned long nr_reclaimed = 0;
+> +	LIST_HEAD(node_page_list);
+> +	struct reclaim_stat dummy_stat;
+> +	struct scan_control sc = {
+> +		.gfp_mask = GFP_KERNEL,
+> +		.priority = DEF_PRIORITY,
+> +		.may_writepage = 1,
+> +		.may_unmap = 1,
+> +		.may_swap = 1,
+> +	};
+> +
+> +	while (!list_empty(page_list)) {
+> +		struct page *page;
+> +
+> +		page = lru_to_page(page_list);
+> +		list_del(&page->lru);
+> +
+> +		if (nid == -1) {
+> +			nid = page_to_nid(page);
+> +			INIT_LIST_HEAD(&node_page_list);
+> +			nr_isolated[0] = nr_isolated[1] = 0;
+> +		}
+> +
+> +		if (nid == page_to_nid(page)) {
+> +			list_add(&page->lru, &node_page_list);
+> +			nr_isolated[!!page_is_file_cache(page)] +=
+> +						hpage_nr_pages(page);
+> +			continue;
+> +		}
+> +
+Now, page's node != nid and any page on the node_page_list has
+node == nid. 
+> +		nid = page_to_nid(page);
 
-Internally, it works by reclaiming memory in the context of the process
-that calls the syscall. If the page is dirty but the backing storage
-is not a synchronous device, the written page will be rotated back
-to the LRU's tail once the write is done, so it can be reclaimed easily
-when memory pressure happens. If the backing storage is a
-synchronous device (e.g., zram), the page will be reclaimed instantly.
+After updating nid, the node id of the pages already isolated on node_page_list is lost.
 
-Signed-off-by: Minchan Kim <minchan@kernel.org>
----
- include/linux/swap.h                   |   1 +
- include/uapi/asm-generic/mman-common.h |   1 +
- mm/madvise.c                           | 123 +++++++++++++++++++++++++
- mm/vmscan.c                            |  74 +++++++++++++++
- 4 files changed, 199 insertions(+)
+> +
+> +		mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,
+> +					nr_isolated[0]);
+> +		mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,
+> +					nr_isolated[1]);
+> +		nr_reclaimed += shrink_page_list(&node_page_list,
+> +				NODE_DATA(nid), &sc, TTU_IGNORE_ACCESS,
 
-diff --git a/include/linux/swap.h b/include/linux/swap.h
-index 64795abea003..7f32a948fc6a 100644
---- a/include/linux/swap.h
-+++ b/include/linux/swap.h
-@@ -365,6 +365,7 @@ extern int vm_swappiness;
- extern int remove_mapping(struct address_space *mapping, struct page *page);
- extern unsigned long vm_total_pages;
- 
-+extern unsigned long reclaim_pages(struct list_head *page_list);
- #ifdef CONFIG_NUMA
- extern int node_reclaim_mode;
- extern int sysctl_min_unmapped_ratio;
-diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
-index f7a4a5d4b642..b9b51eeb8e1a 100644
---- a/include/uapi/asm-generic/mman-common.h
-+++ b/include/uapi/asm-generic/mman-common.h
-@@ -43,6 +43,7 @@
- #define MADV_WILLNEED	3		/* will need these pages */
- #define MADV_DONTNEED	4		/* don't need these pages */
- #define MADV_COOL	5		/* deactivatie these pages */
-+#define MADV_COLD	6		/* reclaim these pages */
- 
- /* common parameters: try to keep these consistent across architectures */
- #define MADV_FREE	8		/* free pages only if memory pressure */
-diff --git a/mm/madvise.c b/mm/madvise.c
-index c05817fb570d..9a6698b56845 100644
---- a/mm/madvise.c
-+++ b/mm/madvise.c
-@@ -42,6 +42,7 @@ static int madvise_need_mmap_write(int behavior)
- 	case MADV_WILLNEED:
- 	case MADV_DONTNEED:
- 	case MADV_COOL:
-+	case MADV_COLD:
- 	case MADV_FREE:
- 		return 0;
- 	default:
-@@ -416,6 +417,125 @@ static long madvise_cool(struct vm_area_struct *vma,
- 	return 0;
- }
- 
-+static int madvise_cold_pte_range(pmd_t *pmd, unsigned long addr,
-+				unsigned long end, struct mm_walk *walk)
-+{
-+	pte_t *orig_pte, *pte, ptent;
-+	spinlock_t *ptl;
-+	LIST_HEAD(page_list);
-+	struct page *page;
-+	int isolated = 0;
-+	struct vm_area_struct *vma = walk->vma;
-+	unsigned long next;
-+
-+	next = pmd_addr_end(addr, end);
-+	if (pmd_trans_huge(*pmd)) {
-+		spinlock_t *ptl;
-+
-+		ptl = pmd_trans_huge_lock(pmd, vma);
-+		if (!ptl)
-+			return 0;
-+
-+		if (is_huge_zero_pmd(*pmd))
-+			goto huge_unlock;
-+
-+		page = pmd_page(*pmd);
-+		if (page_mapcount(page) > 1)
-+			goto huge_unlock;
-+
-+		if (next - addr != HPAGE_PMD_SIZE) {
-+			int err;
-+
-+			get_page(page);
-+			spin_unlock(ptl);
-+			lock_page(page);
-+			err = split_huge_page(page);
-+			unlock_page(page);
-+			put_page(page);
-+			if (!err)
-+				goto regular_page;
-+			return 0;
-+		}
-+
-+		if (isolate_lru_page(page))
-+			goto huge_unlock;
-+
-+		list_add(&page->lru, &page_list);
-+huge_unlock:
-+		spin_unlock(ptl);
-+		reclaim_pages(&page_list);
-+		return 0;
-+	}
-+
-+	if (pmd_trans_unstable(pmd))
-+		return 0;
-+regular_page:
-+	orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-+	for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {
-+		ptent = *pte;
-+		if (!pte_present(ptent))
-+			continue;
-+
-+		page = vm_normal_page(vma, addr, ptent);
-+		if (!page)
-+			continue;
-+
-+		if (page_mapcount(page) > 1)
-+			continue;
-+
-+		if (isolate_lru_page(page))
-+			continue;
-+
-+		isolated++;
-+		list_add(&page->lru, &page_list);
-+		if (isolated >= SWAP_CLUSTER_MAX) {
-+			pte_unmap_unlock(orig_pte, ptl);
-+			reclaim_pages(&page_list);
-+			isolated = 0;
-+			pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-+			orig_pte = pte;
-+		}
-+	}
-+
-+	pte_unmap_unlock(orig_pte, ptl);
-+	reclaim_pages(&page_list);
-+	cond_resched();
-+
-+	return 0;
-+}
-+
-+static void madvise_cold_page_range(struct mmu_gather *tlb,
-+			     struct vm_area_struct *vma,
-+			     unsigned long addr, unsigned long end)
-+{
-+	struct mm_walk warm_walk = {
-+		.pmd_entry = madvise_cold_pte_range,
-+		.mm = vma->vm_mm,
-+	};
-+
-+	tlb_start_vma(tlb, vma);
-+	walk_page_range(addr, end, &warm_walk);
-+	tlb_end_vma(tlb, vma);
-+}
-+
-+
-+static long madvise_cold(struct vm_area_struct *vma,
-+			unsigned long start_addr, unsigned long end_addr)
-+{
-+	struct mm_struct *mm = vma->vm_mm;
-+	struct mmu_gather tlb;
-+
-+	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
-+		return -EINVAL;
-+
-+	lru_add_drain();
-+	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
-+	madvise_cold_page_range(&tlb, vma, start_addr, end_addr);
-+	tlb_finish_mmu(&tlb, start_addr, end_addr);
-+
-+	return 0;
-+}
-+
- static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
- 				unsigned long end, struct mm_walk *walk)
- 
-@@ -806,6 +926,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
- 		return madvise_willneed(vma, prev, start, end);
- 	case MADV_COOL:
- 		return madvise_cool(vma, start, end);
-+	case MADV_COLD:
-+		return madvise_cold(vma, start, end);
- 	case MADV_FREE:
- 	case MADV_DONTNEED:
- 		return madvise_dontneed_free(vma, prev, start, end, behavior);
-@@ -828,6 +950,7 @@ madvise_behavior_valid(int behavior)
- 	case MADV_DONTNEED:
- 	case MADV_FREE:
- 	case MADV_COOL:
-+	case MADV_COLD:
- #ifdef CONFIG_KSM
- 	case MADV_MERGEABLE:
- 	case MADV_UNMERGEABLE:
-diff --git a/mm/vmscan.c b/mm/vmscan.c
-index a28e5d17b495..1701b31f70a8 100644
---- a/mm/vmscan.c
-+++ b/mm/vmscan.c
-@@ -2096,6 +2096,80 @@ static void shrink_active_list(unsigned long nr_to_scan,
- 			nr_deactivate, nr_rotated, sc->priority, file);
- }
- 
-+unsigned long reclaim_pages(struct list_head *page_list)
-+{
-+	int nid = -1;
-+	unsigned long nr_isolated[2] = {0, };
-+	unsigned long nr_reclaimed = 0;
-+	LIST_HEAD(node_page_list);
-+	struct reclaim_stat dummy_stat;
-+	struct scan_control sc = {
-+		.gfp_mask = GFP_KERNEL,
-+		.priority = DEF_PRIORITY,
-+		.may_writepage = 1,
-+		.may_unmap = 1,
-+		.may_swap = 1,
-+	};
-+
-+	while (!list_empty(page_list)) {
-+		struct page *page;
-+
-+		page = lru_to_page(page_list);
-+		list_del(&page->lru);
-+
-+		if (nid == -1) {
-+			nid = page_to_nid(page);
-+			INIT_LIST_HEAD(&node_page_list);
-+			nr_isolated[0] = nr_isolated[1] = 0;
-+		}
-+
-+		if (nid == page_to_nid(page)) {
-+			list_add(&page->lru, &node_page_list);
-+			nr_isolated[!!page_is_file_cache(page)] +=
-+						hpage_nr_pages(page);
-+			continue;
-+		}
-+
-+		nid = page_to_nid(page);
-+
-+		mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,
-+					nr_isolated[0]);
-+		mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,
-+					nr_isolated[1]);
-+		nr_reclaimed += shrink_page_list(&node_page_list,
-+				NODE_DATA(nid), &sc, TTU_IGNORE_ACCESS,
-+				&dummy_stat, true);
-+		while (!list_empty(&node_page_list)) {
-+			struct page *page = lru_to_page(page_list);
-+
-+			list_del(&page->lru);
-+			putback_lru_page(page);
-+		}
-+		mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,
-+					-nr_isolated[0]);
-+		mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,
-+					-nr_isolated[1]);
-+		nr_isolated[0] = nr_isolated[1] = 0;
-+		INIT_LIST_HEAD(&node_page_list);
-+	}
-+
-+	if (!list_empty(&node_page_list)) {
-+		mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,
-+					nr_isolated[0]);
-+		mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,
-+					nr_isolated[1]);
-+		nr_reclaimed += shrink_page_list(&node_page_list,
-+				NODE_DATA(nid), &sc, TTU_IGNORE_ACCESS,
-+				&dummy_stat, true);
-+		mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,
-+					-nr_isolated[0]);
-+		mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,
-+					-nr_isolated[1]);
-+	}
-+
-+	return nr_reclaimed;
-+}
-+
- /*
-  * The inactive anon list should be small enough that the VM never has
-  * to do too much work.
--- 
-2.21.0.1020.gf2820cf01a-goog
+And nid no longer matches the node of the pages to be shrunk.
+
+> +				&dummy_stat, true);
+> +		while (!list_empty(&node_page_list)) {
+> +			struct page *page = lru_to_page(page_list);
+
+Non-empty node_page_list will never become empty if pages are deleted
+only from the page_list.
+> +
+> +			list_del(&page->lru);
+> +			putback_lru_page(page);
+> +		}
+> +		mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,
+> +					-nr_isolated[0]);
+> +		mod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,
+> +					-nr_isolated[1]);
+> +		nr_isolated[0] = nr_isolated[1] = 0;
+> +		INIT_LIST_HEAD(&node_page_list);
+> +	}
+> +
+
+BR
+Hillf
diff --git a/a/content_digest b/N1/content_digest
index af385d2..55aab37 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -1,9 +1,10 @@
  "ref\020190520035254.57579-1-minchan@kernel.org\0"
- "From\0Minchan Kim <minchan@kernel.org>\0"
- "Subject\0[RFC 3/7] mm: introduce MADV_COLD\0"
- "Date\0Mon, 20 May 2019 12:52:50 +0900\0"
- "To\0Andrew Morton <akpm@linux-foundation.org>\0"
- "Cc\0LKML <linux-kernel@vger.kernel.org>"
+ "From\0Hillf Danton <hdanton@sina.com>\0"
+ "Subject\0Re: [RFC 3/7] mm: introduce MADV_COLD\0"
+ "Date\0Tue, 28 May 2019 22:54:32 +0800\0"
+ "To\0Minchan Kim <minchan@kernel.org>\0"
+ "Cc\0Andrew Morton <akpm@linux-foundation.org>"
+  LKML <linux-kernel@vger.kernel.org>
   linux-mm <linux-mm@kvack.org>
   Michal Hocko <mhocko@suse.com>
   Johannes Weiner <hannes@cmpxchg.org>
@@ -13,301 +14,81 @@
   Daniel Colascione <dancol@google.com>
   Shakeel Butt <shakeelb@google.com>
   Sonny Rao <sonnyrao@google.com>
-  Brian Geffon <bgeffon@google.com>
- " Minchan Kim <minchan@kernel.org>\0"
+ " Brian Geffon <bgeffon@google.com>\0"
  "\00:1\0"
  "b\0"
- "When a process expects no accesses to a certain memory range\n"
- "for a long time, it could hint kernel that the pages can be\n"
- "reclaimed instantly but data should be preserved for future use.\n"
- "This could reduce workingset eviction so it ends up increasing\n"
- "performance.\n"
  "\n"
- "This patch introduces the new MADV_COLD hint to madvise(2)\n"
- "syscall. MADV_COLD can be used by a process to mark a memory range\n"
- "as not expected to be used for a long time. The hint can help\n"
- "kernel in deciding which pages to evict proactively.\n"
+ "On Mon, 20 May 2019 12:52:50 +0900 Minchan Kim wrote:\n"
+ "> +unsigned long reclaim_pages(struct list_head *page_list)\n"
+ "> +{\n"
+ "> +\tint nid = -1;\n"
+ "> +\tunsigned long nr_isolated[2] = {0, };\n"
+ "> +\tunsigned long nr_reclaimed = 0;\n"
+ "> +\tLIST_HEAD(node_page_list);\n"
+ "> +\tstruct reclaim_stat dummy_stat;\n"
+ "> +\tstruct scan_control sc = {\n"
+ "> +\t\t.gfp_mask = GFP_KERNEL,\n"
+ "> +\t\t.priority = DEF_PRIORITY,\n"
+ "> +\t\t.may_writepage = 1,\n"
+ "> +\t\t.may_unmap = 1,\n"
+ "> +\t\t.may_swap = 1,\n"
+ "> +\t};\n"
+ "> +\n"
+ "> +\twhile (!list_empty(page_list)) {\n"
+ "> +\t\tstruct page *page;\n"
+ "> +\n"
+ "> +\t\tpage = lru_to_page(page_list);\n"
+ "> +\t\tlist_del(&page->lru);\n"
+ "> +\n"
+ "> +\t\tif (nid == -1) {\n"
+ "> +\t\t\tnid = page_to_nid(page);\n"
+ "> +\t\t\tINIT_LIST_HEAD(&node_page_list);\n"
+ "> +\t\t\tnr_isolated[0] = nr_isolated[1] = 0;\n"
+ "> +\t\t}\n"
+ "> +\n"
+ "> +\t\tif (nid == page_to_nid(page)) {\n"
+ "> +\t\t\tlist_add(&page->lru, &node_page_list);\n"
+ "> +\t\t\tnr_isolated[!!page_is_file_cache(page)] +=\n"
+ "> +\t\t\t\t\t\thpage_nr_pages(page);\n"
+ "> +\t\t\tcontinue;\n"
+ "> +\t\t}\n"
+ "> +\n"
+ "Now, page's node != nid and any page on the node_page_list has\n"
+ "node == nid. \n"
+ "> +\t\tnid = page_to_nid(page);\n"
  "\n"
- "Internally, it works via reclaiming memory in process context\n"
- "the syscall is called. If the page is dirty but backing storage\n"
- "is not synchronous device, the written page will be rotate back\n"
- "into LRU's tail once the write is done so they will reclaim easily\n"
- "when memory pressure happens. If backing storage is\n"
- "synchrnous device(e.g., zram), hte page will be reclaimed instantly.\n"
+ "After updating nid, we get the node id of the isolated pages lost.\n"
  "\n"
- "Signed-off-by: Minchan Kim <minchan@kernel.org>\n"
- "---\n"
- " include/linux/swap.h                   |   1 +\n"
- " include/uapi/asm-generic/mman-common.h |   1 +\n"
- " mm/madvise.c                           | 123 +++++++++++++++++++++++++\n"
- " mm/vmscan.c                            |  74 +++++++++++++++\n"
- " 4 files changed, 199 insertions(+)\n"
+ "> +\n"
+ "> +\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,\n"
+ "> +\t\t\t\t\tnr_isolated[0]);\n"
+ "> +\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,\n"
+ "> +\t\t\t\t\tnr_isolated[1]);\n"
+ "> +\t\tnr_reclaimed += shrink_page_list(&node_page_list,\n"
+ "> +\t\t\t\tNODE_DATA(nid), &sc, TTU_IGNORE_ACCESS,\n"
  "\n"
- "diff --git a/include/linux/swap.h b/include/linux/swap.h\n"
- "index 64795abea003..7f32a948fc6a 100644\n"
- "--- a/include/linux/swap.h\n"
- "+++ b/include/linux/swap.h\n"
- "@@ -365,6 +365,7 @@ extern int vm_swappiness;\n"
- " extern int remove_mapping(struct address_space *mapping, struct page *page);\n"
- " extern unsigned long vm_total_pages;\n"
- " \n"
- "+extern unsigned long reclaim_pages(struct list_head *page_list);\n"
- " #ifdef CONFIG_NUMA\n"
- " extern int node_reclaim_mode;\n"
- " extern int sysctl_min_unmapped_ratio;\n"
- "diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h\n"
- "index f7a4a5d4b642..b9b51eeb8e1a 100644\n"
- "--- a/include/uapi/asm-generic/mman-common.h\n"
- "+++ b/include/uapi/asm-generic/mman-common.h\n"
- "@@ -43,6 +43,7 @@\n"
- " #define MADV_WILLNEED\t3\t\t/* will need these pages */\n"
- " #define MADV_DONTNEED\t4\t\t/* don't need these pages */\n"
- " #define MADV_COOL\t5\t\t/* deactivatie these pages */\n"
- "+#define MADV_COLD\t6\t\t/* reclaim these pages */\n"
- " \n"
- " /* common parameters: try to keep these consistent across architectures */\n"
- " #define MADV_FREE\t8\t\t/* free pages only if memory pressure */\n"
- "diff --git a/mm/madvise.c b/mm/madvise.c\n"
- "index c05817fb570d..9a6698b56845 100644\n"
- "--- a/mm/madvise.c\n"
- "+++ b/mm/madvise.c\n"
- "@@ -42,6 +42,7 @@ static int madvise_need_mmap_write(int behavior)\n"
- " \tcase MADV_WILLNEED:\n"
- " \tcase MADV_DONTNEED:\n"
- " \tcase MADV_COOL:\n"
- "+\tcase MADV_COLD:\n"
- " \tcase MADV_FREE:\n"
- " \t\treturn 0;\n"
- " \tdefault:\n"
- "@@ -416,6 +417,125 @@ static long madvise_cool(struct vm_area_struct *vma,\n"
- " \treturn 0;\n"
- " }\n"
- " \n"
- "+static int madvise_cold_pte_range(pmd_t *pmd, unsigned long addr,\n"
- "+\t\t\t\tunsigned long end, struct mm_walk *walk)\n"
- "+{\n"
- "+\tpte_t *orig_pte, *pte, ptent;\n"
- "+\tspinlock_t *ptl;\n"
- "+\tLIST_HEAD(page_list);\n"
- "+\tstruct page *page;\n"
- "+\tint isolated = 0;\n"
- "+\tstruct vm_area_struct *vma = walk->vma;\n"
- "+\tunsigned long next;\n"
- "+\n"
- "+\tnext = pmd_addr_end(addr, end);\n"
- "+\tif (pmd_trans_huge(*pmd)) {\n"
- "+\t\tspinlock_t *ptl;\n"
- "+\n"
- "+\t\tptl = pmd_trans_huge_lock(pmd, vma);\n"
- "+\t\tif (!ptl)\n"
- "+\t\t\treturn 0;\n"
- "+\n"
- "+\t\tif (is_huge_zero_pmd(*pmd))\n"
- "+\t\t\tgoto huge_unlock;\n"
- "+\n"
- "+\t\tpage = pmd_page(*pmd);\n"
- "+\t\tif (page_mapcount(page) > 1)\n"
- "+\t\t\tgoto huge_unlock;\n"
- "+\n"
- "+\t\tif (next - addr != HPAGE_PMD_SIZE) {\n"
- "+\t\t\tint err;\n"
- "+\n"
- "+\t\t\tget_page(page);\n"
- "+\t\t\tspin_unlock(ptl);\n"
- "+\t\t\tlock_page(page);\n"
- "+\t\t\terr = split_huge_page(page);\n"
- "+\t\t\tunlock_page(page);\n"
- "+\t\t\tput_page(page);\n"
- "+\t\t\tif (!err)\n"
- "+\t\t\t\tgoto regular_page;\n"
- "+\t\t\treturn 0;\n"
- "+\t\t}\n"
- "+\n"
- "+\t\tif (isolate_lru_page(page))\n"
- "+\t\t\tgoto huge_unlock;\n"
- "+\n"
- "+\t\tlist_add(&page->lru, &page_list);\n"
- "+huge_unlock:\n"
- "+\t\tspin_unlock(ptl);\n"
- "+\t\treclaim_pages(&page_list);\n"
- "+\t\treturn 0;\n"
- "+\t}\n"
- "+\n"
- "+\tif (pmd_trans_unstable(pmd))\n"
- "+\t\treturn 0;\n"
- "+regular_page:\n"
- "+\torig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);\n"
- "+\tfor (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {\n"
- "+\t\tptent = *pte;\n"
- "+\t\tif (!pte_present(ptent))\n"
- "+\t\t\tcontinue;\n"
- "+\n"
- "+\t\tpage = vm_normal_page(vma, addr, ptent);\n"
- "+\t\tif (!page)\n"
- "+\t\t\tcontinue;\n"
- "+\n"
- "+\t\tif (page_mapcount(page) > 1)\n"
- "+\t\t\tcontinue;\n"
- "+\n"
- "+\t\tif (isolate_lru_page(page))\n"
- "+\t\t\tcontinue;\n"
- "+\n"
- "+\t\tisolated++;\n"
- "+\t\tlist_add(&page->lru, &page_list);\n"
- "+\t\tif (isolated >= SWAP_CLUSTER_MAX) {\n"
- "+\t\t\tpte_unmap_unlock(orig_pte, ptl);\n"
- "+\t\t\treclaim_pages(&page_list);\n"
- "+\t\t\tisolated = 0;\n"
- "+\t\t\tpte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);\n"
- "+\t\t\torig_pte = pte;\n"
- "+\t\t}\n"
- "+\t}\n"
- "+\n"
- "+\tpte_unmap_unlock(orig_pte, ptl);\n"
- "+\treclaim_pages(&page_list);\n"
- "+\tcond_resched();\n"
- "+\n"
- "+\treturn 0;\n"
- "+}\n"
- "+\n"
- "+static void madvise_cold_page_range(struct mmu_gather *tlb,\n"
- "+\t\t\t     struct vm_area_struct *vma,\n"
- "+\t\t\t     unsigned long addr, unsigned long end)\n"
- "+{\n"
- "+\tstruct mm_walk warm_walk = {\n"
- "+\t\t.pmd_entry = madvise_cold_pte_range,\n"
- "+\t\t.mm = vma->vm_mm,\n"
- "+\t};\n"
- "+\n"
- "+\ttlb_start_vma(tlb, vma);\n"
- "+\twalk_page_range(addr, end, &warm_walk);\n"
- "+\ttlb_end_vma(tlb, vma);\n"
- "+}\n"
- "+\n"
- "+\n"
- "+static long madvise_cold(struct vm_area_struct *vma,\n"
- "+\t\t\tunsigned long start_addr, unsigned long end_addr)\n"
- "+{\n"
- "+\tstruct mm_struct *mm = vma->vm_mm;\n"
- "+\tstruct mmu_gather tlb;\n"
- "+\n"
- "+\tif (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))\n"
- "+\t\treturn -EINVAL;\n"
- "+\n"
- "+\tlru_add_drain();\n"
- "+\ttlb_gather_mmu(&tlb, mm, start_addr, end_addr);\n"
- "+\tmadvise_cold_page_range(&tlb, vma, start_addr, end_addr);\n"
- "+\ttlb_finish_mmu(&tlb, start_addr, end_addr);\n"
- "+\n"
- "+\treturn 0;\n"
- "+}\n"
- "+\n"
- " static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,\n"
- " \t\t\t\tunsigned long end, struct mm_walk *walk)\n"
- " \n"
- "@@ -806,6 +926,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,\n"
- " \t\treturn madvise_willneed(vma, prev, start, end);\n"
- " \tcase MADV_COOL:\n"
- " \t\treturn madvise_cool(vma, start, end);\n"
- "+\tcase MADV_COLD:\n"
- "+\t\treturn madvise_cold(vma, start, end);\n"
- " \tcase MADV_FREE:\n"
- " \tcase MADV_DONTNEED:\n"
- " \t\treturn madvise_dontneed_free(vma, prev, start, end, behavior);\n"
- "@@ -828,6 +950,7 @@ madvise_behavior_valid(int behavior)\n"
- " \tcase MADV_DONTNEED:\n"
- " \tcase MADV_FREE:\n"
- " \tcase MADV_COOL:\n"
- "+\tcase MADV_COLD:\n"
- " #ifdef CONFIG_KSM\n"
- " \tcase MADV_MERGEABLE:\n"
- " \tcase MADV_UNMERGEABLE:\n"
- "diff --git a/mm/vmscan.c b/mm/vmscan.c\n"
- "index a28e5d17b495..1701b31f70a8 100644\n"
- "--- a/mm/vmscan.c\n"
- "+++ b/mm/vmscan.c\n"
- "@@ -2096,6 +2096,80 @@ static void shrink_active_list(unsigned long nr_to_scan,\n"
- " \t\t\tnr_deactivate, nr_rotated, sc->priority, file);\n"
- " }\n"
- " \n"
- "+unsigned long reclaim_pages(struct list_head *page_list)\n"
- "+{\n"
- "+\tint nid = -1;\n"
- "+\tunsigned long nr_isolated[2] = {0, };\n"
- "+\tunsigned long nr_reclaimed = 0;\n"
- "+\tLIST_HEAD(node_page_list);\n"
- "+\tstruct reclaim_stat dummy_stat;\n"
- "+\tstruct scan_control sc = {\n"
- "+\t\t.gfp_mask = GFP_KERNEL,\n"
- "+\t\t.priority = DEF_PRIORITY,\n"
- "+\t\t.may_writepage = 1,\n"
- "+\t\t.may_unmap = 1,\n"
- "+\t\t.may_swap = 1,\n"
- "+\t};\n"
- "+\n"
- "+\twhile (!list_empty(page_list)) {\n"
- "+\t\tstruct page *page;\n"
- "+\n"
- "+\t\tpage = lru_to_page(page_list);\n"
- "+\t\tlist_del(&page->lru);\n"
- "+\n"
- "+\t\tif (nid == -1) {\n"
- "+\t\t\tnid = page_to_nid(page);\n"
- "+\t\t\tINIT_LIST_HEAD(&node_page_list);\n"
- "+\t\t\tnr_isolated[0] = nr_isolated[1] = 0;\n"
- "+\t\t}\n"
- "+\n"
- "+\t\tif (nid == page_to_nid(page)) {\n"
- "+\t\t\tlist_add(&page->lru, &node_page_list);\n"
- "+\t\t\tnr_isolated[!!page_is_file_cache(page)] +=\n"
- "+\t\t\t\t\t\thpage_nr_pages(page);\n"
- "+\t\t\tcontinue;\n"
- "+\t\t}\n"
- "+\n"
- "+\t\tnid = page_to_nid(page);\n"
- "+\n"
- "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,\n"
- "+\t\t\t\t\tnr_isolated[0]);\n"
- "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,\n"
- "+\t\t\t\t\tnr_isolated[1]);\n"
- "+\t\tnr_reclaimed += shrink_page_list(&node_page_list,\n"
- "+\t\t\t\tNODE_DATA(nid), &sc, TTU_IGNORE_ACCESS,\n"
- "+\t\t\t\t&dummy_stat, true);\n"
- "+\t\twhile (!list_empty(&node_page_list)) {\n"
- "+\t\t\tstruct page *page = lru_to_page(page_list);\n"
- "+\n"
- "+\t\t\tlist_del(&page->lru);\n"
- "+\t\t\tputback_lru_page(page);\n"
- "+\t\t}\n"
- "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,\n"
- "+\t\t\t\t\t-nr_isolated[0]);\n"
- "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,\n"
- "+\t\t\t\t\t-nr_isolated[1]);\n"
- "+\t\tnr_isolated[0] = nr_isolated[1] = 0;\n"
- "+\t\tINIT_LIST_HEAD(&node_page_list);\n"
- "+\t}\n"
- "+\n"
- "+\tif (!list_empty(&node_page_list)) {\n"
- "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,\n"
- "+\t\t\t\t\tnr_isolated[0]);\n"
- "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,\n"
- "+\t\t\t\t\tnr_isolated[1]);\n"
- "+\t\tnr_reclaimed += shrink_page_list(&node_page_list,\n"
- "+\t\t\t\tNODE_DATA(nid), &sc, TTU_IGNORE_ACCESS,\n"
- "+\t\t\t\t&dummy_stat, true);\n"
- "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,\n"
- "+\t\t\t\t\t-nr_isolated[0]);\n"
- "+\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,\n"
- "+\t\t\t\t\t-nr_isolated[1]);\n"
- "+\t}\n"
- "+\n"
- "+\treturn nr_reclaimed;\n"
- "+}\n"
- "+\n"
- " /*\n"
- "  * The inactive anon list should be small enough that the VM never has\n"
- "  * to do too much work.\n"
- "-- \n"
- 2.21.0.1020.gf2820cf01a-goog
+ "And nid no longer matches the node of the pages to be shrunk.\n"
+ "\n"
+ "> +\t\t\t\t&dummy_stat, true);\n"
+ "> +\t\twhile (!list_empty(&node_page_list)) {\n"
+ "> +\t\t\tstruct page *page = lru_to_page(page_list);\n"
+ "\n"
+ "Non-empty node_page_list will never become empty if pages are deleted\n"
+ "only from the page_list.\n"
+ "> +\n"
+ "> +\t\t\tlist_del(&page->lru);\n"
+ "> +\t\t\tputback_lru_page(page);\n"
+ "> +\t\t}\n"
+ "> +\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_ANON,\n"
+ "> +\t\t\t\t\t-nr_isolated[0]);\n"
+ "> +\t\tmod_node_page_state(NODE_DATA(nid), NR_ISOLATED_FILE,\n"
+ "> +\t\t\t\t\t-nr_isolated[1]);\n"
+ "> +\t\tnr_isolated[0] = nr_isolated[1] = 0;\n"
+ "> +\t\tINIT_LIST_HEAD(&node_page_list);\n"
+ "> +\t}\n"
+ "> +\n"
+ "\n"
+ "BR\n"
+ Hillf
 
-ef6c3ab5184e3d813fc1368ded18e60ace39e0064c4d440f2fafd550597c3fc7
+46d85159f632b334bd012fa75453557df3abfe0e1b3edaa3d1e302b71079c202

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.