All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20190520035254.57579-2-minchan@kernel.org>

diff --git a/a/1.txt b/N1/1.txt
index 12d2a8a..317b4ca 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -1,338 +1,133 @@
-When a process expects no accesses to a certain memory range,
-it can hint to the kernel that the pages can be reclaimed
-when memory pressure happens, but that the data should be preserved
-for future use.  This could reduce workingset eviction, so it
-ends up increasing performance.
 
-This patch introduces the new MADV_COOL hint to the madvise(2)
-syscall. MADV_COOL can be used by a process to mark a memory range
-as not expected to be used in the near future. The hint can help
-the kernel decide which pages to evict early during memory
-pressure.
+On Mon, 20 May 2019 12:52:48 +0900 Minchan Kim wrote:
+> +static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,
+> +				unsigned long end, struct mm_walk *walk)
+> +{
+> +	pte_t *orig_pte, *pte, ptent;
+> +	spinlock_t *ptl;
+> +	struct page *page;
+> +	struct vm_area_struct *vma = walk->vma;
+> +	unsigned long next;
+> +
+> +	next = pmd_addr_end(addr, end);
+> +	if (pmd_trans_huge(*pmd)) {
+> +		spinlock_t *ptl;
 
-Internally, it works by moving pages from the active list to the
-head of the inactive list, so when memory pressure happens, they
-will be reclaimed earlier than other active pages unless they are
-accessed before then.
+This inner declaration seems unnecessary, since ptl is already declared above.
+> +
+> +		ptl = pmd_trans_huge_lock(pmd, vma);
+> +		if (!ptl)
+> +			return 0;
+> +
+> +		if (is_huge_zero_pmd(*pmd))
+> +			goto huge_unlock;
+> +
+> +		page = pmd_page(*pmd);
+> +		if (page_mapcount(page) > 1)
+> +			goto huge_unlock;
+> +
+> +		if (next - addr != HPAGE_PMD_SIZE) {
+> +			int err;
 
-* v1r2
- * use clear_page_young in deactivate_page - joelaf
+Alternatively, we could deactivate the THP only if the address range from
+userspace is sane enough, to avoid the complex work we have to do here.
+> +
+> +			get_page(page);
+> +			spin_unlock(ptl);
+> +			lock_page(page);
+> +			err = split_huge_page(page);
+> +			unlock_page(page);
+> +			put_page(page);
+> +			if (!err)
+> +				goto regular_page;
+> +			return 0;
+> +		}
+> +
+> +		pmdp_test_and_clear_young(vma, addr, pmd);
+> +		deactivate_page(page);
+> +huge_unlock:
+> +		spin_unlock(ptl);
+> +		return 0;
+> +	}
+> +
+> +	if (pmd_trans_unstable(pmd))
+> +		return 0;
+> +
+> +regular_page:
 
-* v1r1
- * Revise the description - surenb
- * Renaming from MADV_WARM to MADV_COOL - surenb
+Take a look at pending signal?
 
-Signed-off-by: Minchan Kim <minchan@kernel.org>
----
- include/linux/page-flags.h             |   1 +
- include/linux/page_idle.h              |  15 ++++
- include/linux/swap.h                   |   1 +
- include/uapi/asm-generic/mman-common.h |   1 +
- mm/madvise.c                           | 112 +++++++++++++++++++++++++
- mm/swap.c                              |  43 ++++++++++
- 6 files changed, 173 insertions(+)
+> +	orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+> +	for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {
 
-diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
-index 9f8712a4b1a5..58b06654c8dd 100644
---- a/include/linux/page-flags.h
-+++ b/include/linux/page-flags.h
-@@ -424,6 +424,7 @@ static inline bool set_hwpoison_free_buddy_page(struct page *page)
- TESTPAGEFLAG(Young, young, PF_ANY)
- SETPAGEFLAG(Young, young, PF_ANY)
- TESTCLEARFLAG(Young, young, PF_ANY)
-+CLEARPAGEFLAG(Young, young, PF_ANY)
- PAGEFLAG(Idle, idle, PF_ANY)
- #endif
- 
-diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
-index 1e894d34bdce..f3f43b317150 100644
---- a/include/linux/page_idle.h
-+++ b/include/linux/page_idle.h
-@@ -19,6 +19,11 @@ static inline void set_page_young(struct page *page)
- 	SetPageYoung(page);
- }
- 
-+static inline void clear_page_young(struct page *page)
-+{
-+	ClearPageYoung(page);
-+}
-+
- static inline bool test_and_clear_page_young(struct page *page)
- {
- 	return TestClearPageYoung(page);
-@@ -65,6 +70,16 @@ static inline void set_page_young(struct page *page)
- 	set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
- }
- 
-+static void clear_page_young(struct page *page)
-+{
-+	struct page_ext *page_ext = lookup_page_ext(page);
-+
-+	if (unlikely(!page_ext))
-+		return;
-+
-+	clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
-+}
-+
- static inline bool test_and_clear_page_young(struct page *page)
- {
- 	struct page_ext *page_ext = lookup_page_ext(page);
-diff --git a/include/linux/swap.h b/include/linux/swap.h
-index 4bfb5c4ac108..64795abea003 100644
---- a/include/linux/swap.h
-+++ b/include/linux/swap.h
-@@ -340,6 +340,7 @@ extern void lru_add_drain_cpu(int cpu);
- extern void lru_add_drain_all(void);
- extern void rotate_reclaimable_page(struct page *page);
- extern void deactivate_file_page(struct page *page);
-+extern void deactivate_page(struct page *page);
- extern void mark_page_lazyfree(struct page *page);
- extern void swap_setup(void);
- 
-diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
-index abd238d0f7a4..f7a4a5d4b642 100644
---- a/include/uapi/asm-generic/mman-common.h
-+++ b/include/uapi/asm-generic/mman-common.h
-@@ -42,6 +42,7 @@
- #define MADV_SEQUENTIAL	2		/* expect sequential page references */
- #define MADV_WILLNEED	3		/* will need these pages */
- #define MADV_DONTNEED	4		/* don't need these pages */
-+#define MADV_COOL	5		/* deactivatie these pages */
- 
- /* common parameters: try to keep these consistent across architectures */
- #define MADV_FREE	8		/* free pages only if memory pressure */
-diff --git a/mm/madvise.c b/mm/madvise.c
-index 628022e674a7..c05817fb570d 100644
---- a/mm/madvise.c
-+++ b/mm/madvise.c
-@@ -8,6 +8,7 @@
- 
- #include <linux/mman.h>
- #include <linux/pagemap.h>
-+#include <linux/page_idle.h>
- #include <linux/syscalls.h>
- #include <linux/mempolicy.h>
- #include <linux/page-isolation.h>
-@@ -40,6 +41,7 @@ static int madvise_need_mmap_write(int behavior)
- 	case MADV_REMOVE:
- 	case MADV_WILLNEED:
- 	case MADV_DONTNEED:
-+	case MADV_COOL:
- 	case MADV_FREE:
- 		return 0;
- 	default:
-@@ -307,6 +309,113 @@ static long madvise_willneed(struct vm_area_struct *vma,
- 	return 0;
- }
- 
-+static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,
-+				unsigned long end, struct mm_walk *walk)
-+{
-+	pte_t *orig_pte, *pte, ptent;
-+	spinlock_t *ptl;
-+	struct page *page;
-+	struct vm_area_struct *vma = walk->vma;
-+	unsigned long next;
-+
-+	next = pmd_addr_end(addr, end);
-+	if (pmd_trans_huge(*pmd)) {
-+		spinlock_t *ptl;
-+
-+		ptl = pmd_trans_huge_lock(pmd, vma);
-+		if (!ptl)
-+			return 0;
-+
-+		if (is_huge_zero_pmd(*pmd))
-+			goto huge_unlock;
-+
-+		page = pmd_page(*pmd);
-+		if (page_mapcount(page) > 1)
-+			goto huge_unlock;
-+
-+		if (next - addr != HPAGE_PMD_SIZE) {
-+			int err;
-+
-+			get_page(page);
-+			spin_unlock(ptl);
-+			lock_page(page);
-+			err = split_huge_page(page);
-+			unlock_page(page);
-+			put_page(page);
-+			if (!err)
-+				goto regular_page;
-+			return 0;
-+		}
-+
-+		pmdp_test_and_clear_young(vma, addr, pmd);
-+		deactivate_page(page);
-+huge_unlock:
-+		spin_unlock(ptl);
-+		return 0;
-+	}
-+
-+	if (pmd_trans_unstable(pmd))
-+		return 0;
-+
-+regular_page:
-+	orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-+	for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {
-+		ptent = *pte;
-+
-+		if (pte_none(ptent))
-+			continue;
-+
-+		if (!pte_present(ptent))
-+			continue;
-+
-+		page = vm_normal_page(vma, addr, ptent);
-+		if (!page)
-+			continue;
-+
-+		if (page_mapcount(page) > 1)
-+			continue;
-+
-+		ptep_test_and_clear_young(vma, addr, pte);
-+		deactivate_page(page);
-+	}
-+
-+	pte_unmap_unlock(orig_pte, ptl);
-+	cond_resched();
-+
-+	return 0;
-+}
-+
-+static void madvise_cool_page_range(struct mmu_gather *tlb,
-+			     struct vm_area_struct *vma,
-+			     unsigned long addr, unsigned long end)
-+{
-+	struct mm_walk cool_walk = {
-+		.pmd_entry = madvise_cool_pte_range,
-+		.mm = vma->vm_mm,
-+	};
-+
-+	tlb_start_vma(tlb, vma);
-+	walk_page_range(addr, end, &cool_walk);
-+	tlb_end_vma(tlb, vma);
-+}
-+
-+static long madvise_cool(struct vm_area_struct *vma,
-+			unsigned long start_addr, unsigned long end_addr)
-+{
-+	struct mm_struct *mm = vma->vm_mm;
-+	struct mmu_gather tlb;
-+
-+	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
-+		return -EINVAL;
-+
-+	lru_add_drain();
-+	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
-+	madvise_cool_page_range(&tlb, vma, start_addr, end_addr);
-+	tlb_finish_mmu(&tlb, start_addr, end_addr);
-+
-+	return 0;
-+}
-+
- static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
- 				unsigned long end, struct mm_walk *walk)
- 
-@@ -695,6 +804,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
- 		return madvise_remove(vma, prev, start, end);
- 	case MADV_WILLNEED:
- 		return madvise_willneed(vma, prev, start, end);
-+	case MADV_COOL:
-+		return madvise_cool(vma, start, end);
- 	case MADV_FREE:
- 	case MADV_DONTNEED:
- 		return madvise_dontneed_free(vma, prev, start, end, behavior);
-@@ -716,6 +827,7 @@ madvise_behavior_valid(int behavior)
- 	case MADV_WILLNEED:
- 	case MADV_DONTNEED:
- 	case MADV_FREE:
-+	case MADV_COOL:
- #ifdef CONFIG_KSM
- 	case MADV_MERGEABLE:
- 	case MADV_UNMERGEABLE:
-diff --git a/mm/swap.c b/mm/swap.c
-index 3a75722e68a9..0f94c3b5397d 100644
---- a/mm/swap.c
-+++ b/mm/swap.c
-@@ -46,6 +46,7 @@ int page_cluster;
- static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
- static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
- static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
-+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
- static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
- #ifdef CONFIG_SMP
- static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
-@@ -537,6 +538,23 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
- 	update_page_reclaim_stat(lruvec, file, 0);
- }
- 
-+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
-+			    void *arg)
-+{
-+	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
-+		int file = page_is_file_cache(page);
-+		int lru = page_lru_base_type(page);
-+
-+		del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);
-+		ClearPageActive(page);
-+		ClearPageReferenced(page);
-+		clear_page_young(page);
-+		add_page_to_lru_list(page, lruvec, lru);
-+
-+		__count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));
-+		update_page_reclaim_stat(lruvec, file, 0);
-+	}
-+}
- 
- static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
- 			    void *arg)
-@@ -589,6 +607,10 @@ void lru_add_drain_cpu(int cpu)
- 	if (pagevec_count(pvec))
- 		pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
- 
-+	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
-+	if (pagevec_count(pvec))
-+		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
-+
- 	pvec = &per_cpu(lru_lazyfree_pvecs, cpu);
- 	if (pagevec_count(pvec))
- 		pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
-@@ -622,6 +644,26 @@ void deactivate_file_page(struct page *page)
- 	}
- }
- 
-+/*
-+ * deactivate_page - deactivate a page
-+ * @page: page to deactivate
-+ *
-+ * deactivate_page() moves @page to the inactive list if @page was on the active
-+ * list and was not an unevictable page.  This is done to accelerate the reclaim
-+ * of @page.
-+ */
-+void deactivate_page(struct page *page)
-+{
-+	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
-+		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
-+
-+		get_page(page);
-+		if (!pagevec_add(pvec, page) || PageCompound(page))
-+			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
-+		put_cpu_var(lru_deactivate_pvecs);
-+	}
-+}
-+
- /**
-  * mark_page_lazyfree - make an anon page lazyfree
-  * @page: page to deactivate
-@@ -686,6 +728,7 @@ void lru_add_drain_all(void)
- 		if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
- 		    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
- 		    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
-+		    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
- 		    pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
- 		    need_activate_page_drain(cpu)) {
- 			INIT_WORK(work, lru_add_drain_per_cpu);
--- 
-2.21.0.1020.gf2820cf01a-goog
+s/end/next/ ?
+> +		ptent = *pte;
+> +
+> +		if (pte_none(ptent))
+> +			continue;
+> +
+> +		if (!pte_present(ptent))
+> +			continue;
+> +
+> +		page = vm_normal_page(vma, addr, ptent);
+> +		if (!page)
+> +			continue;
+> +
+> +		if (page_mapcount(page) > 1)
+> +			continue;
+> +
+> +		ptep_test_and_clear_young(vma, addr, pte);
+> +		deactivate_page(page);
+> +	}
+> +
+> +	pte_unmap_unlock(orig_pte, ptl);
+> +	cond_resched();
+> +
+> +	return 0;
+> +}
+> +
+> +static long madvise_cool(struct vm_area_struct *vma,
+> +			unsigned long start_addr, unsigned long end_addr)
+> +{
+> +	struct mm_struct *mm = vma->vm_mm;
+> +	struct mmu_gather tlb;
+> +
+> +	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
+> +		return -EINVAL;
+
+No service in case of VM_IO?
+> +
+> +	lru_add_drain();
+> +	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
+> +	madvise_cool_page_range(&tlb, vma, start_addr, end_addr);
+> +	tlb_finish_mmu(&tlb, start_addr, end_addr);
+> +
+> +	return 0;
+> +}
+> +
+> +/*
+> + * deactivate_page - deactivate a page
+> + * @page: page to deactivate
+> + *
+> + * deactivate_page() moves @page to the inactive list if @page was on the active
+> + * list and was not an unevictable page.  This is done to accelerate the reclaim
+> + * of @page.
+> + */
+> +void deactivate_page(struct page *page)
+> +{
+> +	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+> +		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+> +
+> +		get_page(page);
+
+A one-line comment explaining why the page is pinned seems needed here.
+
+> +		if (!pagevec_add(pvec, page) || PageCompound(page))
+> +			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+> +		put_cpu_var(lru_deactivate_pvecs);
+> +	}
+> +}
+> +
+
+--
+Hillf
diff --git a/a/content_digest b/N1/content_digest
index c3e13e8..37fbc5e 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -1,9 +1,10 @@
  "ref\020190520035254.57579-1-minchan@kernel.org\0"
- "From\0Minchan Kim <minchan@kernel.org>\0"
- "Subject\0[RFC 1/7] mm: introduce MADV_COOL\0"
- "Date\0Mon, 20 May 2019 12:52:48 +0900\0"
- "To\0Andrew Morton <akpm@linux-foundation.org>\0"
- "Cc\0LKML <linux-kernel@vger.kernel.org>"
+ "From\0Hillf Danton <hdanton@sina.com>\0"
+ "Subject\0Re: [RFC 1/7] mm: introduce MADV_COOL\0"
+ "Date\0Tue, 28 May 2019 16:53:01 +0800\0"
+ "To\0Minchan Kim <minchan@kernel.org>\0"
+ "Cc\0Andrew Morton <akpm@linux-foundation.org>"
+  LKML <linux-kernel@vger.kernel.org>
   linux-mm <linux-mm@kvack.org>
   Michal Hocko <mhocko@suse.com>
   Johannes Weiner <hannes@cmpxchg.org>
@@ -13,347 +14,141 @@
   Daniel Colascione <dancol@google.com>
   Shakeel Butt <shakeelb@google.com>
   Sonny Rao <sonnyrao@google.com>
-  Brian Geffon <bgeffon@google.com>
- " Minchan Kim <minchan@kernel.org>\0"
+ " Brian Geffon <bgeffon@google.com>\0"
  "\00:1\0"
  "b\0"
- "When a process expects no accesses to a certain memory range\n"
- "it could hint kernel that the pages can be reclaimed\n"
- "when memory pressure happens but data should be preserved\n"
- "for future use.  This could reduce workingset eviction so it\n"
- "ends up increasing performance.\n"
  "\n"
- "This patch introduces the new MADV_COOL hint to madvise(2)\n"
- "syscall. MADV_COOL can be used by a process to mark a memory range\n"
- "as not expected to be used in the near future. The hint can help\n"
- "kernel in deciding which pages to evict early during memory\n"
- "pressure.\n"
+ "On Mon, 20 May 2019 12:52:48 +0900 Minchan Kim wrote:\n"
+ "> +static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,\n"
+ "> +\t\t\t\tunsigned long end, struct mm_walk *walk)\n"
+ "> +{\n"
+ "> +\tpte_t *orig_pte, *pte, ptent;\n"
+ "> +\tspinlock_t *ptl;\n"
+ "> +\tstruct page *page;\n"
+ "> +\tstruct vm_area_struct *vma = walk->vma;\n"
+ "> +\tunsigned long next;\n"
+ "> +\n"
+ "> +\tnext = pmd_addr_end(addr, end);\n"
+ "> +\tif (pmd_trans_huge(*pmd)) {\n"
+ "> +\t\tspinlock_t *ptl;\n"
  "\n"
- "Internally, it works via deactivating memory from active list to\n"
- "inactive's head so when the memory pressure happens, they will be\n"
- "reclaimed earlier than other active pages unless there is no\n"
- "access until the time.\n"
+ "Seems not needed with another ptl declared above.\n"
+ "> +\n"
+ "> +\t\tptl = pmd_trans_huge_lock(pmd, vma);\n"
+ "> +\t\tif (!ptl)\n"
+ "> +\t\t\treturn 0;\n"
+ "> +\n"
+ "> +\t\tif (is_huge_zero_pmd(*pmd))\n"
+ "> +\t\t\tgoto huge_unlock;\n"
+ "> +\n"
+ "> +\t\tpage = pmd_page(*pmd);\n"
+ "> +\t\tif (page_mapcount(page) > 1)\n"
+ "> +\t\t\tgoto huge_unlock;\n"
+ "> +\n"
+ "> +\t\tif (next - addr != HPAGE_PMD_SIZE) {\n"
+ "> +\t\t\tint err;\n"
  "\n"
- "* v1r2\n"
- " * use clear_page_young in deactivate_page - joelaf\n"
+ "Alternately, we deactivate thp only if the address range from userspace\n"
+ "is sane enough, in order to avoid complex works we have to do here.\n"
+ "> +\n"
+ "> +\t\t\tget_page(page);\n"
+ "> +\t\t\tspin_unlock(ptl);\n"
+ "> +\t\t\tlock_page(page);\n"
+ "> +\t\t\terr = split_huge_page(page);\n"
+ "> +\t\t\tunlock_page(page);\n"
+ "> +\t\t\tput_page(page);\n"
+ "> +\t\t\tif (!err)\n"
+ "> +\t\t\t\tgoto regular_page;\n"
+ "> +\t\t\treturn 0;\n"
+ "> +\t\t}\n"
+ "> +\n"
+ "> +\t\tpmdp_test_and_clear_young(vma, addr, pmd);\n"
+ "> +\t\tdeactivate_page(page);\n"
+ "> +huge_unlock:\n"
+ "> +\t\tspin_unlock(ptl);\n"
+ "> +\t\treturn 0;\n"
+ "> +\t}\n"
+ "> +\n"
+ "> +\tif (pmd_trans_unstable(pmd))\n"
+ "> +\t\treturn 0;\n"
+ "> +\n"
+ "> +regular_page:\n"
  "\n"
- "* v1r1\n"
- " * Revise the description - surenb\n"
- " * Renaming from MADV_WARM to MADV_COOL - surenb\n"
+ "Take a look at pending signal?\n"
  "\n"
- "Signed-off-by: Minchan Kim <minchan@kernel.org>\n"
- "---\n"
- " include/linux/page-flags.h             |   1 +\n"
- " include/linux/page_idle.h              |  15 ++++\n"
- " include/linux/swap.h                   |   1 +\n"
- " include/uapi/asm-generic/mman-common.h |   1 +\n"
- " mm/madvise.c                           | 112 +++++++++++++++++++++++++\n"
- " mm/swap.c                              |  43 ++++++++++\n"
- " 6 files changed, 173 insertions(+)\n"
+ "> +\torig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);\n"
+ "> +\tfor (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {\n"
  "\n"
- "diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h\n"
- "index 9f8712a4b1a5..58b06654c8dd 100644\n"
- "--- a/include/linux/page-flags.h\n"
- "+++ b/include/linux/page-flags.h\n"
- "@@ -424,6 +424,7 @@ static inline bool set_hwpoison_free_buddy_page(struct page *page)\n"
- " TESTPAGEFLAG(Young, young, PF_ANY)\n"
- " SETPAGEFLAG(Young, young, PF_ANY)\n"
- " TESTCLEARFLAG(Young, young, PF_ANY)\n"
- "+CLEARPAGEFLAG(Young, young, PF_ANY)\n"
- " PAGEFLAG(Idle, idle, PF_ANY)\n"
- " #endif\n"
- " \n"
- "diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h\n"
- "index 1e894d34bdce..f3f43b317150 100644\n"
- "--- a/include/linux/page_idle.h\n"
- "+++ b/include/linux/page_idle.h\n"
- "@@ -19,6 +19,11 @@ static inline void set_page_young(struct page *page)\n"
- " \tSetPageYoung(page);\n"
- " }\n"
- " \n"
- "+static inline void clear_page_young(struct page *page)\n"
- "+{\n"
- "+\tClearPageYoung(page);\n"
- "+}\n"
- "+\n"
- " static inline bool test_and_clear_page_young(struct page *page)\n"
- " {\n"
- " \treturn TestClearPageYoung(page);\n"
- "@@ -65,6 +70,16 @@ static inline void set_page_young(struct page *page)\n"
- " \tset_bit(PAGE_EXT_YOUNG, &page_ext->flags);\n"
- " }\n"
- " \n"
- "+static void clear_page_young(struct page *page)\n"
- "+{\n"
- "+\tstruct page_ext *page_ext = lookup_page_ext(page);\n"
- "+\n"
- "+\tif (unlikely(!page_ext))\n"
- "+\t\treturn;\n"
- "+\n"
- "+\tclear_bit(PAGE_EXT_YOUNG, &page_ext->flags);\n"
- "+}\n"
- "+\n"
- " static inline bool test_and_clear_page_young(struct page *page)\n"
- " {\n"
- " \tstruct page_ext *page_ext = lookup_page_ext(page);\n"
- "diff --git a/include/linux/swap.h b/include/linux/swap.h\n"
- "index 4bfb5c4ac108..64795abea003 100644\n"
- "--- a/include/linux/swap.h\n"
- "+++ b/include/linux/swap.h\n"
- "@@ -340,6 +340,7 @@ extern void lru_add_drain_cpu(int cpu);\n"
- " extern void lru_add_drain_all(void);\n"
- " extern void rotate_reclaimable_page(struct page *page);\n"
- " extern void deactivate_file_page(struct page *page);\n"
- "+extern void deactivate_page(struct page *page);\n"
- " extern void mark_page_lazyfree(struct page *page);\n"
- " extern void swap_setup(void);\n"
- " \n"
- "diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h\n"
- "index abd238d0f7a4..f7a4a5d4b642 100644\n"
- "--- a/include/uapi/asm-generic/mman-common.h\n"
- "+++ b/include/uapi/asm-generic/mman-common.h\n"
- "@@ -42,6 +42,7 @@\n"
- " #define MADV_SEQUENTIAL\t2\t\t/* expect sequential page references */\n"
- " #define MADV_WILLNEED\t3\t\t/* will need these pages */\n"
- " #define MADV_DONTNEED\t4\t\t/* don't need these pages */\n"
- "+#define MADV_COOL\t5\t\t/* deactivatie these pages */\n"
- " \n"
- " /* common parameters: try to keep these consistent across architectures */\n"
- " #define MADV_FREE\t8\t\t/* free pages only if memory pressure */\n"
- "diff --git a/mm/madvise.c b/mm/madvise.c\n"
- "index 628022e674a7..c05817fb570d 100644\n"
- "--- a/mm/madvise.c\n"
- "+++ b/mm/madvise.c\n"
- "@@ -8,6 +8,7 @@\n"
- " \n"
- " #include <linux/mman.h>\n"
- " #include <linux/pagemap.h>\n"
- "+#include <linux/page_idle.h>\n"
- " #include <linux/syscalls.h>\n"
- " #include <linux/mempolicy.h>\n"
- " #include <linux/page-isolation.h>\n"
- "@@ -40,6 +41,7 @@ static int madvise_need_mmap_write(int behavior)\n"
- " \tcase MADV_REMOVE:\n"
- " \tcase MADV_WILLNEED:\n"
- " \tcase MADV_DONTNEED:\n"
- "+\tcase MADV_COOL:\n"
- " \tcase MADV_FREE:\n"
- " \t\treturn 0;\n"
- " \tdefault:\n"
- "@@ -307,6 +309,113 @@ static long madvise_willneed(struct vm_area_struct *vma,\n"
- " \treturn 0;\n"
- " }\n"
- " \n"
- "+static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,\n"
- "+\t\t\t\tunsigned long end, struct mm_walk *walk)\n"
- "+{\n"
- "+\tpte_t *orig_pte, *pte, ptent;\n"
- "+\tspinlock_t *ptl;\n"
- "+\tstruct page *page;\n"
- "+\tstruct vm_area_struct *vma = walk->vma;\n"
- "+\tunsigned long next;\n"
- "+\n"
- "+\tnext = pmd_addr_end(addr, end);\n"
- "+\tif (pmd_trans_huge(*pmd)) {\n"
- "+\t\tspinlock_t *ptl;\n"
- "+\n"
- "+\t\tptl = pmd_trans_huge_lock(pmd, vma);\n"
- "+\t\tif (!ptl)\n"
- "+\t\t\treturn 0;\n"
- "+\n"
- "+\t\tif (is_huge_zero_pmd(*pmd))\n"
- "+\t\t\tgoto huge_unlock;\n"
- "+\n"
- "+\t\tpage = pmd_page(*pmd);\n"
- "+\t\tif (page_mapcount(page) > 1)\n"
- "+\t\t\tgoto huge_unlock;\n"
- "+\n"
- "+\t\tif (next - addr != HPAGE_PMD_SIZE) {\n"
- "+\t\t\tint err;\n"
- "+\n"
- "+\t\t\tget_page(page);\n"
- "+\t\t\tspin_unlock(ptl);\n"
- "+\t\t\tlock_page(page);\n"
- "+\t\t\terr = split_huge_page(page);\n"
- "+\t\t\tunlock_page(page);\n"
- "+\t\t\tput_page(page);\n"
- "+\t\t\tif (!err)\n"
- "+\t\t\t\tgoto regular_page;\n"
- "+\t\t\treturn 0;\n"
- "+\t\t}\n"
- "+\n"
- "+\t\tpmdp_test_and_clear_young(vma, addr, pmd);\n"
- "+\t\tdeactivate_page(page);\n"
- "+huge_unlock:\n"
- "+\t\tspin_unlock(ptl);\n"
- "+\t\treturn 0;\n"
- "+\t}\n"
- "+\n"
- "+\tif (pmd_trans_unstable(pmd))\n"
- "+\t\treturn 0;\n"
- "+\n"
- "+regular_page:\n"
- "+\torig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);\n"
- "+\tfor (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {\n"
- "+\t\tptent = *pte;\n"
- "+\n"
- "+\t\tif (pte_none(ptent))\n"
- "+\t\t\tcontinue;\n"
- "+\n"
- "+\t\tif (!pte_present(ptent))\n"
- "+\t\t\tcontinue;\n"
- "+\n"
- "+\t\tpage = vm_normal_page(vma, addr, ptent);\n"
- "+\t\tif (!page)\n"
- "+\t\t\tcontinue;\n"
- "+\n"
- "+\t\tif (page_mapcount(page) > 1)\n"
- "+\t\t\tcontinue;\n"
- "+\n"
- "+\t\tptep_test_and_clear_young(vma, addr, pte);\n"
- "+\t\tdeactivate_page(page);\n"
- "+\t}\n"
- "+\n"
- "+\tpte_unmap_unlock(orig_pte, ptl);\n"
- "+\tcond_resched();\n"
- "+\n"
- "+\treturn 0;\n"
- "+}\n"
- "+\n"
- "+static void madvise_cool_page_range(struct mmu_gather *tlb,\n"
- "+\t\t\t     struct vm_area_struct *vma,\n"
- "+\t\t\t     unsigned long addr, unsigned long end)\n"
- "+{\n"
- "+\tstruct mm_walk cool_walk = {\n"
- "+\t\t.pmd_entry = madvise_cool_pte_range,\n"
- "+\t\t.mm = vma->vm_mm,\n"
- "+\t};\n"
- "+\n"
- "+\ttlb_start_vma(tlb, vma);\n"
- "+\twalk_page_range(addr, end, &cool_walk);\n"
- "+\ttlb_end_vma(tlb, vma);\n"
- "+}\n"
- "+\n"
- "+static long madvise_cool(struct vm_area_struct *vma,\n"
- "+\t\t\tunsigned long start_addr, unsigned long end_addr)\n"
- "+{\n"
- "+\tstruct mm_struct *mm = vma->vm_mm;\n"
- "+\tstruct mmu_gather tlb;\n"
- "+\n"
- "+\tif (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))\n"
- "+\t\treturn -EINVAL;\n"
- "+\n"
- "+\tlru_add_drain();\n"
- "+\ttlb_gather_mmu(&tlb, mm, start_addr, end_addr);\n"
- "+\tmadvise_cool_page_range(&tlb, vma, start_addr, end_addr);\n"
- "+\ttlb_finish_mmu(&tlb, start_addr, end_addr);\n"
- "+\n"
- "+\treturn 0;\n"
- "+}\n"
- "+\n"
- " static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,\n"
- " \t\t\t\tunsigned long end, struct mm_walk *walk)\n"
- " \n"
- "@@ -695,6 +804,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,\n"
- " \t\treturn madvise_remove(vma, prev, start, end);\n"
- " \tcase MADV_WILLNEED:\n"
- " \t\treturn madvise_willneed(vma, prev, start, end);\n"
- "+\tcase MADV_COOL:\n"
- "+\t\treturn madvise_cool(vma, start, end);\n"
- " \tcase MADV_FREE:\n"
- " \tcase MADV_DONTNEED:\n"
- " \t\treturn madvise_dontneed_free(vma, prev, start, end, behavior);\n"
- "@@ -716,6 +827,7 @@ madvise_behavior_valid(int behavior)\n"
- " \tcase MADV_WILLNEED:\n"
- " \tcase MADV_DONTNEED:\n"
- " \tcase MADV_FREE:\n"
- "+\tcase MADV_COOL:\n"
- " #ifdef CONFIG_KSM\n"
- " \tcase MADV_MERGEABLE:\n"
- " \tcase MADV_UNMERGEABLE:\n"
- "diff --git a/mm/swap.c b/mm/swap.c\n"
- "index 3a75722e68a9..0f94c3b5397d 100644\n"
- "--- a/mm/swap.c\n"
- "+++ b/mm/swap.c\n"
- "@@ -46,6 +46,7 @@ int page_cluster;\n"
- " static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);\n"
- " static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);\n"
- " static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);\n"
- "+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);\n"
- " static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);\n"
- " #ifdef CONFIG_SMP\n"
- " static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);\n"
- "@@ -537,6 +538,23 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,\n"
- " \tupdate_page_reclaim_stat(lruvec, file, 0);\n"
- " }\n"
- " \n"
- "+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,\n"
- "+\t\t\t    void *arg)\n"
- "+{\n"
- "+\tif (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {\n"
- "+\t\tint file = page_is_file_cache(page);\n"
- "+\t\tint lru = page_lru_base_type(page);\n"
- "+\n"
- "+\t\tdel_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);\n"
- "+\t\tClearPageActive(page);\n"
- "+\t\tClearPageReferenced(page);\n"
- "+\t\tclear_page_young(page);\n"
- "+\t\tadd_page_to_lru_list(page, lruvec, lru);\n"
- "+\n"
- "+\t\t__count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));\n"
- "+\t\tupdate_page_reclaim_stat(lruvec, file, 0);\n"
- "+\t}\n"
- "+}\n"
- " \n"
- " static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,\n"
- " \t\t\t    void *arg)\n"
- "@@ -589,6 +607,10 @@ void lru_add_drain_cpu(int cpu)\n"
- " \tif (pagevec_count(pvec))\n"
- " \t\tpagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);\n"
- " \n"
- "+\tpvec = &per_cpu(lru_deactivate_pvecs, cpu);\n"
- "+\tif (pagevec_count(pvec))\n"
- "+\t\tpagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);\n"
- "+\n"
- " \tpvec = &per_cpu(lru_lazyfree_pvecs, cpu);\n"
- " \tif (pagevec_count(pvec))\n"
- " \t\tpagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);\n"
- "@@ -622,6 +644,26 @@ void deactivate_file_page(struct page *page)\n"
- " \t}\n"
- " }\n"
- " \n"
- "+/*\n"
- "+ * deactivate_page - deactivate a page\n"
- "+ * @page: page to deactivate\n"
- "+ *\n"
- "+ * deactivate_page() moves @page to the inactive list if @page was on the active\n"
- "+ * list and was not an unevictable page.  This is done to accelerate the reclaim\n"
- "+ * of @page.\n"
- "+ */\n"
- "+void deactivate_page(struct page *page)\n"
- "+{\n"
- "+\tif (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {\n"
- "+\t\tstruct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);\n"
- "+\n"
- "+\t\tget_page(page);\n"
- "+\t\tif (!pagevec_add(pvec, page) || PageCompound(page))\n"
- "+\t\t\tpagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);\n"
- "+\t\tput_cpu_var(lru_deactivate_pvecs);\n"
- "+\t}\n"
- "+}\n"
- "+\n"
- " /**\n"
- "  * mark_page_lazyfree - make an anon page lazyfree\n"
- "  * @page: page to deactivate\n"
- "@@ -686,6 +728,7 @@ void lru_add_drain_all(void)\n"
- " \t\tif (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||\n"
- " \t\t    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||\n"
- " \t\t    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||\n"
- "+\t\t    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||\n"
- " \t\t    pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||\n"
- " \t\t    need_activate_page_drain(cpu)) {\n"
- " \t\t\tINIT_WORK(work, lru_add_drain_per_cpu);\n"
- "-- \n"
- 2.21.0.1020.gf2820cf01a-goog
+ "s/end/next/ ?\n"
+ "> +\t\tptent = *pte;\n"
+ "> +\n"
+ "> +\t\tif (pte_none(ptent))\n"
+ "> +\t\t\tcontinue;\n"
+ "> +\n"
+ "> +\t\tif (!pte_present(ptent))\n"
+ "> +\t\t\tcontinue;\n"
+ "> +\n"
+ "> +\t\tpage = vm_normal_page(vma, addr, ptent);\n"
+ "> +\t\tif (!page)\n"
+ "> +\t\t\tcontinue;\n"
+ "> +\n"
+ "> +\t\tif (page_mapcount(page) > 1)\n"
+ "> +\t\t\tcontinue;\n"
+ "> +\n"
+ "> +\t\tptep_test_and_clear_young(vma, addr, pte);\n"
+ "> +\t\tdeactivate_page(page);\n"
+ "> +\t}\n"
+ "> +\n"
+ "> +\tpte_unmap_unlock(orig_pte, ptl);\n"
+ "> +\tcond_resched();\n"
+ "> +\n"
+ "> +\treturn 0;\n"
+ "> +}\n"
+ "> +\n"
+ "> +static long madvise_cool(struct vm_area_struct *vma,\n"
+ "> +\t\t\tunsigned long start_addr, unsigned long end_addr)\n"
+ "> +{\n"
+ "> +\tstruct mm_struct *mm = vma->vm_mm;\n"
+ "> +\tstruct mmu_gather tlb;\n"
+ "> +\n"
+ "> +\tif (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))\n"
+ "> +\t\treturn -EINVAL;\n"
+ "\n"
+ "No service in case of VM_IO?\n"
+ "> +\n"
+ "> +\tlru_add_drain();\n"
+ "> +\ttlb_gather_mmu(&tlb, mm, start_addr, end_addr);\n"
+ "> +\tmadvise_cool_page_range(&tlb, vma, start_addr, end_addr);\n"
+ "> +\ttlb_finish_mmu(&tlb, start_addr, end_addr);\n"
+ "> +\n"
+ "> +\treturn 0;\n"
+ "> +}\n"
+ "> +\n"
+ "> +/*\n"
+ "> + * deactivate_page - deactivate a page\n"
+ "> + * @page: page to deactivate\n"
+ "> + *\n"
+ "> + * deactivate_page() moves @page to the inactive list if @page was on the active\n"
+ "> + * list and was not an unevictable page.  This is done to accelerate the reclaim\n"
+ "> + * of @page.\n"
+ "> + */\n"
+ "> +void deactivate_page(struct page *page)\n"
+ "> +{\n"
+ "> +\tif (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {\n"
+ "> +\t\tstruct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);\n"
+ "> +\n"
+ "> +\t\tget_page(page);\n"
+ "\n"
+ "A line of comment seems needed for pinning the page.\n"
+ "\n"
+ "> +\t\tif (!pagevec_add(pvec, page) || PageCompound(page))\n"
+ "> +\t\t\tpagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);\n"
+ "> +\t\tput_cpu_var(lru_deactivate_pvecs);\n"
+ "> +\t}\n"
+ "> +}\n"
+ "> +\n"
+ "\n"
+ "--\n"
+ Hillf
 
-76714939b2b300d19638111661c4e1e3f756b169a376ced66bb0a12185ab9e8f
+3f726f038c347818f2b246f151934fc80ce11befed99d28ed4eadc3835104774

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.