All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20150303032537.GA25015@blaptop>

diff --git a/a/1.txt b/N1/1.txt
index 1d72ab1..8d5fc2c 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -136,3 +136,121 @@ I will send different patch. Please review it.
 
 So, my suggestion is below. It always makes pte dirty so let's Cc
 Cyrill to take care of softdirty and Hugh who is Mr.Swap.
+
+>From 30c6d5b35a3dc7e451041183ce5efd6a6c42bf88 Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Tue, 3 Mar 2015 10:06:59 +0900
+Subject: [RFC] mm: make every pte dirty on do_swap_page
+
+Basically, MADV_FREE relies on the pte dirty bit to decide whether
+the VM should discard a page or not. However, a swapped-in page doesn't
+have pte_dirty. Instead, it checks PageDirty and PageSwapCache for
+such a page, because a swapped-in page could live in the swap cache or
+have PageDirty set when it is removed from the swapcache, so MADV_FREE
+checks them and doesn't discard the page.
+
+The problem here is that any anonymous page can have PageDirty if
+it is removed from the swapcache, so the VM cannot treat those pages
+as freeable even if we did madvise_free. Look at the example below.
+
+ptr = malloc();
+memset(ptr);
+..
+heavy memory pressure -> swap-out all of pages
+..
+out of memory pressure
+..
+var = *ptr; -> swap-in page/remove the page from swapcache. so pte_clean
+               but SetPageDirty
+
+madvise_free(ptr);
+..
+..
+heavy memory pressure -> VM cannot discard the page by PageDirty.
+
+PageDirty for an anonymous page aims to avoid duplicate
+swap-outs. In other words, if a page has been swapped in but still
+lives in the swapcache (i.e., !PageDirty), we can skip the swapout if
+the page is selected as a victim by the VM in the future, because the
+swap device has kept the previously swapped-out contents of the page.
+
+So, rather than relying on PG_dirty to make madvise_free work,
+using pte_dirty is more straightforward.
+Inherently, a swapped-out page was pte_dirty, so this patch restores
+the dirtiness when a swap-in fault happens, and madvise_free no longer
+relies on PageDirty.
+
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+---
+ mm/madvise.c | 1 -
+ mm/memory.c  | 9 +++++++--
+ mm/rmap.c    | 2 +-
+ mm/vmscan.c  | 3 +--
+ 4 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/mm/madvise.c b/mm/madvise.c
+index 6d0fcb8..d64200e 100644
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -309,7 +309,6 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
+ 				continue;
+ 			}
+ 
+-			ClearPageDirty(page);
+ 			unlock_page(page);
+ 		}
+ 
+diff --git a/mm/memory.c b/mm/memory.c
+index 8ae52c9..2f45e77 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2460,9 +2460,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ 
+ 	inc_mm_counter_fast(mm, MM_ANONPAGES);
+ 	dec_mm_counter_fast(mm, MM_SWAPENTS);
+-	pte = mk_pte(page, vma->vm_page_prot);
++
++	/*
++	 * Every swapped-out page was pte_dirty, so we make the pte dirty again.
++	 * MADV_FREE relies on it.
++	 */
++	pte = mk_pte(pte_mkdirty(page), vma->vm_page_prot);
+ 	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
+-		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
++		pte = maybe_mkwrite(pte, vma);
+ 		flags &= ~FAULT_FLAG_WRITE;
+ 		ret |= VM_FAULT_WRITE;
+ 		exclusive = 1;
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 47b3ba8..34c1d66 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1268,7 +1268,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+ 
+ 		if (flags & TTU_FREE) {
+ 			VM_BUG_ON_PAGE(PageSwapCache(page), page);
+-			if (!dirty && !PageDirty(page)) {
++			if (!dirty) {
+ 				/* It's a freeable page by MADV_FREE */
+ 				dec_mm_counter(mm, MM_ANONPAGES);
+ 				goto discard;
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 671e47e..7f520c9 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -805,8 +805,7 @@ static enum page_references page_check_references(struct page *page,
+ 		return PAGEREF_KEEP;
+ 	}
+ 
+-	if (PageAnon(page) && !pte_dirty && !PageSwapCache(page) &&
+-			!PageDirty(page))
++	if (PageAnon(page) && !pte_dirty && !PageSwapCache(page))
+ 		*freeable = true;
+ 
+ 	/* Reclaim if clean, defer dirty pages to writeback */
+-- 
+1.9.3
+
+-- 
+Kind regards,
+Minchan Kim
diff --git a/a/content_digest b/N1/content_digest
index 99a28c4..0d18ad0 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -152,6 +152,124 @@
  "I will send different patch. Please review it.\n"
  "\n"
  "So, my suggestion is below. It always makes pte dirty so let's Cc\n"
- Cyrill to take care of softdirty and Hugh who is Mr.Swap.
+ "Cyrill to take care of softdirty and Hugh who is Mr.Swap.\n"
+ "\n"
+ ">From 30c6d5b35a3dc7e451041183ce5efd6a6c42bf88 Mon Sep 17 00:00:00 2001\n"
+ "From: Minchan Kim <minchan@kernel.org>\n"
+ "Date: Tue, 3 Mar 2015 10:06:59 +0900\n"
+ "Subject: [RFC] mm: make every pte dirty on do_swap_page\n"
+ "\n"
+ "Bascially, MADV_FREE relys on the pte dirty to decide whether\n"
+ "VM should discard or not. However, swapped-in page doesn't have\n"
+ "pte_dirty. Instead, it checks PageDirty and PageSwapCache for\n"
+ "such page because swapped-in page could live on swap cache or\n"
+ "set PageDirty when it is removed from swapcache so MADV_FREE\n"
+ "checks it and doesn't discard.\n"
+ "\n"
+ "The problem in here is any anonymous page can have PageDirty if\n"
+ "it is removed from swapcache so that VM cannot parse those pages\n"
+ "as freeable even if we did madvise_free. Look at below example.\n"
+ "\n"
+ "ptr = malloc();\n"
+ "memset(ptr);\n"
+ "..\n"
+ "heavy memory pressure -> swap-out all of pages\n"
+ "..\n"
+ "out of memory pressure\n"
+ "..\n"
+ "var = *ptr; -> swap-in page/remove the page from swapcache. so pte_clean\n"
+ "               but SetPageDirty\n"
+ "\n"
+ "madvise_free(ptr);\n"
+ "..\n"
+ "..\n"
+ "heavy memory pressure -> VM cannot discard the page by PageDirty.\n"
+ "\n"
+ "PageDirty for anonymous page aims for avoiding duplicating\n"
+ "swapping out. In other words, if a page have swapped-in but\n"
+ "live swapcache(ie, !PageDirty), we could save swapout if the page\n"
+ "is selected as victim by VM in future because swap device have\n"
+ "kept previous swapped-out contents of the page.\n"
+ "\n"
+ "So, rather than relying on the PG_dirty for working madvise_free,\n"
+ "pte_dirty is more straightforward.\n"
+ "Inherently, swapped-out page was pte_dirty so this patch restores\n"
+ "the dirtiness when swap-in fault happens and madvise_free doesn't\n"
+ "rely on the PageDirty.\n"
+ "\n"
+ "Signed-off-by: Minchan Kim <minchan@kernel.org>\n"
+ "---\n"
+ " mm/madvise.c | 1 -\n"
+ " mm/memory.c  | 9 +++++++--\n"
+ " mm/rmap.c    | 2 +-\n"
+ " mm/vmscan.c  | 3 +--\n"
+ " 4 files changed, 9 insertions(+), 6 deletions(-)\n"
+ "\n"
+ "diff --git a/mm/madvise.c b/mm/madvise.c\n"
+ "index 6d0fcb8..d64200e 100644\n"
+ "--- a/mm/madvise.c\n"
+ "+++ b/mm/madvise.c\n"
+ "@@ -309,7 +309,6 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,\n"
+ " \t\t\t\tcontinue;\n"
+ " \t\t\t}\n"
+ " \n"
+ "-\t\t\tClearPageDirty(page);\n"
+ " \t\t\tunlock_page(page);\n"
+ " \t\t}\n"
+ " \n"
+ "diff --git a/mm/memory.c b/mm/memory.c\n"
+ "index 8ae52c9..2f45e77 100644\n"
+ "--- a/mm/memory.c\n"
+ "+++ b/mm/memory.c\n"
+ "@@ -2460,9 +2460,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,\n"
+ " \n"
+ " \tinc_mm_counter_fast(mm, MM_ANONPAGES);\n"
+ " \tdec_mm_counter_fast(mm, MM_SWAPENTS);\n"
+ "-\tpte = mk_pte(page, vma->vm_page_prot);\n"
+ "+\n"
+ "+\t/*\n"
+ "+\t * Every page swapped-out was pte_dirty so we makes pte dirty again.\n"
+ "+\t * MADV_FREE relys on it.\n"
+ "+\t */\n"
+ "+\tpte = mk_pte(pte_mkdirty(page), vma->vm_page_prot);\n"
+ " \tif ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {\n"
+ "-\t\tpte = maybe_mkwrite(pte_mkdirty(pte), vma);\n"
+ "+\t\tpte = maybe_mkwrite(pte, vma);\n"
+ " \t\tflags &= ~FAULT_FLAG_WRITE;\n"
+ " \t\tret |= VM_FAULT_WRITE;\n"
+ " \t\texclusive = 1;\n"
+ "diff --git a/mm/rmap.c b/mm/rmap.c\n"
+ "index 47b3ba8..34c1d66 100644\n"
+ "--- a/mm/rmap.c\n"
+ "+++ b/mm/rmap.c\n"
+ "@@ -1268,7 +1268,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,\n"
+ " \n"
+ " \t\tif (flags & TTU_FREE) {\n"
+ " \t\t\tVM_BUG_ON_PAGE(PageSwapCache(page), page);\n"
+ "-\t\t\tif (!dirty && !PageDirty(page)) {\n"
+ "+\t\t\tif (!dirty) {\n"
+ " \t\t\t\t/* It's a freeable page by MADV_FREE */\n"
+ " \t\t\t\tdec_mm_counter(mm, MM_ANONPAGES);\n"
+ " \t\t\t\tgoto discard;\n"
+ "diff --git a/mm/vmscan.c b/mm/vmscan.c\n"
+ "index 671e47e..7f520c9 100644\n"
+ "--- a/mm/vmscan.c\n"
+ "+++ b/mm/vmscan.c\n"
+ "@@ -805,8 +805,7 @@ static enum page_references page_check_references(struct page *page,\n"
+ " \t\treturn PAGEREF_KEEP;\n"
+ " \t}\n"
+ " \n"
+ "-\tif (PageAnon(page) && !pte_dirty && !PageSwapCache(page) &&\n"
+ "-\t\t\t!PageDirty(page))\n"
+ "+\tif (PageAnon(page) && !pte_dirty && !PageSwapCache(page))\n"
+ " \t\t*freeable = true;\n"
+ " \n"
+ " \t/* Reclaim if clean, defer dirty pages to writeback */\n"
+ "-- \n"
+ "1.9.3\n"
+ "\n"
+ "-- \n"
+ "Kind regards,\n"
+ Minchan Kim
 
-bb8e528f1aff44664ae92e0db7848b41e9ecaf0fceffd35b6a4d8f282d12a197
+74ee3fa20dfa7c5d4bd7973290e685c428c56cd828778ed12ecc321615b61b0c

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.