All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20150330052250.GA3008@blaptop>

diff --git a/a/1.txt b/N1/1.txt
index f2a771d..0f3661e 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -1 +1,139 @@
 2nd description trial.
+
+>From ccfc6c79634f6cec69d8fb23b0e863ebfa5b893c Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Mon, 30 Mar 2015 13:43:08 +0900
+Subject: [PATCH v2] mm: make every pte dirty on do_swap_page
+
+Bascially, MADV_FREE relys on the dirty bit in page table entry
+to decide whether VM allows to discard the page or not.
+IOW, if page table entry includes marked dirty bit, VM shouldn't
+discard the page.
+
+However, if swap-in by read fault happens, page table entry
+point out the page doesn't have marked dirty bit so MADV_FREE
+might discard the page wrongly. For avoiding the problem,
+MADV_FREE did more checks with PageDirty and PageSwapCache.
+It worked out because swapped-in page lives on swap cache
+and since it was evicted from the swap cache, the page has
+PG_dirty flag. So both page flags checks effectvely prevent
+wrong discarding by MADV_FREE.
+
+A problem in above logic is that swapped-in page has PG_dirty
+since they are removed from swap cache so VM cannot consider
+those pages as freeable any more alghouth madvise_free is
+called in future. Look at below example for detail.
+
+ptr = malloc();
+memset(ptr);
+..
+..
+.. heavy memory pressure so all of pages are swapped out
+..
+..
+var = *ptr; -> a page swapped-in and removed from swapcache.
+               page table doesn't mark dirty bit and page
+               descriptor includes PG_dirty
+..
+..
+madvise_free(ptr);
+..
+..
+..
+.. heavy memory pressure again.
+.. In this time, VM cannot discard the page because the page
+.. has *PG_dirty*
+
+Rather than relying on the PG_dirty of page descriptor
+for preventing discarding a page, dirty bit in page table is more
+straightforward and simple. So, this patch makes page table dirty
+bit marked whenever swap-in happens. Inherenty, page table entry
+point out swapped-out page had dirty bit so I think it's no prblem.
+
+With this, it removes complicated logic and makes freeable page
+checking by madvise_free simple. Of course, we could solve
+above mentioned example.
+
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Cyrill Gorcunov <gorcunov@gmail.com>
+Cc: Pavel Emelyanov <xemul@parallels.com>
+Reported-by: Yalin Wang <yalin.wang@sonymobile.com>
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+---
+
+* From v1:
+  * Rewrite description - Andrew
+
+ mm/madvise.c |  1 -
+ mm/memory.c  | 10 ++++++++--
+ mm/rmap.c    |  2 +-
+ mm/vmscan.c  |  3 +--
+ 4 files changed, 10 insertions(+), 6 deletions(-)
+
+diff --git a/mm/madvise.c b/mm/madvise.c
+index 22e8f0c..a045798 100644
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -325,7 +325,6 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
+ 				continue;
+ 			}
+ 
+-			ClearPageDirty(page);
+ 			unlock_page(page);
+ 		}
+ 
+diff --git a/mm/memory.c b/mm/memory.c
+index 6743966..48ff537 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2521,9 +2521,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ 
+ 	inc_mm_counter_fast(mm, MM_ANONPAGES);
+ 	dec_mm_counter_fast(mm, MM_SWAPENTS);
+-	pte = mk_pte(page, vma->vm_page_prot);
++
++	/*
++	 * The page is swapping in now was dirty before it was swapped out
++	 * so restore the state again(ie, pte_mkdirty) because MADV_FREE
++	 * relies on the dirty bit on page table.
++	 */
++	pte = pte_mkdirty(mk_pte(page, vma->vm_page_prot));
+ 	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
+-		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
++		pte = maybe_mkwrite(pte, vma);
+ 		flags &= ~FAULT_FLAG_WRITE;
+ 		ret |= VM_FAULT_WRITE;
+ 		exclusive = 1;
+diff --git a/mm/rmap.c b/mm/rmap.c
+index dad23a4..281e806 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1275,7 +1275,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+ 
+ 		if (flags & TTU_FREE) {
+ 			VM_BUG_ON_PAGE(PageSwapCache(page), page);
+-			if (!dirty && !PageDirty(page)) {
++			if (!dirty) {
+ 				/* It's a freeable page by MADV_FREE */
+ 				dec_mm_counter(mm, MM_ANONPAGES);
+ 				goto discard;
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index dc6cd51..fffebf0 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -805,8 +805,7 @@ static enum page_references page_check_references(struct page *page,
+ 		return PAGEREF_KEEP;
+ 	}
+ 
+-	if (PageAnon(page) && !pte_dirty && !PageSwapCache(page) &&
+-			!PageDirty(page))
++	if (PageAnon(page) && !pte_dirty && !PageSwapCache(page))
+ 		*freeable = true;
+ 
+ 	/* Reclaim if clean, defer dirty pages to writeback */
+-- 
+1.9.3
+
+-- 
+Kind regards,
+Minchan Kim
diff --git a/a/content_digest b/N1/content_digest
index d467f2c..d4176af 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -17,6 +17,144 @@
  " Pavel Emelyanov <xemul@parallels.com>\0"
  "\00:1\0"
  "b\0"
- 2nd description trial.
+ "2nd description trial.\n"
+ "\n"
+ ">From ccfc6c79634f6cec69d8fb23b0e863ebfa5b893c Mon Sep 17 00:00:00 2001\n"
+ "From: Minchan Kim <minchan@kernel.org>\n"
+ "Date: Mon, 30 Mar 2015 13:43:08 +0900\n"
+ "Subject: [PATCH v2] mm: make every pte dirty on do_swap_page\n"
+ "\n"
+ "Bascially, MADV_FREE relys on the dirty bit in page table entry\n"
+ "to decide whether VM allows to discard the page or not.\n"
+ "IOW, if page table entry includes marked dirty bit, VM shouldn't\n"
+ "discard the page.\n"
+ "\n"
+ "However, if swap-in by read fault happens, page table entry\n"
+ "point out the page doesn't have marked dirty bit so MADV_FREE\n"
+ "might discard the page wrongly. For avoiding the problem,\n"
+ "MADV_FREE did more checks with PageDirty and PageSwapCache.\n"
+ "It worked out because swapped-in page lives on swap cache\n"
+ "and since it was evicted from the swap cache, the page has\n"
+ "PG_dirty flag. So both page flags checks effectvely prevent\n"
+ "wrong discarding by MADV_FREE.\n"
+ "\n"
+ "A problem in above logic is that swapped-in page has PG_dirty\n"
+ "since they are removed from swap cache so VM cannot consider\n"
+ "those pages as freeable any more alghouth madvise_free is\n"
+ "called in future. Look at below example for detail.\n"
+ "\n"
+ "ptr = malloc();\n"
+ "memset(ptr);\n"
+ "..\n"
+ "..\n"
+ ".. heavy memory pressure so all of pages are swapped out\n"
+ "..\n"
+ "..\n"
+ "var = *ptr; -> a page swapped-in and removed from swapcache.\n"
+ "               page table doesn't mark dirty bit and page\n"
+ "               descriptor includes PG_dirty\n"
+ "..\n"
+ "..\n"
+ "madvise_free(ptr);\n"
+ "..\n"
+ "..\n"
+ "..\n"
+ ".. heavy memory pressure again.\n"
+ ".. In this time, VM cannot discard the page because the page\n"
+ ".. has *PG_dirty*\n"
+ "\n"
+ "Rather than relying on the PG_dirty of page descriptor\n"
+ "for preventing discarding a page, dirty bit in page table is more\n"
+ "straightforward and simple. So, this patch makes page table dirty\n"
+ "bit marked whenever swap-in happens. Inherenty, page table entry\n"
+ "point out swapped-out page had dirty bit so I think it's no prblem.\n"
+ "\n"
+ "With this, it removes complicated logic and makes freeable page\n"
+ "checking by madvise_free simple. Of course, we could solve\n"
+ "above mentioned example.\n"
+ "\n"
+ "Cc: Hugh Dickins <hughd@google.com>\n"
+ "Cc: Cyrill Gorcunov <gorcunov@gmail.com>\n"
+ "Cc: Pavel Emelyanov <xemul@parallels.com>\n"
+ "Reported-by: Yalin Wang <yalin.wang@sonymobile.com>\n"
+ "Signed-off-by: Minchan Kim <minchan@kernel.org>\n"
+ "---\n"
+ "\n"
+ "* From v1:\n"
+ "  * Rewrite description - Andrew\n"
+ "\n"
+ " mm/madvise.c |  1 -\n"
+ " mm/memory.c  | 10 ++++++++--\n"
+ " mm/rmap.c    |  2 +-\n"
+ " mm/vmscan.c  |  3 +--\n"
+ " 4 files changed, 10 insertions(+), 6 deletions(-)\n"
+ "\n"
+ "diff --git a/mm/madvise.c b/mm/madvise.c\n"
+ "index 22e8f0c..a045798 100644\n"
+ "--- a/mm/madvise.c\n"
+ "+++ b/mm/madvise.c\n"
+ "@@ -325,7 +325,6 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,\n"
+ " \t\t\t\tcontinue;\n"
+ " \t\t\t}\n"
+ " \n"
+ "-\t\t\tClearPageDirty(page);\n"
+ " \t\t\tunlock_page(page);\n"
+ " \t\t}\n"
+ " \n"
+ "diff --git a/mm/memory.c b/mm/memory.c\n"
+ "index 6743966..48ff537 100644\n"
+ "--- a/mm/memory.c\n"
+ "+++ b/mm/memory.c\n"
+ "@@ -2521,9 +2521,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,\n"
+ " \n"
+ " \tinc_mm_counter_fast(mm, MM_ANONPAGES);\n"
+ " \tdec_mm_counter_fast(mm, MM_SWAPENTS);\n"
+ "-\tpte = mk_pte(page, vma->vm_page_prot);\n"
+ "+\n"
+ "+\t/*\n"
+ "+\t * The page is swapping in now was dirty before it was swapped out\n"
+ "+\t * so restore the state again(ie, pte_mkdirty) because MADV_FREE\n"
+ "+\t * relies on the dirty bit on page table.\n"
+ "+\t */\n"
+ "+\tpte = pte_mkdirty(mk_pte(page, vma->vm_page_prot));\n"
+ " \tif ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {\n"
+ "-\t\tpte = maybe_mkwrite(pte_mkdirty(pte), vma);\n"
+ "+\t\tpte = maybe_mkwrite(pte, vma);\n"
+ " \t\tflags &= ~FAULT_FLAG_WRITE;\n"
+ " \t\tret |= VM_FAULT_WRITE;\n"
+ " \t\texclusive = 1;\n"
+ "diff --git a/mm/rmap.c b/mm/rmap.c\n"
+ "index dad23a4..281e806 100644\n"
+ "--- a/mm/rmap.c\n"
+ "+++ b/mm/rmap.c\n"
+ "@@ -1275,7 +1275,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,\n"
+ " \n"
+ " \t\tif (flags & TTU_FREE) {\n"
+ " \t\t\tVM_BUG_ON_PAGE(PageSwapCache(page), page);\n"
+ "-\t\t\tif (!dirty && !PageDirty(page)) {\n"
+ "+\t\t\tif (!dirty) {\n"
+ " \t\t\t\t/* It's a freeable page by MADV_FREE */\n"
+ " \t\t\t\tdec_mm_counter(mm, MM_ANONPAGES);\n"
+ " \t\t\t\tgoto discard;\n"
+ "diff --git a/mm/vmscan.c b/mm/vmscan.c\n"
+ "index dc6cd51..fffebf0 100644\n"
+ "--- a/mm/vmscan.c\n"
+ "+++ b/mm/vmscan.c\n"
+ "@@ -805,8 +805,7 @@ static enum page_references page_check_references(struct page *page,\n"
+ " \t\treturn PAGEREF_KEEP;\n"
+ " \t}\n"
+ " \n"
+ "-\tif (PageAnon(page) && !pte_dirty && !PageSwapCache(page) &&\n"
+ "-\t\t\t!PageDirty(page))\n"
+ "+\tif (PageAnon(page) && !pte_dirty && !PageSwapCache(page))\n"
+ " \t\t*freeable = true;\n"
+ " \n"
+ " \t/* Reclaim if clean, defer dirty pages to writeback */\n"
+ "-- \n"
+ "1.9.3\n"
+ "\n"
+ "-- \n"
+ "Kind regards,\n"
+ Minchan Kim
 
-f85df9b8c917e4c1ecd47eb5a5d8eff8db09ef28134331be085bd261cff0bc0f
+19e804f2addba4d10366e53de9f7b9cd0dd344bac872e2724a105153bcc4b4cc

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.