diff for duplicates of <20160429163444.GM11700@redhat.com> diff --git a/a/1.txt b/N1/1.txt index 9e839b3..63ab40b 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -21,3 +21,143 @@ because it counts things wrong in reuse_swap_page. Like I said there's room for optimizations so today I tried to optimize more stuff... + +>From 74f1fd7fab71a2cce0d1796fb38241acde2c1224 Mon Sep 17 00:00:00 2001 +From: Andrea Arcangeli <aarcange@redhat.com> +Date: Fri, 29 Apr 2016 01:05:06 +0200 +Subject: [PATCH 1/1] mm: thp: calculate the mapcount correctly for THP pages + during WP faults + +This will provide fully accuracy to the mapcount calculation in the +write protect faults, so page pinning will not get broken by false +positive copy-on-writes. + +total_mapcount() isn't the right calculation needed in +reuse_swap_page, so this introduces a page_trans_huge_mapcount() that +is effectively the full accurate return value for page_mapcount() if +dealing with Transparent Hugepages, however we only use the +page_trans_huge_mapcount() during COW faults where it strictly needed, +due to its higher runtime cost. 
+ +Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> +--- + include/linux/mm.h | 5 +++++ + include/linux/swap.h | 3 +-- + mm/huge_memory.c | 44 ++++++++++++++++++++++++++++++++++++-------- + mm/swapfile.c | 5 +---- + 4 files changed, 43 insertions(+), 14 deletions(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 8fb3604..c2026a1 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -501,11 +501,16 @@ static inline int page_mapcount(struct page *page) + + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + int total_mapcount(struct page *page); ++int page_trans_huge_mapcount(struct page *page); + #else + static inline int total_mapcount(struct page *page) + { + return page_mapcount(page); + } ++static inline int page_trans_huge_mapcount(struct page *page) ++{ ++ return page_mapcount(page); ++} + #endif + + static inline struct page *virt_to_head_page(const void *x) +diff --git a/include/linux/swap.h b/include/linux/swap.h +index 2f6478f..905bf8e 100644 +--- a/include/linux/swap.h ++++ b/include/linux/swap.h +@@ -517,8 +517,7 @@ static inline int swp_swapcount(swp_entry_t entry) + return 0; + } + +-#define reuse_swap_page(page) \ +- (!PageTransCompound(page) && page_mapcount(page) == 1) ++#define reuse_swap_page(page) (page_trans_huge_mapcount(page) == 1) + + static inline int try_to_free_swap(struct page *page) + { +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 06bce0f..6a6d9c0 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1298,15 +1298,9 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, + VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page); + /* + * We can only reuse the page if nobody else maps the huge page or it's +- * part. We can do it by checking page_mapcount() on each sub-page, but +- * it's expensive. +- * The cheaper way is to check page_count() to be equal 1: every +- * mapcount takes page reference reference, so this way we can +- * guarantee, that the PMD is the only mapping. 
+- * This can give false negative if somebody pinned the page, but that's +- * fine. ++ * part. + */ +- if (page_mapcount(page) == 1 && page_count(page) == 1) { ++ if (page_trans_huge_mapcount(page) == 1) { + pmd_t entry; + entry = pmd_mkyoung(orig_pmd); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); +@@ -3226,6 +3220,40 @@ int total_mapcount(struct page *page) + } + + /* ++ * This calculates accurately how many mappings a transparent hugepage ++ * has (unlike page_mapcount() which isn't fully accurate). This full ++ * accuracy is primarily needed to know if copy-on-write faults can ++ * takeover the page and change the mapping to read-write instead of ++ * copying them. This is different from total_mapcount() too: we must ++ * not count all mappings on the subpages individually, but instead we ++ * must check the highest mapcount any one of the subpages has. ++ * ++ * It would be entirely safe and even more correct to replace ++ * page_mapcount() with page_trans_huge_mapcount(), however we only ++ * use page_trans_huge_mapcount() in the copy-on-write faults where we ++ * need full accuracy to avoid breaking page pinning. ++ */ ++int page_trans_huge_mapcount(struct page *page) ++{ ++ int i, ret; ++ ++ VM_BUG_ON_PAGE(PageTail(page), page); ++ ++ if (likely(!PageCompound(page))) ++ return atomic_read(&page->_mapcount) + 1; ++ ++ ret = 0; ++ if (likely(!PageHuge(page))) { ++ for (i = 0; i < HPAGE_PMD_NR; i++) ++ ret = max(ret, atomic_read(&page[i]._mapcount) + 1); ++ if (PageDoubleMap(page)) ++ ret -= 1; ++ } ++ ret += compound_mapcount(page); ++ return ret; ++} ++ ++/* + * This function splits huge page into normal pages. @page can point to any + * subpage of huge page to split. Split doesn't change the position of @page. 
+ * +diff --git a/mm/swapfile.c b/mm/swapfile.c +index 83874ec..984470a 100644 +--- a/mm/swapfile.c ++++ b/mm/swapfile.c +@@ -930,10 +930,7 @@ int reuse_swap_page(struct page *page) + VM_BUG_ON_PAGE(!PageLocked(page), page); + if (unlikely(PageKsm(page))) + return 0; +- /* The page is part of THP and cannot be reused */ +- if (PageTransCompound(page)) +- return 0; +- count = page_mapcount(page); ++ count = page_trans_huge_mapcount(page); + if (count <= 1 && PageSwapCache(page)) { + count += page_swapcount(page); + if (count == 1 && !PageWriteback(page)) { diff --git a/a/content_digest b/N1/content_digest index 7ef7b4b..5188814 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -37,6 +37,146 @@ "because it counts things wrong in reuse_swap_page.\n" "\n" "Like I said there's room for optimizations so today I tried to\n" - optimize more stuff... + "optimize more stuff...\n" + "\n" + ">From 74f1fd7fab71a2cce0d1796fb38241acde2c1224 Mon Sep 17 00:00:00 2001\n" + "From: Andrea Arcangeli <aarcange@redhat.com>\n" + "Date: Fri, 29 Apr 2016 01:05:06 +0200\n" + "Subject: [PATCH 1/1] mm: thp: calculate the mapcount correctly for THP pages\n" + " during WP faults\n" + "\n" + "This will provide fully accuracy to the mapcount calculation in the\n" + "write protect faults, so page pinning will not get broken by false\n" + "positive copy-on-writes.\n" + "\n" + "total_mapcount() isn't the right calculation needed in\n" + "reuse_swap_page, so this introduces a page_trans_huge_mapcount() that\n" + "is effectively the full accurate return value for page_mapcount() if\n" + "dealing with Transparent Hugepages, however we only use the\n" + "page_trans_huge_mapcount() during COW faults where it strictly needed,\n" + "due to its higher runtime cost.\n" + "\n" + "Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>\n" + "---\n" + " include/linux/mm.h | 5 +++++\n" + " include/linux/swap.h | 3 +--\n" + " mm/huge_memory.c | 44 ++++++++++++++++++++++++++++++++++++--------\n" + " 
mm/swapfile.c | 5 +----\n" + " 4 files changed, 43 insertions(+), 14 deletions(-)\n" + "\n" + "diff --git a/include/linux/mm.h b/include/linux/mm.h\n" + "index 8fb3604..c2026a1 100644\n" + "--- a/include/linux/mm.h\n" + "+++ b/include/linux/mm.h\n" + "@@ -501,11 +501,16 @@ static inline int page_mapcount(struct page *page)\n" + " \n" + " #ifdef CONFIG_TRANSPARENT_HUGEPAGE\n" + " int total_mapcount(struct page *page);\n" + "+int page_trans_huge_mapcount(struct page *page);\n" + " #else\n" + " static inline int total_mapcount(struct page *page)\n" + " {\n" + " \treturn page_mapcount(page);\n" + " }\n" + "+static inline int page_trans_huge_mapcount(struct page *page)\n" + "+{\n" + "+\treturn page_mapcount(page);\n" + "+}\n" + " #endif\n" + " \n" + " static inline struct page *virt_to_head_page(const void *x)\n" + "diff --git a/include/linux/swap.h b/include/linux/swap.h\n" + "index 2f6478f..905bf8e 100644\n" + "--- a/include/linux/swap.h\n" + "+++ b/include/linux/swap.h\n" + "@@ -517,8 +517,7 @@ static inline int swp_swapcount(swp_entry_t entry)\n" + " \treturn 0;\n" + " }\n" + " \n" + "-#define reuse_swap_page(page) \\\n" + "-\t(!PageTransCompound(page) && page_mapcount(page) == 1)\n" + "+#define reuse_swap_page(page) (page_trans_huge_mapcount(page) == 1)\n" + " \n" + " static inline int try_to_free_swap(struct page *page)\n" + " {\n" + "diff --git a/mm/huge_memory.c b/mm/huge_memory.c\n" + "index 06bce0f..6a6d9c0 100644\n" + "--- a/mm/huge_memory.c\n" + "+++ b/mm/huge_memory.c\n" + "@@ -1298,15 +1298,9 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,\n" + " \tVM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);\n" + " \t/*\n" + " \t * We can only reuse the page if nobody else maps the huge page or it's\n" + "-\t * part. 
We can do it by checking page_mapcount() on each sub-page, but\n" + "-\t * it's expensive.\n" + "-\t * The cheaper way is to check page_count() to be equal 1: every\n" + "-\t * mapcount takes page reference reference, so this way we can\n" + "-\t * guarantee, that the PMD is the only mapping.\n" + "-\t * This can give false negative if somebody pinned the page, but that's\n" + "-\t * fine.\n" + "+\t * part.\n" + " \t */\n" + "-\tif (page_mapcount(page) == 1 && page_count(page) == 1) {\n" + "+\tif (page_trans_huge_mapcount(page) == 1) {\n" + " \t\tpmd_t entry;\n" + " \t\tentry = pmd_mkyoung(orig_pmd);\n" + " \t\tentry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);\n" + "@@ -3226,6 +3220,40 @@ int total_mapcount(struct page *page)\n" + " }\n" + " \n" + " /*\n" + "+ * This calculates accurately how many mappings a transparent hugepage\n" + "+ * has (unlike page_mapcount() which isn't fully accurate). This full\n" + "+ * accuracy is primarily needed to know if copy-on-write faults can\n" + "+ * takeover the page and change the mapping to read-write instead of\n" + "+ * copying them. 
This is different from total_mapcount() too: we must\n" + "+ * not count all mappings on the subpages individually, but instead we\n" + "+ * must check the highest mapcount any one of the subpages has.\n" + "+ *\n" + "+ * It would be entirely safe and even more correct to replace\n" + "+ * page_mapcount() with page_trans_huge_mapcount(), however we only\n" + "+ * use page_trans_huge_mapcount() in the copy-on-write faults where we\n" + "+ * need full accuracy to avoid breaking page pinning.\n" + "+ */\n" + "+int page_trans_huge_mapcount(struct page *page)\n" + "+{\n" + "+\tint i, ret;\n" + "+\n" + "+\tVM_BUG_ON_PAGE(PageTail(page), page);\n" + "+\n" + "+\tif (likely(!PageCompound(page)))\n" + "+\t\treturn atomic_read(&page->_mapcount) + 1;\n" + "+\n" + "+\tret = 0;\n" + "+\tif (likely(!PageHuge(page))) {\n" + "+\t\tfor (i = 0; i < HPAGE_PMD_NR; i++)\n" + "+\t\t\tret = max(ret, atomic_read(&page[i]._mapcount) + 1);\n" + "+\t\tif (PageDoubleMap(page))\n" + "+\t\t\tret -= 1;\n" + "+\t}\n" + "+\tret += compound_mapcount(page);\n" + "+\treturn ret;\n" + "+}\n" + "+\n" + "+/*\n" + " * This function splits huge page into normal pages. @page can point to any\n" + " * subpage of huge page to split. 
Split doesn't change the position of @page.\n" + " *\n" + "diff --git a/mm/swapfile.c b/mm/swapfile.c\n" + "index 83874ec..984470a 100644\n" + "--- a/mm/swapfile.c\n" + "+++ b/mm/swapfile.c\n" + "@@ -930,10 +930,7 @@ int reuse_swap_page(struct page *page)\n" + " \tVM_BUG_ON_PAGE(!PageLocked(page), page);\n" + " \tif (unlikely(PageKsm(page)))\n" + " \t\treturn 0;\n" + "-\t/* The page is part of THP and cannot be reused */\n" + "-\tif (PageTransCompound(page))\n" + "-\t\treturn 0;\n" + "-\tcount = page_mapcount(page);\n" + "+\tcount = page_trans_huge_mapcount(page);\n" + " \tif (count <= 1 && PageSwapCache(page)) {\n" + " \t\tcount += page_swapcount(page);\n" + " \t\tif (count == 1 && !PageWriteback(page)) {" -b49af10ba3f9372cfbad30e17f54023ab55ba4894ce0b56b698b1257d4a324ca +abaa58ad4b94e86be58dc1487ac1936689f762260495d0bfaf0848346969fb49
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.