diff for duplicates of <20131120002053.GE10493@redhat.com> diff --git a/a/1.txt b/N1/1.txt index 33a0693..58a7d18 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -16,3 +16,187 @@ The current version would look like below, which is less obviously horrible than before :). We may also consider to keep your indent cleanup incremental as it's much easier to review the actual change of code logic the below way. + +>From 8a7bd3b25f7ee69b53a1a9e681790333df9504a6 Mon Sep 17 00:00:00 2001 +From: Andrea Arcangeli <aarcange@redhat.com> +Date: Thu, 14 Nov 2013 20:17:38 +0100 +Subject: [PATCH] mm: tail page refcounting optimization for slab and hugetlbfs + +This skips the _mapcount mangling for slab and hugetlbfs pages. + +The main trouble in doing this is to guarantee that PageSlab and +PageHeadHuge remains constant for all get_page/put_page run on the +tail of slab or hugetlbfs compound pages. Otherwise if they're set +during get_page but not set during put_page, the _mapcount of the tail +page would underflow. + +PageHeadHuge will remain true until the compound page is released and +enters the buddy allocator so it won't risk to change even if the tail +page is the last reference left on the page. + +PG_slab instead is cleared before the slab frees the head page with +put_page, so if the tail pin is released after the slab freed the +page, we would have a problem. But in the slab case the tail pin +cannot be the last reference left on the page. This is because the +slab code is free to reuse the compound page after a +kfree/kmem_cache_free without having to check if there's any tail pin +left. In turn all tail pins must be always released while the head is +still pinned by the slab code and so we know PG_slab will be still set +too. + +Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> +--- + include/linux/hugetlb.h | 6 ------ + include/linux/mm.h | 32 +++++++++++++++++++++++++++++++- + mm/internal.h | 3 ++- + mm/swap.c | 33 +++++++++++++++++++++++++++------ + 4 files changed, 60 insertions(+), 14 deletions(-) + +diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h +index d4f3dbf..acd2010 100644 +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -31,7 +31,6 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks); + void hugepage_put_subpool(struct hugepage_subpool *spool); + + int PageHuge(struct page *page); +-int PageHeadHuge(struct page *page_head); + + void reset_vma_resv_huge_pages(struct vm_area_struct *vma); + int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +@@ -105,11 +104,6 @@ static inline int PageHuge(struct page *page) + return 0; + } + +-static inline int PageHeadHuge(struct page *page_head) +-{ +- return 0; +-} +- + static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) + { + } +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 0548eb2..6b20b34 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -414,15 +414,45 @@ static inline int page_count(struct page *page) + return atomic_read(&compound_head(page)->_count); + } + ++#ifdef CONFIG_HUGETLB_PAGE ++extern int PageHeadHuge(struct page *page_head); ++#else /* CONFIG_HUGETLB_PAGE */ ++static inline int PageHeadHuge(struct page *page_head) ++{ ++ return 0; ++} ++#endif /* CONFIG_HUGETLB_PAGE */ ++ ++static inline bool __compound_tail_refcounted(struct page *page) ++{ ++ return !PageSlab(page) && !PageHeadHuge(page); ++} ++ ++/* ++ * This takes a head page as parameter and tells if the ++ * tail page reference counting can be skipped. ++ * ++ * For this to be safe, PageSlab and PageHeadHuge must remain true on ++ * any given page where they return true here, until all tail pins ++ * have been released. ++ */ ++static inline bool compound_tail_refcounted(struct page *page) ++{ ++ VM_BUG_ON(!PageHead(page)); ++ return __compound_tail_refcounted(page); ++} ++ + static inline void get_huge_page_tail(struct page *page) + { + /* + * __split_huge_page_refcount() cannot run + * from under us. ++ * In turn no need of compound_trans_head here. + */ + VM_BUG_ON(page_mapcount(page) < 0); + VM_BUG_ON(atomic_read(&page->_count) != 0); +- atomic_inc(&page->_mapcount); ++ if (compound_tail_refcounted(compound_head(page))) ++ atomic_inc(&page->_mapcount); + } + + extern bool __get_page_tail(struct page *page); +diff --git a/mm/internal.h b/mm/internal.h +index 684f7aa..a85a3ab 100644 +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -51,7 +51,8 @@ static inline void __get_page_tail_foll(struct page *page, + VM_BUG_ON(page_mapcount(page) < 0); + if (get_page_head) + atomic_inc(&page->first_page->_count); +- atomic_inc(&page->_mapcount); ++ if (compound_tail_refcounted(page->first_page)) ++ atomic_inc(&page->_mapcount); + } + + /* +diff --git a/mm/swap.c b/mm/swap.c +index dbf5427..b4c49bf 100644 +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -88,8 +88,9 @@ static void put_compound_page(struct page *page) + + /* + * THP can not break up slab pages so avoid taking +- * compound_lock(). Slab performs non-atomic bit ops +- * on page->flags for better performance. In ++ * compound_lock() and skip the tail page refcounting ++ * (in _mapcount) too. Slab performs non-atomic bit ++ * ops on page->flags for better performance. In + * particular slab_unlock() in slub used to be a hot + * path. It is still hot on arches that do not support + * this_cpu_cmpxchg_double(). +@@ -102,7 +103,7 @@ static void put_compound_page(struct page *page) + * PageTail clear after smp_rmb() and we'll threat it + * as a single page. + */ +- if (PageSlab(page_head) || PageHeadHuge(page_head)) { ++ if (!__compound_tail_refcounted(page_head)) { + /* + * If "page" is a THP tail, we must read the tail page + * flags after the head page flags. The +@@ -117,10 +118,30 @@ static void put_compound_page(struct page *page) + * cannot race here. + */ + VM_BUG_ON(!PageHead(page_head)); +- VM_BUG_ON(page_mapcount(page) <= 0); +- atomic_dec(&page->_mapcount); +- if (put_page_testzero(page_head)) ++ VM_BUG_ON(page_mapcount(page) != 0); ++ if (put_page_testzero(page_head)) { ++ /* ++ * If this is the tail of a ++ * slab compound page, the ++ * tail pin must not be the ++ * last reference held on the ++ * page, because the PG_slab ++ * cannot be cleared before ++ * all tail pins (which skips ++ * the _mapcount tail ++ * refcounting) have been ++ * released. For hugetlbfs the ++ * tail pin may be the last ++ * reference on the page ++ * instead, because ++ * PageHeadHuge will not go ++ * away until the compound ++ * page enters the buddy ++ * allocator. ++ */ ++ VM_BUG_ON(PageSlab(page_head)); + __put_compound_page(page_head); ++ } + return; + } else + /* diff --git a/a/content_digest b/N1/content_digest index 166ed53..f0f6877 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -37,6 +37,190 @@ "The current version would look like below, which is less obviously\n" "horrible than before :). We may also consider to keep your indent\n" "cleanup incremental as it's much easier to review the actual change of\n" - code logic the below way. + "code logic the below way.\n" + "\n" + ">From 8a7bd3b25f7ee69b53a1a9e681790333df9504a6 Mon Sep 17 00:00:00 2001\n" + "From: Andrea Arcangeli <aarcange@redhat.com>\n" + "Date: Thu, 14 Nov 2013 20:17:38 +0100\n" + "Subject: [PATCH] mm: tail page refcounting optimization for slab and hugetlbfs\n" + "\n" + "This skips the _mapcount mangling for slab and hugetlbfs pages.\n" + "\n" + "The main trouble in doing this is to guarantee that PageSlab and\n" + "PageHeadHuge remains constant for all get_page/put_page run on the\n" + "tail of slab or hugetlbfs compound pages. Otherwise if they're set\n" + "during get_page but not set during put_page, the _mapcount of the tail\n" + "page would underflow.\n" + "\n" + "PageHeadHuge will remain true until the compound page is released and\n" + "enters the buddy allocator so it won't risk to change even if the tail\n" + "page is the last reference left on the page.\n" + "\n" + "PG_slab instead is cleared before the slab frees the head page with\n" + "put_page, so if the tail pin is released after the slab freed the\n" + "page, we would have a problem. But in the slab case the tail pin\n" + "cannot be the last reference left on the page. This is because the\n" + "slab code is free to reuse the compound page after a\n" + "kfree/kmem_cache_free without having to check if there's any tail pin\n" + "left. In turn all tail pins must be always released while the head is\n" + "still pinned by the slab code and so we know PG_slab will be still set\n" + "too.\n" + "\n" + "Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>\n" + "---\n" + " include/linux/hugetlb.h | 6 ------\n" + " include/linux/mm.h | 32 +++++++++++++++++++++++++++++++-\n" + " mm/internal.h | 3 ++-\n" + " mm/swap.c | 33 +++++++++++++++++++++++++++------\n" + " 4 files changed, 60 insertions(+), 14 deletions(-)\n" + "\n" + "diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h\n" + "index d4f3dbf..acd2010 100644\n" + "--- a/include/linux/hugetlb.h\n" + "+++ b/include/linux/hugetlb.h\n" + "@@ -31,7 +31,6 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);\n" + " void hugepage_put_subpool(struct hugepage_subpool *spool);\n" + " \n" + " int PageHuge(struct page *page);\n" + "-int PageHeadHuge(struct page *page_head);\n" + " \n" + " void reset_vma_resv_huge_pages(struct vm_area_struct *vma);\n" + " int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);\n" + "@@ -105,11 +104,6 @@ static inline int PageHuge(struct page *page)\n" + " \treturn 0;\n" + " }\n" + " \n" + "-static inline int PageHeadHuge(struct page *page_head)\n" + "-{\n" + "-\treturn 0;\n" + "-}\n" + "-\n" + " static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)\n" + " {\n" + " }\n" + "diff --git a/include/linux/mm.h b/include/linux/mm.h\n" + "index 0548eb2..6b20b34 100644\n" + "--- a/include/linux/mm.h\n" + "+++ b/include/linux/mm.h\n" + "@@ -414,15 +414,45 @@ static inline int page_count(struct page *page)\n" + " \treturn atomic_read(&compound_head(page)->_count);\n" + " }\n" + " \n" + "+#ifdef CONFIG_HUGETLB_PAGE\n" + "+extern int PageHeadHuge(struct page *page_head);\n" + "+#else /* CONFIG_HUGETLB_PAGE */\n" + "+static inline int PageHeadHuge(struct page *page_head)\n" + "+{\n" + "+\treturn 0;\n" + "+}\n" + "+#endif /* CONFIG_HUGETLB_PAGE */\n" + "+\n" + "+static inline bool __compound_tail_refcounted(struct page *page)\n" + "+{\n" + "+\treturn !PageSlab(page) && !PageHeadHuge(page);\n" + "+}\n" + "+\n" + "+/*\n" + "+ * This takes a head page as parameter and tells if the\n" + "+ * tail page reference counting can be skipped.\n" + "+ *\n" + "+ * For this to be safe, PageSlab and PageHeadHuge must remain true on\n" + "+ * any given page where they return true here, until all tail pins\n" + "+ * have been released.\n" + "+ */\n" + "+static inline bool compound_tail_refcounted(struct page *page)\n" + "+{\n" + "+\tVM_BUG_ON(!PageHead(page));\n" + "+\treturn __compound_tail_refcounted(page);\n" + "+}\n" + "+\n" + " static inline void get_huge_page_tail(struct page *page)\n" + " {\n" + " \t/*\n" + " \t * __split_huge_page_refcount() cannot run\n" + " \t * from under us.\n" + "+\t * In turn no need of compound_trans_head here.\n" + " \t */\n" + " \tVM_BUG_ON(page_mapcount(page) < 0);\n" + " \tVM_BUG_ON(atomic_read(&page->_count) != 0);\n" + "-\tatomic_inc(&page->_mapcount);\n" + "+\tif (compound_tail_refcounted(compound_head(page)))\n" + "+\t\tatomic_inc(&page->_mapcount);\n" + " }\n" + " \n" + " extern bool __get_page_tail(struct page *page);\n" + "diff --git a/mm/internal.h b/mm/internal.h\n" + "index 684f7aa..a85a3ab 100644\n" + "--- a/mm/internal.h\n" + "+++ b/mm/internal.h\n" + "@@ -51,7 +51,8 @@ static inline void __get_page_tail_foll(struct page *page,\n" + " \tVM_BUG_ON(page_mapcount(page) < 0);\n" + " \tif (get_page_head)\n" + " \t\tatomic_inc(&page->first_page->_count);\n" + "-\tatomic_inc(&page->_mapcount);\n" + "+\tif (compound_tail_refcounted(page->first_page))\n" + "+\t\tatomic_inc(&page->_mapcount);\n" + " }\n" + " \n" + " /*\n" + "diff --git a/mm/swap.c b/mm/swap.c\n" + "index dbf5427..b4c49bf 100644\n" + "--- a/mm/swap.c\n" + "+++ b/mm/swap.c\n" + "@@ -88,8 +88,9 @@ static void put_compound_page(struct page *page)\n" + " \n" + " \t\t/*\n" + " \t\t * THP can not break up slab pages so avoid taking\n" + "-\t\t * compound_lock(). Slab performs non-atomic bit ops\n" + "-\t\t * on page->flags for better performance. In\n" + "+\t\t * compound_lock() and skip the tail page refcounting\n" + "+\t\t * (in _mapcount) too. Slab performs non-atomic bit\n" + "+\t\t * ops on page->flags for better performance. In\n" + " \t\t * particular slab_unlock() in slub used to be a hot\n" + " \t\t * path. It is still hot on arches that do not support\n" + " \t\t * this_cpu_cmpxchg_double().\n" + "@@ -102,7 +103,7 @@ static void put_compound_page(struct page *page)\n" + " \t\t * PageTail clear after smp_rmb() and we'll threat it\n" + " \t\t * as a single page.\n" + " \t\t */\n" + "-\t\tif (PageSlab(page_head) || PageHeadHuge(page_head)) {\n" + "+\t\tif (!__compound_tail_refcounted(page_head)) {\n" + " \t\t\t/*\n" + " \t\t\t * If \"page\" is a THP tail, we must read the tail page\n" + " \t\t\t * flags after the head page flags. The\n" + "@@ -117,10 +118,30 @@ static void put_compound_page(struct page *page)\n" + " \t\t\t\t * cannot race here.\n" + " \t\t\t\t */\n" + " \t\t\t\tVM_BUG_ON(!PageHead(page_head));\n" + "-\t\t\t\tVM_BUG_ON(page_mapcount(page) <= 0);\n" + "-\t\t\t\tatomic_dec(&page->_mapcount);\n" + "-\t\t\t\tif (put_page_testzero(page_head))\n" + "+\t\t\t\tVM_BUG_ON(page_mapcount(page) != 0);\n" + "+\t\t\t\tif (put_page_testzero(page_head)) {\n" + "+\t\t\t\t\t/*\n" + "+\t\t\t\t\t * If this is the tail of a\n" + "+\t\t\t\t\t * slab compound page, the\n" + "+\t\t\t\t\t * tail pin must not be the\n" + "+\t\t\t\t\t * last reference held on the\n" + "+\t\t\t\t\t * page, because the PG_slab\n" + "+\t\t\t\t\t * cannot be cleared before\n" + "+\t\t\t\t\t * all tail pins (which skips\n" + "+\t\t\t\t\t * the _mapcount tail\n" + "+\t\t\t\t\t * refcounting) have been\n" + "+\t\t\t\t\t * released. For hugetlbfs the\n" + "+\t\t\t\t\t * tail pin may be the last\n" + "+\t\t\t\t\t * reference on the page\n" + "+\t\t\t\t\t * instead, because\n" + "+\t\t\t\t\t * PageHeadHuge will not go\n" + "+\t\t\t\t\t * away until the compound\n" + "+\t\t\t\t\t * page enters the buddy\n" + "+\t\t\t\t\t * allocator.\n" + "+\t\t\t\t\t */\n" + "+\t\t\t\t\tVM_BUG_ON(PageSlab(page_head));\n" + " \t\t\t\t\t__put_compound_page(page_head);\n" + "+\t\t\t\t}\n" + " \t\t\t\treturn;\n" + " \t\t\t} else\n" + " \t\t\t\t/*" -8e2ece3c46adc7aa2b082ce188498e0f55adaf1fbc007ea67cec5203c7014b4e +198d0cd3cac2bcd41e7221dfc28afddaad651344d418beb1af77404fd79e8068
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.