All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20131120002053.GE10493@redhat.com>

diff --git a/a/1.txt b/N1/1.txt
index 33a0693..58a7d18 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -16,3 +16,187 @@ The current version would look like below, which is less obviously
 horrible than before :). We may also consider to keep your indent
 cleanup incremental as it's much easier to review the actual change of
 code logic the below way.
+
+>From 8a7bd3b25f7ee69b53a1a9e681790333df9504a6 Mon Sep 17 00:00:00 2001
+From: Andrea Arcangeli <aarcange@redhat.com>
+Date: Thu, 14 Nov 2013 20:17:38 +0100
+Subject: [PATCH] mm: tail page refcounting optimization for slab and hugetlbfs
+
+This skips the _mapcount mangling for slab and hugetlbfs pages.
+
+The main trouble in doing this is to guarantee that PageSlab and
+PageHeadHuge remains constant for all get_page/put_page run on the
+tail of slab or hugetlbfs compound pages. Otherwise if they're set
+during get_page but not set during put_page, the _mapcount of the tail
+page would underflow.
+
+PageHeadHuge will remain true until the compound page is released and
+enters the buddy allocator so it won't risk to change even if the tail
+page is the last reference left on the page.
+
+PG_slab instead is cleared before the slab frees the head page with
+put_page, so if the tail pin is released after the slab freed the
+page, we would have a problem. But in the slab case the tail pin
+cannot be the last reference left on the page. This is because the
+slab code is free to reuse the compound page after a
+kfree/kmem_cache_free without having to check if there's any tail pin
+left. In turn all tail pins must be always released while the head is
+still pinned by the slab code and so we know PG_slab will be still set
+too.
+
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+---
+ include/linux/hugetlb.h |  6 ------
+ include/linux/mm.h      | 32 +++++++++++++++++++++++++++++++-
+ mm/internal.h           |  3 ++-
+ mm/swap.c               | 33 +++++++++++++++++++++++++++------
+ 4 files changed, 60 insertions(+), 14 deletions(-)
+
+diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
+index d4f3dbf..acd2010 100644
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -31,7 +31,6 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
+ void hugepage_put_subpool(struct hugepage_subpool *spool);
+ 
+ int PageHuge(struct page *page);
+-int PageHeadHuge(struct page *page_head);
+ 
+ void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
+ int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+@@ -105,11 +104,6 @@ static inline int PageHuge(struct page *page)
+ 	return 0;
+ }
+ 
+-static inline int PageHeadHuge(struct page *page_head)
+-{
+-	return 0;
+-}
+-
+ static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+ {
+ }
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 0548eb2..6b20b34 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -414,15 +414,45 @@ static inline int page_count(struct page *page)
+ 	return atomic_read(&compound_head(page)->_count);
+ }
+ 
++#ifdef CONFIG_HUGETLB_PAGE
++extern int PageHeadHuge(struct page *page_head);
++#else /* CONFIG_HUGETLB_PAGE */
++static inline int PageHeadHuge(struct page *page_head)
++{
++	return 0;
++}
++#endif /* CONFIG_HUGETLB_PAGE */
++
++static inline bool __compound_tail_refcounted(struct page *page)
++{
++	return !PageSlab(page) && !PageHeadHuge(page);
++}
++
++/*
++ * This takes a head page as parameter and tells if the
++ * tail page reference counting can be skipped.
++ *
++ * For this to be safe, PageSlab and PageHeadHuge must remain true on
++ * any given page where they return true here, until all tail pins
++ * have been released.
++ */
++static inline bool compound_tail_refcounted(struct page *page)
++{
++	VM_BUG_ON(!PageHead(page));
++	return __compound_tail_refcounted(page);
++}
++
+ static inline void get_huge_page_tail(struct page *page)
+ {
+ 	/*
+ 	 * __split_huge_page_refcount() cannot run
+ 	 * from under us.
++	 * In turn no need of compound_trans_head here.
+ 	 */
+ 	VM_BUG_ON(page_mapcount(page) < 0);
+ 	VM_BUG_ON(atomic_read(&page->_count) != 0);
+-	atomic_inc(&page->_mapcount);
++	if (compound_tail_refcounted(compound_head(page)))
++		atomic_inc(&page->_mapcount);
+ }
+ 
+ extern bool __get_page_tail(struct page *page);
+diff --git a/mm/internal.h b/mm/internal.h
+index 684f7aa..a85a3ab 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -51,7 +51,8 @@ static inline void __get_page_tail_foll(struct page *page,
+ 	VM_BUG_ON(page_mapcount(page) < 0);
+ 	if (get_page_head)
+ 		atomic_inc(&page->first_page->_count);
+-	atomic_inc(&page->_mapcount);
++	if (compound_tail_refcounted(page->first_page))
++		atomic_inc(&page->_mapcount);
+ }
+ 
+ /*
+diff --git a/mm/swap.c b/mm/swap.c
+index dbf5427..b4c49bf 100644
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -88,8 +88,9 @@ static void put_compound_page(struct page *page)
+ 
+ 		/*
+ 		 * THP can not break up slab pages so avoid taking
+-		 * compound_lock(). Slab performs non-atomic bit ops
+-		 * on page->flags for better performance. In
++		 * compound_lock() and skip the tail page refcounting
++		 * (in _mapcount) too. Slab performs non-atomic bit
++		 * ops on page->flags for better performance. In
+ 		 * particular slab_unlock() in slub used to be a hot
+ 		 * path. It is still hot on arches that do not support
+ 		 * this_cpu_cmpxchg_double().
+@@ -102,7 +103,7 @@ static void put_compound_page(struct page *page)
+ 		 * PageTail clear after smp_rmb() and we'll threat it
+ 		 * as a single page.
+ 		 */
+-		if (PageSlab(page_head) || PageHeadHuge(page_head)) {
++		if (!__compound_tail_refcounted(page_head)) {
+ 			/*
+ 			 * If "page" is a THP tail, we must read the tail page
+ 			 * flags after the head page flags. The
+@@ -117,10 +118,30 @@ static void put_compound_page(struct page *page)
+ 				 * cannot race here.
+ 				 */
+ 				VM_BUG_ON(!PageHead(page_head));
+-				VM_BUG_ON(page_mapcount(page) <= 0);
+-				atomic_dec(&page->_mapcount);
+-				if (put_page_testzero(page_head))
++				VM_BUG_ON(page_mapcount(page) != 0);
++				if (put_page_testzero(page_head)) {
++					/*
++					 * If this is the tail of a
++					 * slab compound page, the
++					 * tail pin must not be the
++					 * last reference held on the
++					 * page, because the PG_slab
++					 * cannot be cleared before
++					 * all tail pins (which skips
++					 * the _mapcount tail
++					 * refcounting) have been
++					 * released. For hugetlbfs the
++					 * tail pin may be the last
++					 * reference on the page
++					 * instead, because
++					 * PageHeadHuge will not go
++					 * away until the compound
++					 * page enters the buddy
++					 * allocator.
++					 */
++					VM_BUG_ON(PageSlab(page_head));
+ 					__put_compound_page(page_head);
++				}
+ 				return;
+ 			} else
+ 				/*
diff --git a/a/content_digest b/N1/content_digest
index 166ed53..f0f6877 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -37,6 +37,190 @@
  "The current version would look like below, which is less obviously\n"
  "horrible than before :). We may also consider to keep your indent\n"
  "cleanup incremental as it's much easier to review the actual change of\n"
- code logic the below way.
+ "code logic the below way.\n"
+ "\n"
+ ">From 8a7bd3b25f7ee69b53a1a9e681790333df9504a6 Mon Sep 17 00:00:00 2001\n"
+ "From: Andrea Arcangeli <aarcange@redhat.com>\n"
+ "Date: Thu, 14 Nov 2013 20:17:38 +0100\n"
+ "Subject: [PATCH] mm: tail page refcounting optimization for slab and hugetlbfs\n"
+ "\n"
+ "This skips the _mapcount mangling for slab and hugetlbfs pages.\n"
+ "\n"
+ "The main trouble in doing this is to guarantee that PageSlab and\n"
+ "PageHeadHuge remains constant for all get_page/put_page run on the\n"
+ "tail of slab or hugetlbfs compound pages. Otherwise if they're set\n"
+ "during get_page but not set during put_page, the _mapcount of the tail\n"
+ "page would underflow.\n"
+ "\n"
+ "PageHeadHuge will remain true until the compound page is released and\n"
+ "enters the buddy allocator so it won't risk to change even if the tail\n"
+ "page is the last reference left on the page.\n"
+ "\n"
+ "PG_slab instead is cleared before the slab frees the head page with\n"
+ "put_page, so if the tail pin is released after the slab freed the\n"
+ "page, we would have a problem. But in the slab case the tail pin\n"
+ "cannot be the last reference left on the page. This is because the\n"
+ "slab code is free to reuse the compound page after a\n"
+ "kfree/kmem_cache_free without having to check if there's any tail pin\n"
+ "left. In turn all tail pins must be always released while the head is\n"
+ "still pinned by the slab code and so we know PG_slab will be still set\n"
+ "too.\n"
+ "\n"
+ "Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>\n"
+ "---\n"
+ " include/linux/hugetlb.h |  6 ------\n"
+ " include/linux/mm.h      | 32 +++++++++++++++++++++++++++++++-\n"
+ " mm/internal.h           |  3 ++-\n"
+ " mm/swap.c               | 33 +++++++++++++++++++++++++++------\n"
+ " 4 files changed, 60 insertions(+), 14 deletions(-)\n"
+ "\n"
+ "diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h\n"
+ "index d4f3dbf..acd2010 100644\n"
+ "--- a/include/linux/hugetlb.h\n"
+ "+++ b/include/linux/hugetlb.h\n"
+ "@@ -31,7 +31,6 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);\n"
+ " void hugepage_put_subpool(struct hugepage_subpool *spool);\n"
+ " \n"
+ " int PageHuge(struct page *page);\n"
+ "-int PageHeadHuge(struct page *page_head);\n"
+ " \n"
+ " void reset_vma_resv_huge_pages(struct vm_area_struct *vma);\n"
+ " int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);\n"
+ "@@ -105,11 +104,6 @@ static inline int PageHuge(struct page *page)\n"
+ " \treturn 0;\n"
+ " }\n"
+ " \n"
+ "-static inline int PageHeadHuge(struct page *page_head)\n"
+ "-{\n"
+ "-\treturn 0;\n"
+ "-}\n"
+ "-\n"
+ " static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)\n"
+ " {\n"
+ " }\n"
+ "diff --git a/include/linux/mm.h b/include/linux/mm.h\n"
+ "index 0548eb2..6b20b34 100644\n"
+ "--- a/include/linux/mm.h\n"
+ "+++ b/include/linux/mm.h\n"
+ "@@ -414,15 +414,45 @@ static inline int page_count(struct page *page)\n"
+ " \treturn atomic_read(&compound_head(page)->_count);\n"
+ " }\n"
+ " \n"
+ "+#ifdef CONFIG_HUGETLB_PAGE\n"
+ "+extern int PageHeadHuge(struct page *page_head);\n"
+ "+#else /* CONFIG_HUGETLB_PAGE */\n"
+ "+static inline int PageHeadHuge(struct page *page_head)\n"
+ "+{\n"
+ "+\treturn 0;\n"
+ "+}\n"
+ "+#endif /* CONFIG_HUGETLB_PAGE */\n"
+ "+\n"
+ "+static inline bool __compound_tail_refcounted(struct page *page)\n"
+ "+{\n"
+ "+\treturn !PageSlab(page) && !PageHeadHuge(page);\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * This takes a head page as parameter and tells if the\n"
+ "+ * tail page reference counting can be skipped.\n"
+ "+ *\n"
+ "+ * For this to be safe, PageSlab and PageHeadHuge must remain true on\n"
+ "+ * any given page where they return true here, until all tail pins\n"
+ "+ * have been released.\n"
+ "+ */\n"
+ "+static inline bool compound_tail_refcounted(struct page *page)\n"
+ "+{\n"
+ "+\tVM_BUG_ON(!PageHead(page));\n"
+ "+\treturn __compound_tail_refcounted(page);\n"
+ "+}\n"
+ "+\n"
+ " static inline void get_huge_page_tail(struct page *page)\n"
+ " {\n"
+ " \t/*\n"
+ " \t * __split_huge_page_refcount() cannot run\n"
+ " \t * from under us.\n"
+ "+\t * In turn no need of compound_trans_head here.\n"
+ " \t */\n"
+ " \tVM_BUG_ON(page_mapcount(page) < 0);\n"
+ " \tVM_BUG_ON(atomic_read(&page->_count) != 0);\n"
+ "-\tatomic_inc(&page->_mapcount);\n"
+ "+\tif (compound_tail_refcounted(compound_head(page)))\n"
+ "+\t\tatomic_inc(&page->_mapcount);\n"
+ " }\n"
+ " \n"
+ " extern bool __get_page_tail(struct page *page);\n"
+ "diff --git a/mm/internal.h b/mm/internal.h\n"
+ "index 684f7aa..a85a3ab 100644\n"
+ "--- a/mm/internal.h\n"
+ "+++ b/mm/internal.h\n"
+ "@@ -51,7 +51,8 @@ static inline void __get_page_tail_foll(struct page *page,\n"
+ " \tVM_BUG_ON(page_mapcount(page) < 0);\n"
+ " \tif (get_page_head)\n"
+ " \t\tatomic_inc(&page->first_page->_count);\n"
+ "-\tatomic_inc(&page->_mapcount);\n"
+ "+\tif (compound_tail_refcounted(page->first_page))\n"
+ "+\t\tatomic_inc(&page->_mapcount);\n"
+ " }\n"
+ " \n"
+ " /*\n"
+ "diff --git a/mm/swap.c b/mm/swap.c\n"
+ "index dbf5427..b4c49bf 100644\n"
+ "--- a/mm/swap.c\n"
+ "+++ b/mm/swap.c\n"
+ "@@ -88,8 +88,9 @@ static void put_compound_page(struct page *page)\n"
+ " \n"
+ " \t\t/*\n"
+ " \t\t * THP can not break up slab pages so avoid taking\n"
+ "-\t\t * compound_lock(). Slab performs non-atomic bit ops\n"
+ "-\t\t * on page->flags for better performance. In\n"
+ "+\t\t * compound_lock() and skip the tail page refcounting\n"
+ "+\t\t * (in _mapcount) too. Slab performs non-atomic bit\n"
+ "+\t\t * ops on page->flags for better performance. In\n"
+ " \t\t * particular slab_unlock() in slub used to be a hot\n"
+ " \t\t * path. It is still hot on arches that do not support\n"
+ " \t\t * this_cpu_cmpxchg_double().\n"
+ "@@ -102,7 +103,7 @@ static void put_compound_page(struct page *page)\n"
+ " \t\t * PageTail clear after smp_rmb() and we'll threat it\n"
+ " \t\t * as a single page.\n"
+ " \t\t */\n"
+ "-\t\tif (PageSlab(page_head) || PageHeadHuge(page_head)) {\n"
+ "+\t\tif (!__compound_tail_refcounted(page_head)) {\n"
+ " \t\t\t/*\n"
+ " \t\t\t * If \"page\" is a THP tail, we must read the tail page\n"
+ " \t\t\t * flags after the head page flags. The\n"
+ "@@ -117,10 +118,30 @@ static void put_compound_page(struct page *page)\n"
+ " \t\t\t\t * cannot race here.\n"
+ " \t\t\t\t */\n"
+ " \t\t\t\tVM_BUG_ON(!PageHead(page_head));\n"
+ "-\t\t\t\tVM_BUG_ON(page_mapcount(page) <= 0);\n"
+ "-\t\t\t\tatomic_dec(&page->_mapcount);\n"
+ "-\t\t\t\tif (put_page_testzero(page_head))\n"
+ "+\t\t\t\tVM_BUG_ON(page_mapcount(page) != 0);\n"
+ "+\t\t\t\tif (put_page_testzero(page_head)) {\n"
+ "+\t\t\t\t\t/*\n"
+ "+\t\t\t\t\t * If this is the tail of a\n"
+ "+\t\t\t\t\t * slab compound page, the\n"
+ "+\t\t\t\t\t * tail pin must not be the\n"
+ "+\t\t\t\t\t * last reference held on the\n"
+ "+\t\t\t\t\t * page, because the PG_slab\n"
+ "+\t\t\t\t\t * cannot be cleared before\n"
+ "+\t\t\t\t\t * all tail pins (which skips\n"
+ "+\t\t\t\t\t * the _mapcount tail\n"
+ "+\t\t\t\t\t * refcounting) have been\n"
+ "+\t\t\t\t\t * released. For hugetlbfs the\n"
+ "+\t\t\t\t\t * tail pin may be the last\n"
+ "+\t\t\t\t\t * reference on the page\n"
+ "+\t\t\t\t\t * instead, because\n"
+ "+\t\t\t\t\t * PageHeadHuge will not go\n"
+ "+\t\t\t\t\t * away until the compound\n"
+ "+\t\t\t\t\t * page enters the buddy\n"
+ "+\t\t\t\t\t * allocator.\n"
+ "+\t\t\t\t\t */\n"
+ "+\t\t\t\t\tVM_BUG_ON(PageSlab(page_head));\n"
+ " \t\t\t\t\t__put_compound_page(page_head);\n"
+ "+\t\t\t\t}\n"
+ " \t\t\t\treturn;\n"
+ " \t\t\t} else\n"
+ " \t\t\t\t/*"
 
-8e2ece3c46adc7aa2b082ce188498e0f55adaf1fbc007ea67cec5203c7014b4e
+198d0cd3cac2bcd41e7221dfc28afddaad651344d418beb1af77404fd79e8068

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.