diff for duplicates of <20140924210322.GA11017@cmpxchg.org> diff --git a/a/1.txt b/N1/1.txt index 5523308..63e5799 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -44,176 +44,3 @@ Even then, the end result is more concise and busts the lock where it's actually taken, making the whole thing a bit more obvious: --- -From 367f3dcf25141ff6c30400c00ec09cc3c5486f94 Mon Sep 17 00:00:00 2001 -From: Michal Hocko <mhocko@suse.cz> -Date: Fri, 5 Sep 2014 11:16:17 +0200 -Subject: [patch] mm: memcontrol: do not kill uncharge batching in - free_pages_and_swap_cache - -free_pages_and_swap_cache limits release_pages to PAGEVEC_SIZE chunks. -This is not a big deal for the normal release path but it completely -kills memcg uncharge batching which reduces res_counter spin_lock -contention. Dave has noticed this with his page fault scalability test -case on a large machine when the lock was basically dominating on all -CPUs: - 80.18% 80.18% [kernel] [k] _raw_spin_lock - | - --- _raw_spin_lock - | - |--66.59%-- res_counter_uncharge_until - | res_counter_uncharge - | uncharge_batch - | uncharge_list - | mem_cgroup_uncharge_list - | release_pages - | free_pages_and_swap_cache - | tlb_flush_mmu_free - | | - | |--90.12%-- unmap_single_vma - | | unmap_vmas - | | unmap_region - | | do_munmap - | | vm_munmap - | | sys_munmap - | | system_call_fastpath - | | __GI___munmap - | | - | --9.88%-- tlb_flush_mmu - | tlb_finish_mmu - | unmap_region - | do_munmap - | vm_munmap - | sys_munmap - | system_call_fastpath - | __GI___munmap - -In his case the load was running in the root memcg and that part -has been handled by reverting 05b843012335 ("mm: memcontrol: use -root_mem_cgroup res_counter") because this is a clear regression, -but the problem remains inside dedicated memcgs. - -There is no reason to limit release_pages to PAGEVEC_SIZE batches other -than lru_lock held times. This logic, however, can be moved inside the -function. mem_cgroup_uncharge_list and free_hot_cold_page_list do not -hold any lock for the whole pages_to_free list so it is safe to call -them in a single run. - -In release_pages, break the lock at least every SWAP_CLUSTER_MAX (32) -pages, then remove the batching from free_pages_and_swap_cache. - -Also update the grossly out-of-date release_pages documentation. - -Reported-by: Dave Hansen <dave@sr71.net> -Signed-off-by: Michal Hocko <mhocko@suse.cz> -Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> ---- - mm/swap.c | 31 ++++++++++++++++++++----------- - mm/swap_state.c | 14 ++++---------- - 2 files changed, 24 insertions(+), 21 deletions(-) - -diff --git a/mm/swap.c b/mm/swap.c -index 6b2dc3897cd5..39affa1932ce 100644 ---- a/mm/swap.c -+++ b/mm/swap.c -@@ -887,18 +887,14 @@ void lru_add_drain_all(void) - mutex_unlock(&lock); - } - --/* -- * Batched page_cache_release(). Decrement the reference count on all the -- * passed pages. If it fell to zero then remove the page from the LRU and -- * free it. -- * -- * Avoid taking zone->lru_lock if possible, but if it is taken, retain it -- * for the remainder of the operation. -+/** -+ * release_pages - batched page_cache_release() -+ * @pages: array of pages to release -+ * @nr: number of pages -+ * @cold: whether the pages are cache cold - * -- * The locking in this function is against shrink_inactive_list(): we recheck -- * the page count inside the lock to see whether shrink_inactive_list() -- * grabbed the page via the LRU. If it did, give up: shrink_inactive_list() -- * will free it. -+ * Decrement the reference count on all the pages in @pages. If it -+ * fell to zero, remove the page from the LRU and free it. - */ - void release_pages(struct page **pages, int nr, bool cold) - { -@@ -907,6 +903,7 @@ void release_pages(struct page **pages, int nr, bool cold) - struct zone *zone = NULL; - struct lruvec *lruvec; - unsigned long uninitialized_var(flags); -+ unsigned int uninitialized_var(lock_batch); - - for (i = 0; i < nr; i++) { - struct page *page = pages[i]; -@@ -914,6 +911,7 @@ void release_pages(struct page **pages, int nr, bool cold) - if (unlikely(PageCompound(page))) { - if (zone) { - spin_unlock_irqrestore(&zone->lru_lock, flags); -+ lock_batch = 0; - zone = NULL; - } - put_compound_page(page); -@@ -930,6 +928,7 @@ void release_pages(struct page **pages, int nr, bool cold) - if (zone) - spin_unlock_irqrestore(&zone->lru_lock, - flags); -+ lock_batch = 0; - zone = pagezone; - spin_lock_irqsave(&zone->lru_lock, flags); - } -@@ -938,6 +937,16 @@ void release_pages(struct page **pages, int nr, bool cold) - VM_BUG_ON_PAGE(!PageLRU(page), page); - __ClearPageLRU(page); - del_page_from_lru_list(page, lruvec, page_off_lru(page)); -+ -+ /* -+ * Make sure the IRQ-safe lock-holding time -+ * does not get excessive with a continuous -+ * string of pages from the same zone. -+ */ -+ if (++lock_batch == SWAP_CLUSTER_MAX) { -+ spin_unlock_irqrestore(&zone->lru_lock, flags); -+ zone = NULL; -+ } - } - - /* Clear Active bit in case of parallel mark_page_accessed */ -diff --git a/mm/swap_state.c b/mm/swap_state.c -index ef1f39139b71..154444918685 100644 ---- a/mm/swap_state.c -+++ b/mm/swap_state.c -@@ -265,18 +265,12 @@ void free_page_and_swap_cache(struct page *page) - void free_pages_and_swap_cache(struct page **pages, int nr) - { - struct page **pagep = pages; -+ int i; - - lru_add_drain(); -- while (nr) { -- int todo = min(nr, PAGEVEC_SIZE); -- int i; -- -- for (i = 0; i < todo; i++) -- free_swap_cache(pagep[i]); -- release_pages(pagep, todo, false); -- pagep += todo; -- nr -= todo; -- } -+ for (i = 0; i < nr; i++) -+ free_swap_cache(pagep[i]); -+ release_pages(pagep, nr, false); - } - - /* --- -2.1.0 - --- -To unsubscribe, send a message with 'unsubscribe linux-mm' in -the body to majordomo@kvack.org. For more info on Linux MM, -see: http://www.linux-mm.org/ . -Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> diff --git a/a/content_digest b/N1/content_digest index ee7c584..28719d6 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -59,179 +59,6 @@ "Even then, the end result is more concise and busts the lock where\n" "it's actually taken, making the whole thing a bit more obvious:\n" "\n" - "---\n" - "From 367f3dcf25141ff6c30400c00ec09cc3c5486f94 Mon Sep 17 00:00:00 2001\n" - "From: Michal Hocko <mhocko@suse.cz>\n" - "Date: Fri, 5 Sep 2014 11:16:17 +0200\n" - "Subject: [patch] mm: memcontrol: do not kill uncharge batching in\n" - " free_pages_and_swap_cache\n" - "\n" - "free_pages_and_swap_cache limits release_pages to PAGEVEC_SIZE chunks.\n" - "This is not a big deal for the normal release path but it completely\n" - "kills memcg uncharge batching which reduces res_counter spin_lock\n" - "contention. Dave has noticed this with his page fault scalability test\n" - "case on a large machine when the lock was basically dominating on all\n" - "CPUs:\n" - " 80.18% 80.18% [kernel] [k] _raw_spin_lock\n" - " |\n" - " --- _raw_spin_lock\n" - " |\n" - " |--66.59%-- res_counter_uncharge_until\n" - " | res_counter_uncharge\n" - " | uncharge_batch\n" - " | uncharge_list\n" - " | mem_cgroup_uncharge_list\n" - " | release_pages\n" - " | free_pages_and_swap_cache\n" - " | tlb_flush_mmu_free\n" - " | |\n" - " | |--90.12%-- unmap_single_vma\n" - " | | unmap_vmas\n" - " | | unmap_region\n" - " | | do_munmap\n" - " | | vm_munmap\n" - " | | sys_munmap\n" - " | | system_call_fastpath\n" - " | | __GI___munmap\n" - " | |\n" - " | --9.88%-- tlb_flush_mmu\n" - " | tlb_finish_mmu\n" - " | unmap_region\n" - " | do_munmap\n" - " | vm_munmap\n" - " | sys_munmap\n" - " | system_call_fastpath\n" - " | __GI___munmap\n" - "\n" - "In his case the load was running in the root memcg and that part\n" - "has been handled by reverting 05b843012335 (\"mm: memcontrol: use\n" - "root_mem_cgroup res_counter\") because this is a clear regression,\n" - "but the problem remains inside dedicated memcgs.\n" - "\n" - "There is no reason to limit release_pages to PAGEVEC_SIZE batches other\n" - "than lru_lock held times. This logic, however, can be moved inside the\n" - "function. mem_cgroup_uncharge_list and free_hot_cold_page_list do not\n" - "hold any lock for the whole pages_to_free list so it is safe to call\n" - "them in a single run.\n" - "\n" - "In release_pages, break the lock at least every SWAP_CLUSTER_MAX (32)\n" - "pages, then remove the batching from free_pages_and_swap_cache.\n" - "\n" - "Also update the grossly out-of-date release_pages documentation.\n" - "\n" - "Reported-by: Dave Hansen <dave@sr71.net>\n" - "Signed-off-by: Michal Hocko <mhocko@suse.cz>\n" - "Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>\n" - "---\n" - " mm/swap.c | 31 ++++++++++++++++++++-----------\n" - " mm/swap_state.c | 14 ++++----------\n" - " 2 files changed, 24 insertions(+), 21 deletions(-)\n" - "\n" - "diff --git a/mm/swap.c b/mm/swap.c\n" - "index 6b2dc3897cd5..39affa1932ce 100644\n" - "--- a/mm/swap.c\n" - "+++ b/mm/swap.c\n" - "@@ -887,18 +887,14 @@ void lru_add_drain_all(void)\n" - " \tmutex_unlock(&lock);\n" - " }\n" - " \n" - "-/*\n" - "- * Batched page_cache_release(). Decrement the reference count on all the\n" - "- * passed pages. If it fell to zero then remove the page from the LRU and\n" - "- * free it.\n" - "- *\n" - "- * Avoid taking zone->lru_lock if possible, but if it is taken, retain it\n" - "- * for the remainder of the operation.\n" - "+/**\n" - "+ * release_pages - batched page_cache_release()\n" - "+ * @pages: array of pages to release\n" - "+ * @nr: number of pages\n" - "+ * @cold: whether the pages are cache cold\n" - " *\n" - "- * The locking in this function is against shrink_inactive_list(): we recheck\n" - "- * the page count inside the lock to see whether shrink_inactive_list()\n" - "- * grabbed the page via the LRU. If it did, give up: shrink_inactive_list()\n" - "- * will free it.\n" - "+ * Decrement the reference count on all the pages in @pages. If it\n" - "+ * fell to zero, remove the page from the LRU and free it.\n" - " */\n" - " void release_pages(struct page **pages, int nr, bool cold)\n" - " {\n" - "@@ -907,6 +903,7 @@ void release_pages(struct page **pages, int nr, bool cold)\n" - " \tstruct zone *zone = NULL;\n" - " \tstruct lruvec *lruvec;\n" - " \tunsigned long uninitialized_var(flags);\n" - "+\tunsigned int uninitialized_var(lock_batch);\n" - " \n" - " \tfor (i = 0; i < nr; i++) {\n" - " \t\tstruct page *page = pages[i];\n" - "@@ -914,6 +911,7 @@ void release_pages(struct page **pages, int nr, bool cold)\n" - " \t\tif (unlikely(PageCompound(page))) {\n" - " \t\t\tif (zone) {\n" - " \t\t\t\tspin_unlock_irqrestore(&zone->lru_lock, flags);\n" - "+\t\t\t\tlock_batch = 0;\n" - " \t\t\t\tzone = NULL;\n" - " \t\t\t}\n" - " \t\t\tput_compound_page(page);\n" - "@@ -930,6 +928,7 @@ void release_pages(struct page **pages, int nr, bool cold)\n" - " \t\t\t\tif (zone)\n" - " \t\t\t\t\tspin_unlock_irqrestore(&zone->lru_lock,\n" - " \t\t\t\t\t\t\t\t\tflags);\n" - "+\t\t\t\tlock_batch = 0;\n" - " \t\t\t\tzone = pagezone;\n" - " \t\t\t\tspin_lock_irqsave(&zone->lru_lock, flags);\n" - " \t\t\t}\n" - "@@ -938,6 +937,16 @@ void release_pages(struct page **pages, int nr, bool cold)\n" - " \t\t\tVM_BUG_ON_PAGE(!PageLRU(page), page);\n" - " \t\t\t__ClearPageLRU(page);\n" - " \t\t\tdel_page_from_lru_list(page, lruvec, page_off_lru(page));\n" - "+\n" - "+\t\t\t/*\n" - "+\t\t\t * Make sure the IRQ-safe lock-holding time\n" - "+\t\t\t * does not get excessive with a continuous\n" - "+\t\t\t * string of pages from the same zone.\n" - "+\t\t\t */\n" - "+\t\t\tif (++lock_batch == SWAP_CLUSTER_MAX) {\n" - "+\t\t\t\tspin_unlock_irqrestore(&zone->lru_lock, flags);\n" - "+\t\t\t\tzone = NULL;\n" - "+\t\t\t}\n" - " \t\t}\n" - " \n" - " \t\t/* Clear Active bit in case of parallel mark_page_accessed */\n" - "diff --git a/mm/swap_state.c b/mm/swap_state.c\n" - "index ef1f39139b71..154444918685 100644\n" - "--- a/mm/swap_state.c\n" - "+++ b/mm/swap_state.c\n" - "@@ -265,18 +265,12 @@ void free_page_and_swap_cache(struct page *page)\n" - " void free_pages_and_swap_cache(struct page **pages, int nr)\n" - " {\n" - " \tstruct page **pagep = pages;\n" - "+\tint i;\n" - " \n" - " \tlru_add_drain();\n" - "-\twhile (nr) {\n" - "-\t\tint todo = min(nr, PAGEVEC_SIZE);\n" - "-\t\tint i;\n" - "-\n" - "-\t\tfor (i = 0; i < todo; i++)\n" - "-\t\t\tfree_swap_cache(pagep[i]);\n" - "-\t\trelease_pages(pagep, todo, false);\n" - "-\t\tpagep += todo;\n" - "-\t\tnr -= todo;\n" - "-\t}\n" - "+\tfor (i = 0; i < nr; i++)\n" - "+\t\tfree_swap_cache(pagep[i]);\n" - "+\trelease_pages(pagep, nr, false);\n" - " }\n" - " \n" - " /*\n" - "-- \n" - "2.1.0\n" - "\n" - "--\n" - "To unsubscribe, send a message with 'unsubscribe linux-mm' in\n" - "the body to majordomo@kvack.org. For more info on Linux MM,\n" - "see: http://www.linux-mm.org/ .\n" - "Don't email: <a href=mailto:\"dont@kvack.org\"> email@kvack.org </a>" + --- -e52bffaef8463ee5770eb375b1ce637899bd0f5c2c25591ba917095270ea8d4e +dd9a59aeb132bafeeff6d71299efaa2d9720d866a76422ca816f0f0f93906691
diff --git a/a/1.txt b/N2/1.txt index 5523308..c63c1b8 100644 --- a/a/1.txt +++ b/N2/1.txt @@ -44,7 +44,7 @@ Even then, the end result is more concise and busts the lock where it's actually taken, making the whole thing a bit more obvious: --- -From 367f3dcf25141ff6c30400c00ec09cc3c5486f94 Mon Sep 17 00:00:00 2001 +>From 367f3dcf25141ff6c30400c00ec09cc3c5486f94 Mon Sep 17 00:00:00 2001 From: Michal Hocko <mhocko@suse.cz> Date: Fri, 5 Sep 2014 11:16:17 +0200 Subject: [patch] mm: memcontrol: do not kill uncharge batching in @@ -211,9 +211,3 @@ index ef1f39139b71..154444918685 100644 /* -- 2.1.0 - --- -To unsubscribe, send a message with 'unsubscribe linux-mm' in -the body to majordomo@kvack.org. For more info on Linux MM, -see: http://www.linux-mm.org/ . -Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> diff --git a/a/content_digest b/N2/content_digest index ee7c584..549140c 100644 --- a/a/content_digest +++ b/N2/content_digest @@ -60,7 +60,7 @@ "it's actually taken, making the whole thing a bit more obvious:\n" "\n" "---\n" - "From 367f3dcf25141ff6c30400c00ec09cc3c5486f94 Mon Sep 17 00:00:00 2001\n" + ">From 367f3dcf25141ff6c30400c00ec09cc3c5486f94 Mon Sep 17 00:00:00 2001\n" "From: Michal Hocko <mhocko@suse.cz>\n" "Date: Fri, 5 Sep 2014 11:16:17 +0200\n" "Subject: [patch] mm: memcontrol: do not kill uncharge batching in\n" @@ -226,12 +226,6 @@ " \n" " /*\n" "-- \n" - "2.1.0\n" - "\n" - "--\n" - "To unsubscribe, send a message with 'unsubscribe linux-mm' in\n" - "the body to majordomo@kvack.org. For more info on Linux MM,\n" - "see: http://www.linux-mm.org/ .\n" - "Don't email: <a href=mailto:\"dont@kvack.org\"> email@kvack.org </a>" + 2.1.0 -e52bffaef8463ee5770eb375b1ce637899bd0f5c2c25591ba917095270ea8d4e +946f848fa17e53b253cf683ff3e3a0a0dbe1987f8fe9fbcbf7f57cea78149fe2
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.