diff for duplicates of <20121202151232.GB12911@gmail.com> diff --git a/a/1.txt b/N1/1.txt index a1b17ff..4e28120 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -20,3 +20,450 @@ Thanks, Ingo -----------------------> +>From 21469dcb225b9cf3160f839b7a823448f5ce5afa Mon Sep 17 00:00:00 2001 +From: Ingo Molnar <mingo@kernel.org> +Date: Sat, 1 Dec 2012 21:15:38 +0100 +Subject: [PATCH] mm/rmap, migration: Make rmap_walk_anon() and + try_to_unmap_anon() more scalable + +rmap_walk_anon() and try_to_unmap_anon() appears to be too +careful about locking the anon vma: while it needs protection +against anon vma list modifications, it does not need exclusive +access to the list itself. + +Transforming this exclusive lock to a read-locked rwsem removes +a global lock from the hot path of page-migration intense +threaded workloads which can cause pathological performance like +this: + + 96.43% process 0 [kernel.kallsyms] [k] perf_trace_sched_switch + | + --- perf_trace_sched_switch + __schedule + schedule + schedule_preempt_disabled + __mutex_lock_common.isra.6 + __mutex_lock_slowpath + mutex_lock + | + |--50.61%-- rmap_walk + | move_to_new_page + | migrate_pages + | migrate_misplaced_page + | __do_numa_page.isra.69 + | handle_pte_fault + | handle_mm_fault + | __do_page_fault + | do_page_fault + | page_fault + | __memset_sse2 + | | + | --100.00%-- worker_thread + | | + | --100.00%-- start_thread + | + --49.39%-- page_lock_anon_vma + try_to_unmap_anon + try_to_unmap + migrate_pages + migrate_misplaced_page + __do_numa_page.isra.69 + handle_pte_fault + handle_mm_fault + __do_page_fault + do_page_fault + page_fault + __memset_sse2 + | + --100.00%-- worker_thread + start_thread + +With this change applied the profile is now nicely flat +and there's no anon-vma related scheduling/blocking. + +Rename anon_vma_[un]lock() => anon_vma_[un]lock_write(), +to make it clearer that it's an exclusive write-lock in +that case - suggested by Rik van Riel. + +Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> +Cc: Paul Turner <pjt@google.com> +Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com> +Cc: Christoph Lameter <cl@linux.com> +Cc: Rik van Riel <riel@redhat.com> +Cc: Mel Gorman <mgorman@suse.de> +Cc: Andrea Arcangeli <aarcange@redhat.com> +Cc: Johannes Weiner <hannes@cmpxchg.org> +Cc: Hugh Dickins <hughd@google.com> +Signed-off-by: Ingo Molnar <mingo@kernel.org> +--- + include/linux/huge_mm.h | 2 +- + include/linux/rmap.h | 17 ++++++++++++++--- + mm/huge_memory.c | 6 +++--- + mm/ksm.c | 6 +++--- + mm/memory-failure.c | 4 ++-- + mm/migrate.c | 2 +- + mm/mmap.c | 2 +- + mm/mremap.c | 2 +- + mm/rmap.c | 48 ++++++++++++++++++++++++------------------------ + 9 files changed, 50 insertions(+), 39 deletions(-) + +diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h +index 7f5a552..81a9dee 100644 +--- a/include/linux/huge_mm.h ++++ b/include/linux/huge_mm.h +@@ -101,7 +101,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd); + #define wait_split_huge_page(__anon_vma, __pmd) \ + do { \ + pmd_t *____pmd = (__pmd); \ +- anon_vma_lock(__anon_vma); \ ++ anon_vma_lock_write(__anon_vma); \ + anon_vma_unlock(__anon_vma); \ + BUG_ON(pmd_trans_splitting(*____pmd) || \ + pmd_trans_huge(*____pmd)); \ +diff --git a/include/linux/rmap.h b/include/linux/rmap.h +index f3f41d2..c20635c 100644 +--- a/include/linux/rmap.h ++++ b/include/linux/rmap.h +@@ -118,7 +118,7 @@ static inline void vma_unlock_anon_vma(struct vm_area_struct *vma) + up_write(&anon_vma->root->rwsem); + } + +-static inline void anon_vma_lock(struct anon_vma *anon_vma) ++static inline void anon_vma_lock_write(struct anon_vma *anon_vma) + { + down_write(&anon_vma->root->rwsem); + } +@@ -128,6 +128,17 @@ static inline void anon_vma_unlock(struct anon_vma *anon_vma) + up_write(&anon_vma->root->rwsem); + } + ++static inline void anon_vma_lock_read(struct anon_vma *anon_vma) ++{ ++ down_read(&anon_vma->root->rwsem); ++} ++ ++static inline void anon_vma_unlock_read(struct anon_vma *anon_vma) ++{ ++ up_read(&anon_vma->root->rwsem); ++} ++ ++ + /* + * anon_vma helper functions. + */ +@@ -220,8 +231,8 @@ int try_to_munlock(struct page *); + /* + * Called by memory-failure.c to kill processes. + */ +-struct anon_vma *page_lock_anon_vma(struct page *page); +-void page_unlock_anon_vma(struct anon_vma *anon_vma); ++struct anon_vma *page_lock_anon_vma_read(struct page *page); ++void page_unlock_anon_vma_read(struct anon_vma *anon_vma); + int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); + + /* +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 25929c1..265667e 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1644,7 +1644,7 @@ int split_huge_page(struct page *page) + int ret = 1; + + BUG_ON(!PageAnon(page)); +- anon_vma = page_lock_anon_vma(page); ++ anon_vma = page_lock_anon_vma_read(page); + if (!anon_vma) + goto out; + ret = 0; +@@ -1657,7 +1657,7 @@ int split_huge_page(struct page *page) + + BUG_ON(PageCompound(page)); + out_unlock: +- page_unlock_anon_vma(anon_vma); ++ page_unlock_anon_vma_read(anon_vma); + out: + return ret; + } +@@ -2169,7 +2169,7 @@ static void collapse_huge_page(struct mm_struct *mm, + if (!pmd_present(*pmd) || pmd_trans_huge(*pmd)) + goto out; + +- anon_vma_lock(vma->anon_vma); ++ anon_vma_lock_write(vma->anon_vma); + + pte = pte_offset_map(pmd, address); + ptl = pte_lockptr(mm, pmd); +diff --git a/mm/ksm.c b/mm/ksm.c +index ae539f0..7fa37de 100644 +--- a/mm/ksm.c ++++ b/mm/ksm.c +@@ -1634,7 +1634,7 @@ again: + struct anon_vma_chain *vmac; + struct vm_area_struct *vma; + +- anon_vma_lock(anon_vma); ++ anon_vma_lock_write(anon_vma); + anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, + 0, ULONG_MAX) { + vma = vmac->vma; +@@ -1688,7 +1688,7 @@ again: + struct anon_vma_chain *vmac; + struct vm_area_struct *vma; + +- anon_vma_lock(anon_vma); ++ anon_vma_lock_write(anon_vma); + anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, + 0, ULONG_MAX) { + vma = vmac->vma; +@@ -1741,7 +1741,7 @@ again: + struct anon_vma_chain *vmac; + struct vm_area_struct *vma; + +- anon_vma_lock(anon_vma); ++ anon_vma_lock_write(anon_vma); + anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, + 0, ULONG_MAX) { + vma = vmac->vma; +diff --git a/mm/memory-failure.c b/mm/memory-failure.c +index 6c5899b..6b4460c 100644 +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -402,7 +402,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, + struct anon_vma *av; + pgoff_t pgoff; + +- av = page_lock_anon_vma(page); ++ av = page_lock_anon_vma_read(page); + if (av == NULL) /* Not actually mapped anymore */ + return; + +@@ -423,7 +423,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, + } + } + read_unlock(&tasklist_lock); +- page_unlock_anon_vma(av); ++ page_unlock_anon_vma_read(av); + } + + /* +diff --git a/mm/migrate.c b/mm/migrate.c +index 3db0543..138cb34 100644 +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -751,7 +751,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, + */ + if (PageAnon(page)) { + /* +- * Only page_lock_anon_vma() understands the subtleties of ++ * Only page_lock_anon_vma_read() understands the subtleties of + * getting a hold on an anon_vma from outside one of its mms. + */ + anon_vma = page_get_anon_vma(page); +diff --git a/mm/mmap.c b/mm/mmap.c +index 27951e4..964a85c 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -600,7 +600,7 @@ again: remove_next = 1 + (end > next->vm_end); + if (anon_vma) { + VM_BUG_ON(adjust_next && next->anon_vma && + anon_vma != next->anon_vma); +- anon_vma_lock(anon_vma); ++ anon_vma_lock_write(anon_vma); + anon_vma_interval_tree_pre_update_vma(vma); + if (adjust_next) + anon_vma_interval_tree_pre_update_vma(next); +diff --git a/mm/mremap.c b/mm/mremap.c +index 1b61c2d..3dabd17 100644 +--- a/mm/mremap.c ++++ b/mm/mremap.c +@@ -104,7 +104,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, + } + if (vma->anon_vma) { + anon_vma = vma->anon_vma; +- anon_vma_lock(anon_vma); ++ anon_vma_lock_write(anon_vma); + } + } + +diff --git a/mm/rmap.c b/mm/rmap.c +index 6e3ee3b..b0f612d 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -87,24 +87,24 @@ static inline void anon_vma_free(struct anon_vma *anon_vma) + VM_BUG_ON(atomic_read(&anon_vma->refcount)); + + /* +- * Synchronize against page_lock_anon_vma() such that ++ * Synchronize against page_lock_anon_vma_read() such that + * we can safely hold the lock without the anon_vma getting + * freed. + * + * Relies on the full mb implied by the atomic_dec_and_test() from + * put_anon_vma() against the acquire barrier implied by +- * mutex_trylock() from page_lock_anon_vma(). This orders: ++ * down_read_trylock() from page_lock_anon_vma_read(). This orders: + * +- * page_lock_anon_vma() VS put_anon_vma() +- * mutex_trylock() atomic_dec_and_test() ++ * page_lock_anon_vma_read() VS put_anon_vma() ++ * down_read_trylock() atomic_dec_and_test() + * LOCK MB +- * atomic_read() mutex_is_locked() ++ * atomic_read() rwsem_is_locked() + * + * LOCK should suffice since the actual taking of the lock must + * happen _before_ what follows. + */ + if (rwsem_is_locked(&anon_vma->root->rwsem)) { +- anon_vma_lock(anon_vma); ++ anon_vma_lock_write(anon_vma); + anon_vma_unlock(anon_vma); + } + +@@ -146,7 +146,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma, + * allocate a new one. + * + * Anon-vma allocations are very subtle, because we may have +- * optimistically looked up an anon_vma in page_lock_anon_vma() ++ * optimistically looked up an anon_vma in page_lock_anon_vma_read() + * and that may actually touch the spinlock even in the newly + * allocated vma (it depends on RCU to make sure that the + * anon_vma isn't actually destroyed). +@@ -181,7 +181,7 @@ int anon_vma_prepare(struct vm_area_struct *vma) + allocated = anon_vma; + } + +- anon_vma_lock(anon_vma); ++ anon_vma_lock_write(anon_vma); + /* page_table_lock to protect against threads */ + spin_lock(&mm->page_table_lock); + if (likely(!vma->anon_vma)) { +@@ -306,7 +306,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) + get_anon_vma(anon_vma->root); + /* Mark this anon_vma as the one where our new (COWed) pages go. */ + vma->anon_vma = anon_vma; +- anon_vma_lock(anon_vma); ++ anon_vma_lock_write(anon_vma); + anon_vma_chain_link(vma, avc, anon_vma); + anon_vma_unlock(anon_vma); + +@@ -442,7 +442,7 @@ out: + * atomic op -- the trylock. If we fail the trylock, we fall back to getting a + * reference like with page_get_anon_vma() and then block on the mutex. + */ +-struct anon_vma *page_lock_anon_vma(struct page *page) ++struct anon_vma *page_lock_anon_vma_read(struct page *page) + { + struct anon_vma *anon_vma = NULL; + struct anon_vma *root_anon_vma; +@@ -457,14 +457,14 @@ struct anon_vma *page_lock_anon_vma(struct page *page) + + anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); + root_anon_vma = ACCESS_ONCE(anon_vma->root); +- if (down_write_trylock(&root_anon_vma->rwsem)) { ++ if (down_read_trylock(&root_anon_vma->rwsem)) { + /* + * If the page is still mapped, then this anon_vma is still + * its anon_vma, and holding the mutex ensures that it will + * not go away, see anon_vma_free(). + */ + if (!page_mapped(page)) { +- up_write(&root_anon_vma->rwsem); ++ up_read(&root_anon_vma->rwsem); + anon_vma = NULL; + } + goto out; +@@ -484,15 +484,15 @@ struct anon_vma *page_lock_anon_vma(struct page *page) + + /* we pinned the anon_vma, its safe to sleep */ + rcu_read_unlock(); +- anon_vma_lock(anon_vma); ++ anon_vma_lock_read(anon_vma); + + if (atomic_dec_and_test(&anon_vma->refcount)) { + /* + * Oops, we held the last refcount, release the lock + * and bail -- can't simply use put_anon_vma() because +- * we'll deadlock on the anon_vma_lock() recursion. ++ * we'll deadlock on the anon_vma_lock_write() recursion. + */ +- anon_vma_unlock(anon_vma); ++ anon_vma_unlock_read(anon_vma); + __put_anon_vma(anon_vma); + anon_vma = NULL; + } +@@ -504,9 +504,9 @@ out: + return anon_vma; + } + +-void page_unlock_anon_vma(struct anon_vma *anon_vma) ++void page_unlock_anon_vma_read(struct anon_vma *anon_vma) + { +- anon_vma_unlock(anon_vma); ++ anon_vma_unlock_read(anon_vma); + } + + /* +@@ -732,7 +732,7 @@ static int page_referenced_anon(struct page *page, + struct anon_vma_chain *avc; + int referenced = 0; + +- anon_vma = page_lock_anon_vma(page); ++ anon_vma = page_lock_anon_vma_read(page); + if (!anon_vma) + return referenced; + +@@ -754,7 +754,7 @@ static int page_referenced_anon(struct page *page, + break; + } + +- page_unlock_anon_vma(anon_vma); ++ page_unlock_anon_vma_read(anon_vma); + return referenced; + } + +@@ -1474,7 +1474,7 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) + struct anon_vma_chain *avc; + int ret = SWAP_AGAIN; + +- anon_vma = page_lock_anon_vma(page); ++ anon_vma = page_lock_anon_vma_read(page); + if (!anon_vma) + return ret; + +@@ -1501,7 +1501,7 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) + break; + } + +- page_unlock_anon_vma(anon_vma); ++ page_unlock_anon_vma_read(anon_vma); + return ret; + } + +@@ -1696,7 +1696,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, + int ret = SWAP_AGAIN; + + /* +- * Note: remove_migration_ptes() cannot use page_lock_anon_vma() ++ * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read() + * because that depends on page_mapped(); but not all its usages + * are holding mmap_sem. Users without mmap_sem are required to + * take a reference count to prevent the anon_vma disappearing +@@ -1704,7 +1704,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, + anon_vma = page_anon_vma(page); + if (!anon_vma) + return ret; +- anon_vma_lock(anon_vma); ++ anon_vma_lock_read(anon_vma); + anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { + struct vm_area_struct *vma = avc->vma; + unsigned long address = vma_address(page, vma); +@@ -1712,7 +1712,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, + if (ret != SWAP_AGAIN) + break; + } +- anon_vma_unlock(anon_vma); ++ anon_vma_unlock_read(anon_vma); + return ret; + } diff --git a/a/content_digest b/N1/content_digest index 4954232..353d98f 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -47,6 +47,453 @@ "\n" "\tIngo\n" "\n" - -----------------------> + "----------------------->\n" + ">From 21469dcb225b9cf3160f839b7a823448f5ce5afa Mon Sep 17 00:00:00 2001\n" + "From: Ingo Molnar <mingo@kernel.org>\n" + "Date: Sat, 1 Dec 2012 21:15:38 +0100\n" + "Subject: [PATCH] mm/rmap, migration: Make rmap_walk_anon() and\n" + " try_to_unmap_anon() more scalable\n" + "\n" + "rmap_walk_anon() and try_to_unmap_anon() appears to be too\n" + "careful about locking the anon vma: while it needs protection\n" + "against anon vma list modifications, it does not need exclusive\n" + "access to the list itself.\n" + "\n" + "Transforming this exclusive lock to a read-locked rwsem removes\n" + "a global lock from the hot path of page-migration intense\n" + "threaded workloads which can cause pathological performance like\n" + "this:\n" + "\n" + " 96.43% process 0 [kernel.kallsyms] [k] perf_trace_sched_switch\n" + " |\n" + " --- perf_trace_sched_switch\n" + " __schedule\n" + " schedule\n" + " schedule_preempt_disabled\n" + " __mutex_lock_common.isra.6\n" + " __mutex_lock_slowpath\n" + " mutex_lock\n" + " |\n" + " |--50.61%-- rmap_walk\n" + " | move_to_new_page\n" + " | migrate_pages\n" + " | migrate_misplaced_page\n" + " | __do_numa_page.isra.69\n" + " | handle_pte_fault\n" + " | handle_mm_fault\n" + " | __do_page_fault\n" + " | do_page_fault\n" + " | page_fault\n" + " | __memset_sse2\n" + " | |\n" + " | --100.00%-- worker_thread\n" + " | |\n" + " | --100.00%-- start_thread\n" + " |\n" + " --49.39%-- page_lock_anon_vma\n" + " try_to_unmap_anon\n" + " try_to_unmap\n" + " migrate_pages\n" + " migrate_misplaced_page\n" + " __do_numa_page.isra.69\n" + " handle_pte_fault\n" + " handle_mm_fault\n" + " __do_page_fault\n" + " do_page_fault\n" + " page_fault\n" + " __memset_sse2\n" + " |\n" + " --100.00%-- worker_thread\n" + " start_thread\n" + "\n" + "With this change applied the profile is now nicely flat\n" + "and there's no anon-vma related scheduling/blocking.\n" + "\n" + "Rename anon_vma_[un]lock() => anon_vma_[un]lock_write(),\n" + "to make it clearer that it's an exclusive write-lock in\n" + "that case - suggested by Rik van Riel.\n" + "\n" + "Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>\n" + "Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>\n" + "Cc: Paul Turner <pjt@google.com>\n" + "Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>\n" + "Cc: Christoph Lameter <cl@linux.com>\n" + "Cc: Rik van Riel <riel@redhat.com>\n" + "Cc: Mel Gorman <mgorman@suse.de>\n" + "Cc: Andrea Arcangeli <aarcange@redhat.com>\n" + "Cc: Johannes Weiner <hannes@cmpxchg.org>\n" + "Cc: Hugh Dickins <hughd@google.com>\n" + "Signed-off-by: Ingo Molnar <mingo@kernel.org>\n" + "---\n" + " include/linux/huge_mm.h | 2 +-\n" + " include/linux/rmap.h | 17 ++++++++++++++---\n" + " mm/huge_memory.c | 6 +++---\n" + " mm/ksm.c | 6 +++---\n" + " mm/memory-failure.c | 4 ++--\n" + " mm/migrate.c | 2 +-\n" + " mm/mmap.c | 2 +-\n" + " mm/mremap.c | 2 +-\n" + " mm/rmap.c | 48 ++++++++++++++++++++++++------------------------\n" + " 9 files changed, 50 insertions(+), 39 deletions(-)\n" + "\n" + "diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h\n" + "index 7f5a552..81a9dee 100644\n" + "--- a/include/linux/huge_mm.h\n" + "+++ b/include/linux/huge_mm.h\n" + "@@ -101,7 +101,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);\n" + " #define wait_split_huge_page(__anon_vma, __pmd)\t\t\t\t\\\n" + " \tdo {\t\t\t\t\t\t\t\t\\\n" + " \t\tpmd_t *____pmd = (__pmd);\t\t\t\t\\\n" + "-\t\tanon_vma_lock(__anon_vma);\t\t\t\t\\\n" + "+\t\tanon_vma_lock_write(__anon_vma);\t\t\t\\\n" + " \t\tanon_vma_unlock(__anon_vma);\t\t\t\t\\\n" + " \t\tBUG_ON(pmd_trans_splitting(*____pmd) ||\t\t\t\\\n" + " \t\t pmd_trans_huge(*____pmd));\t\t\t\\\n" + "diff --git a/include/linux/rmap.h b/include/linux/rmap.h\n" + "index f3f41d2..c20635c 100644\n" + "--- a/include/linux/rmap.h\n" + "+++ b/include/linux/rmap.h\n" + "@@ -118,7 +118,7 @@ static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)\n" + " \t\tup_write(&anon_vma->root->rwsem);\n" + " }\n" + " \n" + "-static inline void anon_vma_lock(struct anon_vma *anon_vma)\n" + "+static inline void anon_vma_lock_write(struct anon_vma *anon_vma)\n" + " {\n" + " \tdown_write(&anon_vma->root->rwsem);\n" + " }\n" + "@@ -128,6 +128,17 @@ static inline void anon_vma_unlock(struct anon_vma *anon_vma)\n" + " \tup_write(&anon_vma->root->rwsem);\n" + " }\n" + " \n" + "+static inline void anon_vma_lock_read(struct anon_vma *anon_vma)\n" + "+{\n" + "+\tdown_read(&anon_vma->root->rwsem);\n" + "+}\n" + "+\n" + "+static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)\n" + "+{\n" + "+\tup_read(&anon_vma->root->rwsem);\n" + "+}\n" + "+\n" + "+\n" + " /*\n" + " * anon_vma helper functions.\n" + " */\n" + "@@ -220,8 +231,8 @@ int try_to_munlock(struct page *);\n" + " /*\n" + " * Called by memory-failure.c to kill processes.\n" + " */\n" + "-struct anon_vma *page_lock_anon_vma(struct page *page);\n" + "-void page_unlock_anon_vma(struct anon_vma *anon_vma);\n" + "+struct anon_vma *page_lock_anon_vma_read(struct page *page);\n" + "+void page_unlock_anon_vma_read(struct anon_vma *anon_vma);\n" + " int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);\n" + " \n" + " /*\n" + "diff --git a/mm/huge_memory.c b/mm/huge_memory.c\n" + "index 25929c1..265667e 100644\n" + "--- a/mm/huge_memory.c\n" + "+++ b/mm/huge_memory.c\n" + "@@ -1644,7 +1644,7 @@ int split_huge_page(struct page *page)\n" + " \tint ret = 1;\n" + " \n" + " \tBUG_ON(!PageAnon(page));\n" + "-\tanon_vma = page_lock_anon_vma(page);\n" + "+\tanon_vma = page_lock_anon_vma_read(page);\n" + " \tif (!anon_vma)\n" + " \t\tgoto out;\n" + " \tret = 0;\n" + "@@ -1657,7 +1657,7 @@ int split_huge_page(struct page *page)\n" + " \n" + " \tBUG_ON(PageCompound(page));\n" + " out_unlock:\n" + "-\tpage_unlock_anon_vma(anon_vma);\n" + "+\tpage_unlock_anon_vma_read(anon_vma);\n" + " out:\n" + " \treturn ret;\n" + " }\n" + "@@ -2169,7 +2169,7 @@ static void collapse_huge_page(struct mm_struct *mm,\n" + " \tif (!pmd_present(*pmd) || pmd_trans_huge(*pmd))\n" + " \t\tgoto out;\n" + " \n" + "-\tanon_vma_lock(vma->anon_vma);\n" + "+\tanon_vma_lock_write(vma->anon_vma);\n" + " \n" + " \tpte = pte_offset_map(pmd, address);\n" + " \tptl = pte_lockptr(mm, pmd);\n" + "diff --git a/mm/ksm.c b/mm/ksm.c\n" + "index ae539f0..7fa37de 100644\n" + "--- a/mm/ksm.c\n" + "+++ b/mm/ksm.c\n" + "@@ -1634,7 +1634,7 @@ again:\n" + " \t\tstruct anon_vma_chain *vmac;\n" + " \t\tstruct vm_area_struct *vma;\n" + " \n" + "-\t\tanon_vma_lock(anon_vma);\n" + "+\t\tanon_vma_lock_write(anon_vma);\n" + " \t\tanon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,\n" + " \t\t\t\t\t 0, ULONG_MAX) {\n" + " \t\t\tvma = vmac->vma;\n" + "@@ -1688,7 +1688,7 @@ again:\n" + " \t\tstruct anon_vma_chain *vmac;\n" + " \t\tstruct vm_area_struct *vma;\n" + " \n" + "-\t\tanon_vma_lock(anon_vma);\n" + "+\t\tanon_vma_lock_write(anon_vma);\n" + " \t\tanon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,\n" + " \t\t\t\t\t 0, ULONG_MAX) {\n" + " \t\t\tvma = vmac->vma;\n" + "@@ -1741,7 +1741,7 @@ again:\n" + " \t\tstruct anon_vma_chain *vmac;\n" + " \t\tstruct vm_area_struct *vma;\n" + " \n" + "-\t\tanon_vma_lock(anon_vma);\n" + "+\t\tanon_vma_lock_write(anon_vma);\n" + " \t\tanon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,\n" + " \t\t\t\t\t 0, ULONG_MAX) {\n" + " \t\t\tvma = vmac->vma;\n" + "diff --git a/mm/memory-failure.c b/mm/memory-failure.c\n" + "index 6c5899b..6b4460c 100644\n" + "--- a/mm/memory-failure.c\n" + "+++ b/mm/memory-failure.c\n" + "@@ -402,7 +402,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,\n" + " \tstruct anon_vma *av;\n" + " \tpgoff_t pgoff;\n" + " \n" + "-\tav = page_lock_anon_vma(page);\n" + "+\tav = page_lock_anon_vma_read(page);\n" + " \tif (av == NULL)\t/* Not actually mapped anymore */\n" + " \t\treturn;\n" + " \n" + "@@ -423,7 +423,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,\n" + " \t\t}\n" + " \t}\n" + " \tread_unlock(&tasklist_lock);\n" + "-\tpage_unlock_anon_vma(av);\n" + "+\tpage_unlock_anon_vma_read(av);\n" + " }\n" + " \n" + " /*\n" + "diff --git a/mm/migrate.c b/mm/migrate.c\n" + "index 3db0543..138cb34 100644\n" + "--- a/mm/migrate.c\n" + "+++ b/mm/migrate.c\n" + "@@ -751,7 +751,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,\n" + " \t */\n" + " \tif (PageAnon(page)) {\n" + " \t\t/*\n" + "-\t\t * Only page_lock_anon_vma() understands the subtleties of\n" + "+\t\t * Only page_lock_anon_vma_read() understands the subtleties of\n" + " \t\t * getting a hold on an anon_vma from outside one of its mms.\n" + " \t\t */\n" + " \t\tanon_vma = page_get_anon_vma(page);\n" + "diff --git a/mm/mmap.c b/mm/mmap.c\n" + "index 27951e4..964a85c 100644\n" + "--- a/mm/mmap.c\n" + "+++ b/mm/mmap.c\n" + "@@ -600,7 +600,7 @@ again:\t\t\tremove_next = 1 + (end > next->vm_end);\n" + " \tif (anon_vma) {\n" + " \t\tVM_BUG_ON(adjust_next && next->anon_vma &&\n" + " \t\t\t anon_vma != next->anon_vma);\n" + "-\t\tanon_vma_lock(anon_vma);\n" + "+\t\tanon_vma_lock_write(anon_vma);\n" + " \t\tanon_vma_interval_tree_pre_update_vma(vma);\n" + " \t\tif (adjust_next)\n" + " \t\t\tanon_vma_interval_tree_pre_update_vma(next);\n" + "diff --git a/mm/mremap.c b/mm/mremap.c\n" + "index 1b61c2d..3dabd17 100644\n" + "--- a/mm/mremap.c\n" + "+++ b/mm/mremap.c\n" + "@@ -104,7 +104,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,\n" + " \t\t}\n" + " \t\tif (vma->anon_vma) {\n" + " \t\t\tanon_vma = vma->anon_vma;\n" + "-\t\t\tanon_vma_lock(anon_vma);\n" + "+\t\t\tanon_vma_lock_write(anon_vma);\n" + " \t\t}\n" + " \t}\n" + " \n" + "diff --git a/mm/rmap.c b/mm/rmap.c\n" + "index 6e3ee3b..b0f612d 100644\n" + "--- a/mm/rmap.c\n" + "+++ b/mm/rmap.c\n" + "@@ -87,24 +87,24 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)\n" + " \tVM_BUG_ON(atomic_read(&anon_vma->refcount));\n" + " \n" + " \t/*\n" + "-\t * Synchronize against page_lock_anon_vma() such that\n" + "+\t * Synchronize against page_lock_anon_vma_read() such that\n" + " \t * we can safely hold the lock without the anon_vma getting\n" + " \t * freed.\n" + " \t *\n" + " \t * Relies on the full mb implied by the atomic_dec_and_test() from\n" + " \t * put_anon_vma() against the acquire barrier implied by\n" + "-\t * mutex_trylock() from page_lock_anon_vma(). This orders:\n" + "+\t * down_read_trylock() from page_lock_anon_vma_read(). This orders:\n" + " \t *\n" + "-\t * page_lock_anon_vma()\t\tVS\tput_anon_vma()\n" + "-\t * mutex_trylock()\t\t\t atomic_dec_and_test()\n" + "+\t * page_lock_anon_vma_read()\tVS\tput_anon_vma()\n" + "+\t * down_read_trylock()\t\t atomic_dec_and_test()\n" + " \t * LOCK\t\t\t\t MB\n" + "-\t * atomic_read()\t\t\t mutex_is_locked()\n" + "+\t * atomic_read()\t\t\t rwsem_is_locked()\n" + " \t *\n" + " \t * LOCK should suffice since the actual taking of the lock must\n" + " \t * happen _before_ what follows.\n" + " \t */\n" + " \tif (rwsem_is_locked(&anon_vma->root->rwsem)) {\n" + "-\t\tanon_vma_lock(anon_vma);\n" + "+\t\tanon_vma_lock_write(anon_vma);\n" + " \t\tanon_vma_unlock(anon_vma);\n" + " \t}\n" + " \n" + "@@ -146,7 +146,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,\n" + " * allocate a new one.\n" + " *\n" + " * Anon-vma allocations are very subtle, because we may have\n" + "- * optimistically looked up an anon_vma in page_lock_anon_vma()\n" + "+ * optimistically looked up an anon_vma in page_lock_anon_vma_read()\n" + " * and that may actually touch the spinlock even in the newly\n" + " * allocated vma (it depends on RCU to make sure that the\n" + " * anon_vma isn't actually destroyed).\n" + "@@ -181,7 +181,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)\n" + " \t\t\tallocated = anon_vma;\n" + " \t\t}\n" + " \n" + "-\t\tanon_vma_lock(anon_vma);\n" + "+\t\tanon_vma_lock_write(anon_vma);\n" + " \t\t/* page_table_lock to protect against threads */\n" + " \t\tspin_lock(&mm->page_table_lock);\n" + " \t\tif (likely(!vma->anon_vma)) {\n" + "@@ -306,7 +306,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)\n" + " \tget_anon_vma(anon_vma->root);\n" + " \t/* Mark this anon_vma as the one where our new (COWed) pages go. */\n" + " \tvma->anon_vma = anon_vma;\n" + "-\tanon_vma_lock(anon_vma);\n" + "+\tanon_vma_lock_write(anon_vma);\n" + " \tanon_vma_chain_link(vma, avc, anon_vma);\n" + " \tanon_vma_unlock(anon_vma);\n" + " \n" + "@@ -442,7 +442,7 @@ out:\n" + " * atomic op -- the trylock. If we fail the trylock, we fall back to getting a\n" + " * reference like with page_get_anon_vma() and then block on the mutex.\n" + " */\n" + "-struct anon_vma *page_lock_anon_vma(struct page *page)\n" + "+struct anon_vma *page_lock_anon_vma_read(struct page *page)\n" + " {\n" + " \tstruct anon_vma *anon_vma = NULL;\n" + " \tstruct anon_vma *root_anon_vma;\n" + "@@ -457,14 +457,14 @@ struct anon_vma *page_lock_anon_vma(struct page *page)\n" + " \n" + " \tanon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);\n" + " \troot_anon_vma = ACCESS_ONCE(anon_vma->root);\n" + "-\tif (down_write_trylock(&root_anon_vma->rwsem)) {\n" + "+\tif (down_read_trylock(&root_anon_vma->rwsem)) {\n" + " \t\t/*\n" + " \t\t * If the page is still mapped, then this anon_vma is still\n" + " \t\t * its anon_vma, and holding the mutex ensures that it will\n" + " \t\t * not go away, see anon_vma_free().\n" + " \t\t */\n" + " \t\tif (!page_mapped(page)) {\n" + "-\t\t\tup_write(&root_anon_vma->rwsem);\n" + "+\t\t\tup_read(&root_anon_vma->rwsem);\n" + " \t\t\tanon_vma = NULL;\n" + " \t\t}\n" + " \t\tgoto out;\n" + "@@ -484,15 +484,15 @@ struct anon_vma *page_lock_anon_vma(struct page *page)\n" + " \n" + " \t/* we pinned the anon_vma, its safe to sleep */\n" + " \trcu_read_unlock();\n" + "-\tanon_vma_lock(anon_vma);\n" + "+\tanon_vma_lock_read(anon_vma);\n" + " \n" + " \tif (atomic_dec_and_test(&anon_vma->refcount)) {\n" + " \t\t/*\n" + " \t\t * Oops, we held the last refcount, release the lock\n" + " \t\t * and bail -- can't simply use put_anon_vma() because\n" + "-\t\t * we'll deadlock on the anon_vma_lock() recursion.\n" + "+\t\t * we'll deadlock on the anon_vma_lock_write() recursion.\n" + " \t\t */\n" + "-\t\tanon_vma_unlock(anon_vma);\n" + "+\t\tanon_vma_unlock_read(anon_vma);\n" + " \t\t__put_anon_vma(anon_vma);\n" + " \t\tanon_vma = NULL;\n" + " \t}\n" + "@@ -504,9 +504,9 @@ out:\n" + " \treturn anon_vma;\n" + " }\n" + " \n" + "-void page_unlock_anon_vma(struct anon_vma *anon_vma)\n" + "+void page_unlock_anon_vma_read(struct anon_vma *anon_vma)\n" + " {\n" + "-\tanon_vma_unlock(anon_vma);\n" + "+\tanon_vma_unlock_read(anon_vma);\n" + " }\n" + " \n" + " /*\n" + "@@ -732,7 +732,7 @@ static int page_referenced_anon(struct page *page,\n" + " \tstruct anon_vma_chain *avc;\n" + " \tint referenced = 0;\n" + " \n" + "-\tanon_vma = page_lock_anon_vma(page);\n" + "+\tanon_vma = page_lock_anon_vma_read(page);\n" + " \tif (!anon_vma)\n" + " \t\treturn referenced;\n" + " \n" + "@@ -754,7 +754,7 @@ static int page_referenced_anon(struct page *page,\n" + " \t\t\tbreak;\n" + " \t}\n" + " \n" + "-\tpage_unlock_anon_vma(anon_vma);\n" + "+\tpage_unlock_anon_vma_read(anon_vma);\n" + " \treturn referenced;\n" + " }\n" + " \n" + "@@ -1474,7 +1474,7 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)\n" + " \tstruct anon_vma_chain *avc;\n" + " \tint ret = SWAP_AGAIN;\n" + " \n" + "-\tanon_vma = page_lock_anon_vma(page);\n" + "+\tanon_vma = page_lock_anon_vma_read(page);\n" + " \tif (!anon_vma)\n" + " \t\treturn ret;\n" + " \n" + "@@ -1501,7 +1501,7 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)\n" + " \t\t\tbreak;\n" + " \t}\n" + " \n" + "-\tpage_unlock_anon_vma(anon_vma);\n" + "+\tpage_unlock_anon_vma_read(anon_vma);\n" + " \treturn ret;\n" + " }\n" + " \n" + "@@ -1696,7 +1696,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,\n" + " \tint ret = SWAP_AGAIN;\n" + " \n" + " \t/*\n" + "-\t * Note: remove_migration_ptes() cannot use page_lock_anon_vma()\n" + "+\t * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()\n" + " \t * because that depends on page_mapped(); but not all its usages\n" + " \t * are holding mmap_sem. Users without mmap_sem are required to\n" + " \t * take a reference count to prevent the anon_vma disappearing\n" + "@@ -1704,7 +1704,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,\n" + " \tanon_vma = page_anon_vma(page);\n" + " \tif (!anon_vma)\n" + " \t\treturn ret;\n" + "-\tanon_vma_lock(anon_vma);\n" + "+\tanon_vma_lock_read(anon_vma);\n" + " \tanon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {\n" + " \t\tstruct vm_area_struct *vma = avc->vma;\n" + " \t\tunsigned long address = vma_address(page, vma);\n" + "@@ -1712,7 +1712,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,\n" + " \t\tif (ret != SWAP_AGAIN)\n" + " \t\t\tbreak;\n" + " \t}\n" + "-\tanon_vma_unlock(anon_vma);\n" + "+\tanon_vma_unlock_read(anon_vma);\n" + " \treturn ret;\n" + } -c7ed1a1cdd00257b812f3d152e702d2e7e744c9e7c769eafa226922380684808 +6b49c223540e939da9560284d74dafa1467cc1672de923f01952be5ec9ffdd78
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.