diff for duplicates of <20111021225008.GK608@redhat.com> diff --git a/a/1.txt b/N1/1.txt index db4bd42..0d6dba9 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -24,117 +24,3 @@ I've to pack luggage and prepare to fly to KS tomorrow so I may not be responsive in the next few days. === ->From f2898ff06b5a9a14b9d957c7696137f42a2438e9 Mon Sep 17 00:00:00 2001 -From: Andrea Arcangeli <aarcange@redhat.com> -Date: Sat, 22 Oct 2011 00:11:49 +0200 -Subject: [PATCH] mremap: enforce rmap src/dst vma ordering in case of - vma_merge succeeding in copy_vma - -migrate was doing a rmap_walk with speculative lock-less access on -pagetables. That could lead it to not serialize properly against -mremap PT locks. But a second problem remains in the order of vmas in -the same_anon_vma list used by the rmap_walk. - -If vma_merge would succeed in copy_vma, the src vma could be placed -after the dst vma in the same_anon_vma list. That could still lead -migrate to miss some pte. - -This patch adds a anon_vma_order_tail() function to force the dst vma -at the end of the list before mremap starts to solve the problem. - -If the mremap is very large and there are a lots of parents or childs -sharing the anon_vma root lock, this should still scale better than -taking the anon_vma root lock around every pte copy practically for -the whole duration of mremap. ---- - include/linux/rmap.h | 1 + - mm/mmap.c | 8 ++++++++ - mm/rmap.c | 43 +++++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 52 insertions(+), 0 deletions(-) - -diff --git a/include/linux/rmap.h b/include/linux/rmap.h -index 2148b12..45eb098 100644 ---- a/include/linux/rmap.h -+++ b/include/linux/rmap.h -@@ -120,6 +120,7 @@ void anon_vma_init(void); /* create anon_vma_cachep */ - int anon_vma_prepare(struct vm_area_struct *); - void unlink_anon_vmas(struct vm_area_struct *); - int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *); -+void anon_vma_order_tail(struct vm_area_struct *); - int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *); - void __anon_vma_link(struct vm_area_struct *); - -diff --git a/mm/mmap.c b/mm/mmap.c -index a65efd4..a5858dc 100644 ---- a/mm/mmap.c -+++ b/mm/mmap.c -@@ -2339,7 +2339,15 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, - */ - if (vma_start >= new_vma->vm_start && - vma_start < new_vma->vm_end) -+ /* -+ * No need to call anon_vma_order_tail() in -+ * this case because the same PT lock will -+ * serialize the rmap_walk against both src -+ * and dst vmas. -+ */ - *vmap = new_vma; -+ else -+ anon_vma_order_tail(new_vma); - } else { - new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); - if (new_vma) { -diff --git a/mm/rmap.c b/mm/rmap.c -index 8005080..170cece 100644 ---- a/mm/rmap.c -+++ b/mm/rmap.c -@@ -272,6 +272,49 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) - } - - /* -+ * Some rmap walk that needs to find all ptes/hugepmds without false -+ * negatives (like migrate and split_huge_page) running concurrent -+ * with operations that copy or move pagetables (like mremap() and -+ * fork()) to be safe depends the anon_vma "same_anon_vma" list to be -+ * in a certain order: the dst_vma must be placed after the src_vma in -+ * the list. This is always guaranteed by fork() but mremap() needs to -+ * call this function to enforce it in case the dst_vma isn't newly -+ * allocated and chained with the anon_vma_clone() function but just -+ * an extension of a pre-existing vma through vma_merge. -+ * -+ * NOTE: the same_anon_vma list can still changed by other processes -+ * while mremap runs because mremap doesn't hold the anon_vma mutex to -+ * prevent modifications to the list while it runs. All we need to -+ * enforce is that the relative order of this process vmas isn't -+ * changing (we don't care about other vmas order). Each vma -+ * corresponds to an anon_vma_chain structure so there's no risk that -+ * other processes calling anon_vma_order_tail() and changing the -+ * same_anon_vma list under mremap() will screw with the relative -+ * order of this process vmas in the list, because we won't alter the -+ * order of any vma that isn't belonging to this process. And there -+ * can't be another anon_vma_order_tail running concurrently with -+ * mremap() coming from this process because we hold the mmap_sem for -+ * the whole mremap(). fork() ordering dependency also shouldn't be -+ * affected because we only care that the parent vmas are placed in -+ * the list before the child vmas and anon_vma_order_tail won't reorder -+ * vmas from either the fork parent or child. -+ */ -+void anon_vma_order_tail(struct vm_area_struct *dst) -+{ -+ struct anon_vma_chain *pavc; -+ struct anon_vma *root = NULL; -+ -+ list_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) { -+ struct anon_vma *anon_vma = pavc->anon_vma; -+ VM_BUG_ON(pavc->vma != dst); -+ root = lock_anon_vma_root(root, anon_vma); -+ list_del(&pavc->same_anon_vma); -+ list_add_tail(&pavc->same_anon_vma, &anon_vma->head); -+ } -+ unlock_anon_vma_root(root); -+} -+ -+/* - * Attach vma to its own anon_vma, as well as to the anon_vmas that - * the corresponding VMA in the parent process is attached to. - * Returns 0 on success, non-zero on failure. diff --git a/a/content_digest b/N1/content_digest index 496f420..297e09b 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -44,120 +44,6 @@ "I've to pack luggage and prepare to fly to KS tomorrow so I may not be\n" "responsive in the next few days.\n" "\n" - "===\n" - ">From f2898ff06b5a9a14b9d957c7696137f42a2438e9 Mon Sep 17 00:00:00 2001\n" - "From: Andrea Arcangeli <aarcange@redhat.com>\n" - "Date: Sat, 22 Oct 2011 00:11:49 +0200\n" - "Subject: [PATCH] mremap: enforce rmap src/dst vma ordering in case of\n" - " vma_merge succeeding in copy_vma\n" - "\n" - "migrate was doing a rmap_walk with speculative lock-less access on\n" - "pagetables. That could lead it to not serialize properly against\n" - "mremap PT locks. But a second problem remains in the order of vmas in\n" - "the same_anon_vma list used by the rmap_walk.\n" - "\n" - "If vma_merge would succeed in copy_vma, the src vma could be placed\n" - "after the dst vma in the same_anon_vma list. That could still lead\n" - "migrate to miss some pte.\n" - "\n" - "This patch adds a anon_vma_order_tail() function to force the dst vma\n" - "at the end of the list before mremap starts to solve the problem.\n" - "\n" - "If the mremap is very large and there are a lots of parents or childs\n" - "sharing the anon_vma root lock, this should still scale better than\n" - "taking the anon_vma root lock around every pte copy practically for\n" - "the whole duration of mremap.\n" - "---\n" - " include/linux/rmap.h | 1 +\n" - " mm/mmap.c | 8 ++++++++\n" - " mm/rmap.c | 43 +++++++++++++++++++++++++++++++++++++++++++\n" - " 3 files changed, 52 insertions(+), 0 deletions(-)\n" - "\n" - "diff --git a/include/linux/rmap.h b/include/linux/rmap.h\n" - "index 2148b12..45eb098 100644\n" - "--- a/include/linux/rmap.h\n" - "+++ b/include/linux/rmap.h\n" - "@@ -120,6 +120,7 @@ void anon_vma_init(void);\t/* create anon_vma_cachep */\n" - " int anon_vma_prepare(struct vm_area_struct *);\n" - " void unlink_anon_vmas(struct vm_area_struct *);\n" - " int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);\n" - "+void anon_vma_order_tail(struct vm_area_struct *);\n" - " int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);\n" - " void __anon_vma_link(struct vm_area_struct *);\n" - " \n" - "diff --git a/mm/mmap.c b/mm/mmap.c\n" - "index a65efd4..a5858dc 100644\n" - "--- a/mm/mmap.c\n" - "+++ b/mm/mmap.c\n" - "@@ -2339,7 +2339,15 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,\n" - " \t\t */\n" - " \t\tif (vma_start >= new_vma->vm_start &&\n" - " \t\t vma_start < new_vma->vm_end)\n" - "+\t\t\t/*\n" - "+\t\t\t * No need to call anon_vma_order_tail() in\n" - "+\t\t\t * this case because the same PT lock will\n" - "+\t\t\t * serialize the rmap_walk against both src\n" - "+\t\t\t * and dst vmas.\n" - "+\t\t\t */\n" - " \t\t\t*vmap = new_vma;\n" - "+\t\telse\n" - "+\t\t\tanon_vma_order_tail(new_vma);\n" - " \t} else {\n" - " \t\tnew_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);\n" - " \t\tif (new_vma) {\n" - "diff --git a/mm/rmap.c b/mm/rmap.c\n" - "index 8005080..170cece 100644\n" - "--- a/mm/rmap.c\n" - "+++ b/mm/rmap.c\n" - "@@ -272,6 +272,49 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)\n" - " }\n" - " \n" - " /*\n" - "+ * Some rmap walk that needs to find all ptes/hugepmds without false\n" - "+ * negatives (like migrate and split_huge_page) running concurrent\n" - "+ * with operations that copy or move pagetables (like mremap() and\n" - "+ * fork()) to be safe depends the anon_vma \"same_anon_vma\" list to be\n" - "+ * in a certain order: the dst_vma must be placed after the src_vma in\n" - "+ * the list. This is always guaranteed by fork() but mremap() needs to\n" - "+ * call this function to enforce it in case the dst_vma isn't newly\n" - "+ * allocated and chained with the anon_vma_clone() function but just\n" - "+ * an extension of a pre-existing vma through vma_merge.\n" - "+ *\n" - "+ * NOTE: the same_anon_vma list can still changed by other processes\n" - "+ * while mremap runs because mremap doesn't hold the anon_vma mutex to\n" - "+ * prevent modifications to the list while it runs. All we need to\n" - "+ * enforce is that the relative order of this process vmas isn't\n" - "+ * changing (we don't care about other vmas order). Each vma\n" - "+ * corresponds to an anon_vma_chain structure so there's no risk that\n" - "+ * other processes calling anon_vma_order_tail() and changing the\n" - "+ * same_anon_vma list under mremap() will screw with the relative\n" - "+ * order of this process vmas in the list, because we won't alter the\n" - "+ * order of any vma that isn't belonging to this process. And there\n" - "+ * can't be another anon_vma_order_tail running concurrently with\n" - "+ * mremap() coming from this process because we hold the mmap_sem for\n" - "+ * the whole mremap(). fork() ordering dependency also shouldn't be\n" - "+ * affected because we only care that the parent vmas are placed in\n" - "+ * the list before the child vmas and anon_vma_order_tail won't reorder\n" - "+ * vmas from either the fork parent or child.\n" - "+ */\n" - "+void anon_vma_order_tail(struct vm_area_struct *dst)\n" - "+{\n" - "+\tstruct anon_vma_chain *pavc;\n" - "+\tstruct anon_vma *root = NULL;\n" - "+\n" - "+\tlist_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) {\n" - "+\t\tstruct anon_vma *anon_vma = pavc->anon_vma;\n" - "+\t\tVM_BUG_ON(pavc->vma != dst);\n" - "+\t\troot = lock_anon_vma_root(root, anon_vma);\n" - "+\t\tlist_del(&pavc->same_anon_vma);\n" - "+\t\tlist_add_tail(&pavc->same_anon_vma, &anon_vma->head);\n" - "+\t}\n" - "+\tunlock_anon_vma_root(root);\n" - "+}\n" - "+\n" - "+/*\n" - " * Attach vma to its own anon_vma, as well as to the anon_vmas that\n" - " * the corresponding VMA in the parent process is attached to.\n" - * Returns 0 on success, non-zero on failure. + === -f39968bf379836c11cafc595045a3a6a1081cce54ff8f38196f0e752cd9c5bcf +5f411adb03a985ac26e9df1b97124ad11969374753f5f7aee19666bb60a20238
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.