diff for duplicates of <52372349.6030308@suse.cz> diff --git a/a/1.txt b/N1/1.txt index 7646063..3acc19e 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -38,3 +38,204 @@ Vlastimil -----8<----- +>From 979cbdeaaed76e25a9e08c7ccadba5baf5e7c619 Mon Sep 17 00:00:00 2001 +From: Vlastimil Babka <vbabka@suse.cz> +Date: Mon, 16 Sep 2013 17:06:12 +0200 +Subject: [PATCH] Revert "mm: munlock: manual pte walk in fast path instead of + follow_page_mask()" + +This reverts commit 7a8010cd36273ff5f6fea5201ef9232f30cebbd9 for testing. +--- + include/linux/mm.h | 12 +++--- + mm/mlock.c | 110 +++++++++++++++-------------------------------------- + 2 files changed, 37 insertions(+), 85 deletions(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 8b6e55e..e9bab9c 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -630,12 +630,12 @@ static inline enum zone_type page_zonenum(const struct page *page) + #endif + + /* +- * The identification function is mainly used by the buddy allocator for +- * determining if two pages could be buddies. We are not really identifying +- * the zone since we could be using the section number id if we do not have +- * node id available in page flags. +- * We only guarantee that it will return the same value for two combinable +- * pages in a zone. ++ * The identification function is only used by the buddy allocator for ++ * determining if two pages could be buddies. We are not really ++ * identifying a zone since we could be using a the section number ++ * id if we have not node id available in page flags. ++ * We guarantee only that it will return the same value for two ++ * combinable pages in a zone. + */ + static inline int page_zone_id(struct page *page) + { +diff --git a/mm/mlock.c b/mm/mlock.c +index d638026..19a934d 100644 +--- a/mm/mlock.c ++++ b/mm/mlock.c +@@ -280,7 +280,8 @@ static void __putback_lru_fast(struct pagevec *pvec, int pgrescued) + * The second phase finishes the munlock only for pages where isolation + * succeeded. + * +- * Note that the pagevec may be modified during the process. ++ * Note that pvec is modified during the process. Before returning ++ * pagevec_reinit() is called on it. + */ + static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) + { +@@ -355,60 +356,8 @@ skip_munlock: + */ + if (pagevec_count(&pvec_putback)) + __putback_lru_fast(&pvec_putback, pgrescued); +-} +- +-/* +- * Fill up pagevec for __munlock_pagevec using pte walk +- * +- * The function expects that the struct page corresponding to @start address is +- * a non-TPH page already pinned and in the @pvec, and that it belongs to @zone. +- * +- * The rest of @pvec is filled by subsequent pages within the same pmd and same +- * zone, as long as the pte's are present and vm_normal_page() succeeds. These +- * pages also get pinned. +- * +- * Returns the address of the next page that should be scanned. This equals +- * @start + PAGE_SIZE when no page could be added by the pte walk. +- */ +-static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, +- struct vm_area_struct *vma, int zoneid, unsigned long start, +- unsigned long end) +-{ +- pte_t *pte; +- spinlock_t *ptl; +- +- /* +- * Initialize pte walk starting at the already pinned page where we +- * are sure that there is a pte. +- */ +- pte = get_locked_pte(vma->vm_mm, start, &ptl); +- end = min(end, pmd_addr_end(start, end)); +- +- /* The page next to the pinned page is the first we will try to get */ +- start += PAGE_SIZE; +- while (start < end) { +- struct page *page = NULL; +- pte++; +- if (pte_present(*pte)) +- page = vm_normal_page(vma, start, *pte); +- /* +- * Break if page could not be obtained or the page's node+zone does not +- * match +- */ +- if (!page || page_zone_id(page) != zoneid) +- break; + +- get_page(page); +- /* +- * Increase the address that will be returned *before* the +- * eventual break due to pvec becoming full by adding the page +- */ +- start += PAGE_SIZE; +- if (pagevec_add(pvec, page) == 0) +- break; +- } +- pte_unmap_unlock(pte, ptl); +- return start; ++ pagevec_reinit(pvec); + } + + /* +@@ -432,16 +381,17 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, + void munlock_vma_pages_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) + { ++ struct pagevec pvec; ++ struct zone *zone = NULL; ++ ++ pagevec_init(&pvec, 0); + vma->vm_flags &= ~VM_LOCKED; + + while (start < end) { +- struct page *page = NULL; ++ struct page *page; + unsigned int page_mask, page_increm; +- struct pagevec pvec; +- struct zone *zone; +- int zoneid; ++ struct zone *pagezone; + +- pagevec_init(&pvec, 0); + /* + * Although FOLL_DUMP is intended for get_dump_page(), + * it just so happens that its special treatment of the +@@ -450,10 +400,22 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, + * has sneaked into the range, we won't oops here: great). + */ + page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP, +- &page_mask); +- ++ &page_mask); + if (page && !IS_ERR(page)) { ++ pagezone = page_zone(page); ++ /* The whole pagevec must be in the same zone */ ++ if (pagezone != zone) { ++ if (pagevec_count(&pvec)) ++ __munlock_pagevec(&pvec, zone); ++ zone = pagezone; ++ } + if (PageTransHuge(page)) { ++ /* ++ * THP pages are not handled by pagevec due ++ * to their possible split (see below). ++ */ ++ if (pagevec_count(&pvec)) ++ __munlock_pagevec(&pvec, zone); + lock_page(page); + /* + * Any THP page found by follow_page_mask() may +@@ -466,31 +428,21 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, + put_page(page); /* follow_page_mask() */ + } else { + /* +- * Non-huge pages are handled in batches via +- * pagevec. The pin from follow_page_mask() +- * prevents them from collapsing by THP. +- */ +- pagevec_add(&pvec, page); +- zone = page_zone(page); +- zoneid = page_zone_id(page); +- +- /* +- * Try to fill the rest of pagevec using fast +- * pte walk. This will also update start to +- * the next page to process. Then munlock the +- * pagevec. ++ * Non-huge pages are handled in batches ++ * via pagevec. The pin from ++ * follow_page_mask() prevents them from ++ * collapsing by THP. + */ +- start = __munlock_pagevec_fill(&pvec, vma, +- zoneid, start, end); +- __munlock_pagevec(&pvec, zone); +- goto next; ++ if (pagevec_add(&pvec, page) == 0) ++ __munlock_pagevec(&pvec, zone); + } + } + page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); + start += page_increm * PAGE_SIZE; +-next: + cond_resched(); + } ++ if (pagevec_count(&pvec)) ++ __munlock_pagevec(&pvec, zone); + } + + /* +-- +1.8.1.4 diff --git a/a/content_digest b/N1/content_digest index fadd1c3..e8340a2 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -46,6 +46,207 @@ "Vlastimil\n" "\n" "\n" - -----8<----- + "-----8<-----\n" + ">From 979cbdeaaed76e25a9e08c7ccadba5baf5e7c619 Mon Sep 17 00:00:00 2001\n" + "From: Vlastimil Babka <vbabka@suse.cz>\n" + "Date: Mon, 16 Sep 2013 17:06:12 +0200\n" + "Subject: [PATCH] Revert \"mm: munlock: manual pte walk in fast path instead of\n" + " follow_page_mask()\"\n" + "\n" + "This reverts commit 7a8010cd36273ff5f6fea5201ef9232f30cebbd9 for testing.\n" + "---\n" + " include/linux/mm.h | 12 +++---\n" + " mm/mlock.c | 110 +++++++++++++++--------------------------------------\n" + " 2 files changed, 37 insertions(+), 85 deletions(-)\n" + "\n" + "diff --git a/include/linux/mm.h b/include/linux/mm.h\n" + "index 8b6e55e..e9bab9c 100644\n" + "--- a/include/linux/mm.h\n" + "+++ b/include/linux/mm.h\n" + "@@ -630,12 +630,12 @@ static inline enum zone_type page_zonenum(const struct page *page)\n" + " #endif\n" + " \n" + " /*\n" + "- * The identification function is mainly used by the buddy allocator for\n" + "- * determining if two pages could be buddies. We are not really identifying\n" + "- * the zone since we could be using the section number id if we do not have\n" + "- * node id available in page flags.\n" + "- * We only guarantee that it will return the same value for two combinable\n" + "- * pages in a zone.\n" + "+ * The identification function is only used by the buddy allocator for\n" + "+ * determining if two pages could be buddies. We are not really\n" + "+ * identifying a zone since we could be using a the section number\n" + "+ * id if we have not node id available in page flags.\n" + "+ * We guarantee only that it will return the same value for two\n" + "+ * combinable pages in a zone.\n" + " */\n" + " static inline int page_zone_id(struct page *page)\n" + " {\n" + "diff --git a/mm/mlock.c b/mm/mlock.c\n" + "index d638026..19a934d 100644\n" + "--- a/mm/mlock.c\n" + "+++ b/mm/mlock.c\n" + "@@ -280,7 +280,8 @@ static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)\n" + " * The second phase finishes the munlock only for pages where isolation\n" + " * succeeded.\n" + " *\n" + "- * Note that the pagevec may be modified during the process.\n" + "+ * Note that pvec is modified during the process. Before returning\n" + "+ * pagevec_reinit() is called on it.\n" + " */\n" + " static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)\n" + " {\n" + "@@ -355,60 +356,8 @@ skip_munlock:\n" + " \t */\n" + " \tif (pagevec_count(&pvec_putback))\n" + " \t\t__putback_lru_fast(&pvec_putback, pgrescued);\n" + "-}\n" + "-\n" + "-/*\n" + "- * Fill up pagevec for __munlock_pagevec using pte walk\n" + "- *\n" + "- * The function expects that the struct page corresponding to @start address is\n" + "- * a non-TPH page already pinned and in the @pvec, and that it belongs to @zone.\n" + "- *\n" + "- * The rest of @pvec is filled by subsequent pages within the same pmd and same\n" + "- * zone, as long as the pte's are present and vm_normal_page() succeeds. These\n" + "- * pages also get pinned.\n" + "- *\n" + "- * Returns the address of the next page that should be scanned. This equals\n" + "- * @start + PAGE_SIZE when no page could be added by the pte walk.\n" + "- */\n" + "-static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,\n" + "-\t\tstruct vm_area_struct *vma, int zoneid,\tunsigned long start,\n" + "-\t\tunsigned long end)\n" + "-{\n" + "-\tpte_t *pte;\n" + "-\tspinlock_t *ptl;\n" + "-\n" + "-\t/*\n" + "-\t * Initialize pte walk starting at the already pinned page where we\n" + "-\t * are sure that there is a pte.\n" + "-\t */\n" + "-\tpte = get_locked_pte(vma->vm_mm, start,\t&ptl);\n" + "-\tend = min(end, pmd_addr_end(start, end));\n" + "-\n" + "-\t/* The page next to the pinned page is the first we will try to get */\n" + "-\tstart += PAGE_SIZE;\n" + "-\twhile (start < end) {\n" + "-\t\tstruct page *page = NULL;\n" + "-\t\tpte++;\n" + "-\t\tif (pte_present(*pte))\n" + "-\t\t\tpage = vm_normal_page(vma, start, *pte);\n" + "-\t\t/*\n" + "-\t\t * Break if page could not be obtained or the page's node+zone does not\n" + "-\t\t * match\n" + "-\t\t */\n" + "-\t\tif (!page || page_zone_id(page) != zoneid)\n" + "-\t\t\tbreak;\n" + " \n" + "-\t\tget_page(page);\n" + "-\t\t/*\n" + "-\t\t * Increase the address that will be returned *before* the\n" + "-\t\t * eventual break due to pvec becoming full by adding the page\n" + "-\t\t */\n" + "-\t\tstart += PAGE_SIZE;\n" + "-\t\tif (pagevec_add(pvec, page) == 0)\n" + "-\t\t\tbreak;\n" + "-\t}\n" + "-\tpte_unmap_unlock(pte, ptl);\n" + "-\treturn start;\n" + "+\tpagevec_reinit(pvec);\n" + " }\n" + " \n" + " /*\n" + "@@ -432,16 +381,17 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,\n" + " void munlock_vma_pages_range(struct vm_area_struct *vma,\n" + " \t\t\t unsigned long start, unsigned long end)\n" + " {\n" + "+\tstruct pagevec pvec;\n" + "+\tstruct zone *zone = NULL;\n" + "+\n" + "+\tpagevec_init(&pvec, 0);\n" + " \tvma->vm_flags &= ~VM_LOCKED;\n" + " \n" + " \twhile (start < end) {\n" + "-\t\tstruct page *page = NULL;\n" + "+\t\tstruct page *page;\n" + " \t\tunsigned int page_mask, page_increm;\n" + "-\t\tstruct pagevec pvec;\n" + "-\t\tstruct zone *zone;\n" + "-\t\tint zoneid;\n" + "+\t\tstruct zone *pagezone;\n" + " \n" + "-\t\tpagevec_init(&pvec, 0);\n" + " \t\t/*\n" + " \t\t * Although FOLL_DUMP is intended for get_dump_page(),\n" + " \t\t * it just so happens that its special treatment of the\n" + "@@ -450,10 +400,22 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,\n" + " \t\t * has sneaked into the range, we won't oops here: great).\n" + " \t\t */\n" + " \t\tpage = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,\n" + "-\t\t\t\t&page_mask);\n" + "-\n" + "+\t\t\t\t\t&page_mask);\n" + " \t\tif (page && !IS_ERR(page)) {\n" + "+\t\t\tpagezone = page_zone(page);\n" + "+\t\t\t/* The whole pagevec must be in the same zone */\n" + "+\t\t\tif (pagezone != zone) {\n" + "+\t\t\t\tif (pagevec_count(&pvec))\n" + "+\t\t\t\t\t__munlock_pagevec(&pvec, zone);\n" + "+\t\t\t\tzone = pagezone;\n" + "+\t\t\t}\n" + " \t\t\tif (PageTransHuge(page)) {\n" + "+\t\t\t\t/*\n" + "+\t\t\t\t * THP pages are not handled by pagevec due\n" + "+\t\t\t\t * to their possible split (see below).\n" + "+\t\t\t\t */\n" + "+\t\t\t\tif (pagevec_count(&pvec))\n" + "+\t\t\t\t\t__munlock_pagevec(&pvec, zone);\n" + " \t\t\t\tlock_page(page);\n" + " \t\t\t\t/*\n" + " \t\t\t\t * Any THP page found by follow_page_mask() may\n" + "@@ -466,31 +428,21 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,\n" + " \t\t\t\tput_page(page); /* follow_page_mask() */\n" + " \t\t\t} else {\n" + " \t\t\t\t/*\n" + "-\t\t\t\t * Non-huge pages are handled in batches via\n" + "-\t\t\t\t * pagevec. The pin from follow_page_mask()\n" + "-\t\t\t\t * prevents them from collapsing by THP.\n" + "-\t\t\t\t */\n" + "-\t\t\t\tpagevec_add(&pvec, page);\n" + "-\t\t\t\tzone = page_zone(page);\n" + "-\t\t\t\tzoneid = page_zone_id(page);\n" + "-\n" + "-\t\t\t\t/*\n" + "-\t\t\t\t * Try to fill the rest of pagevec using fast\n" + "-\t\t\t\t * pte walk. This will also update start to\n" + "-\t\t\t\t * the next page to process. Then munlock the\n" + "-\t\t\t\t * pagevec.\n" + "+\t\t\t\t * Non-huge pages are handled in batches\n" + "+\t\t\t\t * via pagevec. The pin from\n" + "+\t\t\t\t * follow_page_mask() prevents them from\n" + "+\t\t\t\t * collapsing by THP.\n" + " \t\t\t\t */\n" + "-\t\t\t\tstart = __munlock_pagevec_fill(&pvec, vma,\n" + "-\t\t\t\t\t\tzoneid, start, end);\n" + "-\t\t\t\t__munlock_pagevec(&pvec, zone);\n" + "-\t\t\t\tgoto next;\n" + "+\t\t\t\tif (pagevec_add(&pvec, page) == 0)\n" + "+\t\t\t\t\t__munlock_pagevec(&pvec, zone);\n" + " \t\t\t}\n" + " \t\t}\n" + " \t\tpage_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);\n" + " \t\tstart += page_increm * PAGE_SIZE;\n" + "-next:\n" + " \t\tcond_resched();\n" + " \t}\n" + "+\tif (pagevec_count(&pvec))\n" + "+\t\t__munlock_pagevec(&pvec, zone);\n" + " }\n" + " \n" + " /*\n" + "-- \n" + 1.8.1.4 -23a3d995761bf2806c18395f8f4969aee753f6da4ced73060026e20ce6f0787c +848e8c9c87304a1c6e544a7f0bde53aa30bcd7c2088f9ff1a7701c6c301d3903
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.