diff for duplicates of <20180403155509.GD5501@dhcp22.suse.cz> diff --git a/a/1.txt b/N1/1.txt index 5eb14e8..a9817f7 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -34,3 +34,109 @@ quick fix, we shouldn't keep it longterm for 4.17+ IMHO. Does you ack apply to that patch as well? --- +>From 3e1b127dd6107a0e51654e90d9faef7f196f5a10 Mon Sep 17 00:00:00 2001 +From: Michal Hocko <mhocko@suse.com> +Date: Wed, 21 Mar 2018 10:10:37 +0100 +Subject: [PATCH] memcg, thp: do not invoke oom killer on thp charges + +David has noticed that THP memcg charge can trigger the oom killer +since 2516035499b9 ("mm, thp: remove __GFP_NORETRY from khugepaged and +madvised allocations"). We have used an explicit __GFP_NORETRY +previously which ruled the OOM killer automagically. + +Memcg charge path should be semantically compliant with the allocation +path and that means that if we do not trigger the OOM killer for +costly orders which should do the same in the memcg charge path as +well. Otherwise we are forcing callers to distinguish the two and use +different gfp masks which is both non-intuitive and bug prone. As soon +as we get a costly high order kmalloc user we even do not have any means +to tell the memcg specific gfp mask to prevent from OOM because the +charging is deep within guts of the slab allocator. + +The unexpected memcg OOM on THP has already been fixed upstream by +9d3c3354bb85 ("mm, thp: do not cause memcg oom for thp") but this is +one-off fix rather than a generic solution. Teach mem_cgroup_oom to bail +out on costly order requests to fix the THP issue as well as any other +costly OOM eligible allocations to be added in future. + +Also revert 9d3c3354bb85 because special gfp for THP is no longer +needed. + +Fixes: 2516035499b9 ("mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations") +Reported-by: David Rientjes <rientjes@google.com> +Signed-off-by: Michal Hocko <mhocko@suse.com> +--- + mm/huge_memory.c | 5 ++--- + mm/khugepaged.c | 8 ++------ + mm/memcontrol.c | 2 +- + 3 files changed, 5 insertions(+), 10 deletions(-) + +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 2297dd9cc7c3..0cc62405de9c 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -555,8 +555,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, + + VM_BUG_ON_PAGE(!PageCompound(page), page); + +- if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg, +- true)) { ++ if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) { + put_page(page); + count_vm_event(THP_FAULT_FALLBACK); + return VM_FAULT_FALLBACK; +@@ -1317,7 +1316,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) + } + + if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm, +- huge_gfp | __GFP_NORETRY, &memcg, true))) { ++ huge_gfp, &memcg, true))) { + put_page(new_page); + split_huge_pmd(vma, vmf->pmd, vmf->address); + if (page) +diff --git a/mm/khugepaged.c b/mm/khugepaged.c +index 214e614b62b0..b7e2268dfc9a 100644 +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -960,9 +960,7 @@ static void collapse_huge_page(struct mm_struct *mm, + goto out_nolock; + } + +- /* Do not oom kill for khugepaged charges */ +- if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY, +- &memcg, true))) { ++ if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) { + result = SCAN_CGROUP_CHARGE_FAIL; + goto out_nolock; + } +@@ -1321,9 +1319,7 @@ static void collapse_shmem(struct mm_struct *mm, + goto out; + } + +- /* Do not oom kill for khugepaged charges */ +- if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY, +- &memcg, true))) { ++ if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) { + result = SCAN_CGROUP_CHARGE_FAIL; + goto out; + } +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index d1a917b5b7b7..08accbcd1a18 100644 +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -1493,7 +1493,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg) + + static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) + { +- if (!current->memcg_may_oom) ++ if (!current->memcg_may_oom || order > PAGE_ALLOC_COSTLY_ORDER) + return; + /* + * We are in the middle of the charge context here, so we +-- +2.16.3 + + +-- +Michal Hocko +SUSE Labs diff --git a/a/content_digest b/N1/content_digest index 15104df..a81c49a 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -47,6 +47,112 @@ "quick fix, we shouldn't keep it longterm for 4.17+ IMHO.\n" "\n" "Does you ack apply to that patch as well?\n" - --- + "---\n" + ">From 3e1b127dd6107a0e51654e90d9faef7f196f5a10 Mon Sep 17 00:00:00 2001\n" + "From: Michal Hocko <mhocko@suse.com>\n" + "Date: Wed, 21 Mar 2018 10:10:37 +0100\n" + "Subject: [PATCH] memcg, thp: do not invoke oom killer on thp charges\n" + "\n" + "David has noticed that THP memcg charge can trigger the oom killer\n" + "since 2516035499b9 (\"mm, thp: remove __GFP_NORETRY from khugepaged and\n" + "madvised allocations\"). We have used an explicit __GFP_NORETRY\n" + "previously which ruled the OOM killer automagically.\n" + "\n" + "Memcg charge path should be semantically compliant with the allocation\n" + "path and that means that if we do not trigger the OOM killer for\n" + "costly orders which should do the same in the memcg charge path as\n" + "well. Otherwise we are forcing callers to distinguish the two and use\n" + "different gfp masks which is both non-intuitive and bug prone. As soon\n" + "as we get a costly high order kmalloc user we even do not have any means\n" + "to tell the memcg specific gfp mask to prevent from OOM because the\n" + "charging is deep within guts of the slab allocator.\n" + "\n" + "The unexpected memcg OOM on THP has already been fixed upstream by\n" + "9d3c3354bb85 (\"mm, thp: do not cause memcg oom for thp\") but this is\n" + "one-off fix rather than a generic solution. Teach mem_cgroup_oom to bail\n" + "out on costly order requests to fix the THP issue as well as any other\n" + "costly OOM eligible allocations to be added in future.\n" + "\n" + "Also revert 9d3c3354bb85 because special gfp for THP is no longer\n" + "needed.\n" + "\n" + "Fixes: 2516035499b9 (\"mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations\")\n" + "Reported-by: David Rientjes <rientjes@google.com>\n" + "Signed-off-by: Michal Hocko <mhocko@suse.com>\n" + "---\n" + " mm/huge_memory.c | 5 ++---\n" + " mm/khugepaged.c | 8 ++------\n" + " mm/memcontrol.c | 2 +-\n" + " 3 files changed, 5 insertions(+), 10 deletions(-)\n" + "\n" + "diff --git a/mm/huge_memory.c b/mm/huge_memory.c\n" + "index 2297dd9cc7c3..0cc62405de9c 100644\n" + "--- a/mm/huge_memory.c\n" + "+++ b/mm/huge_memory.c\n" + "@@ -555,8 +555,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,\n" + " \n" + " \tVM_BUG_ON_PAGE(!PageCompound(page), page);\n" + " \n" + "-\tif (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,\n" + "-\t\t\t\t true)) {\n" + "+\tif (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {\n" + " \t\tput_page(page);\n" + " \t\tcount_vm_event(THP_FAULT_FALLBACK);\n" + " \t\treturn VM_FAULT_FALLBACK;\n" + "@@ -1317,7 +1316,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)\n" + " \t}\n" + " \n" + " \tif (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,\n" + "-\t\t\t\thuge_gfp | __GFP_NORETRY, &memcg, true))) {\n" + "+\t\t\t\t\thuge_gfp, &memcg, true))) {\n" + " \t\tput_page(new_page);\n" + " \t\tsplit_huge_pmd(vma, vmf->pmd, vmf->address);\n" + " \t\tif (page)\n" + "diff --git a/mm/khugepaged.c b/mm/khugepaged.c\n" + "index 214e614b62b0..b7e2268dfc9a 100644\n" + "--- a/mm/khugepaged.c\n" + "+++ b/mm/khugepaged.c\n" + "@@ -960,9 +960,7 @@ static void collapse_huge_page(struct mm_struct *mm,\n" + " \t\tgoto out_nolock;\n" + " \t}\n" + " \n" + "-\t/* Do not oom kill for khugepaged charges */\n" + "-\tif (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,\n" + "-\t\t\t\t\t &memcg, true))) {\n" + "+\tif (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {\n" + " \t\tresult = SCAN_CGROUP_CHARGE_FAIL;\n" + " \t\tgoto out_nolock;\n" + " \t}\n" + "@@ -1321,9 +1319,7 @@ static void collapse_shmem(struct mm_struct *mm,\n" + " \t\tgoto out;\n" + " \t}\n" + " \n" + "-\t/* Do not oom kill for khugepaged charges */\n" + "-\tif (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,\n" + "-\t\t\t\t\t &memcg, true))) {\n" + "+\tif (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {\n" + " \t\tresult = SCAN_CGROUP_CHARGE_FAIL;\n" + " \t\tgoto out;\n" + " \t}\n" + "diff --git a/mm/memcontrol.c b/mm/memcontrol.c\n" + "index d1a917b5b7b7..08accbcd1a18 100644\n" + "--- a/mm/memcontrol.c\n" + "+++ b/mm/memcontrol.c\n" + "@@ -1493,7 +1493,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)\n" + " \n" + " static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)\n" + " {\n" + "-\tif (!current->memcg_may_oom)\n" + "+\tif (!current->memcg_may_oom || order > PAGE_ALLOC_COSTLY_ORDER)\n" + " \t\treturn;\n" + " \t/*\n" + " \t * We are in the middle of the charge context here, so we\n" + "-- \n" + "2.16.3\n" + "\n" + "\n" + "-- \n" + "Michal Hocko\n" + SUSE Labs -4ef6a0ee3f2e32a82208301149e4d28044ad1342a595b2f0a109c94b103a6d43 +257e9db6757556f78336d7b578ce1b75fa3d3768f0405400d22e6b1be1d43c75
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.