All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20180403155509.GD5501@dhcp22.suse.cz>

diff --git a/a/1.txt b/N1/1.txt
index 5eb14e8..a9817f7 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -34,3 +34,109 @@ quick fix, we shouldn't keep it longterm for 4.17+ IMHO.
 
 Does you ack apply to that patch as well?
 ---
+>From 3e1b127dd6107a0e51654e90d9faef7f196f5a10 Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Wed, 21 Mar 2018 10:10:37 +0100
+Subject: [PATCH] memcg, thp: do not invoke oom killer on thp charges
+
+David has noticed that THP memcg charge can trigger the oom killer
+since 2516035499b9 ("mm, thp: remove __GFP_NORETRY from khugepaged and
+madvised allocations"). We have used an explicit __GFP_NORETRY
+previously which ruled the OOM killer automagically.
+
+Memcg charge path should be semantically compliant with the allocation
+path and that means that if we do not trigger the OOM killer for
+costly orders which should do the same in the memcg charge path as
+well.  Otherwise we are forcing callers to distinguish the two and use
+different gfp masks which is both non-intuitive and bug prone. As soon
+as we get a costly high order kmalloc user we even do not have any means
+to tell the memcg specific gfp mask to prevent from OOM because the
+charging is deep within guts of the slab allocator.
+
+The unexpected memcg OOM on THP has already been fixed upstream by
+9d3c3354bb85 ("mm, thp: do not cause memcg oom for thp") but this is
+one-off fix rather than a generic solution. Teach mem_cgroup_oom to bail
+out on costly order requests to fix the THP issue as well as any other
+costly OOM eligible allocations to be added in future.
+
+Also revert 9d3c3354bb85 because special gfp for THP is no longer
+needed.
+
+Fixes: 2516035499b9 ("mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations")
+Reported-by: David Rientjes <rientjes@google.com>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+---
+ mm/huge_memory.c | 5 ++---
+ mm/khugepaged.c  | 8 ++------
+ mm/memcontrol.c  | 2 +-
+ 3 files changed, 5 insertions(+), 10 deletions(-)
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 2297dd9cc7c3..0cc62405de9c 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -555,8 +555,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
+ 
+ 	VM_BUG_ON_PAGE(!PageCompound(page), page);
+ 
+-	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
+-				  true)) {
++	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+ 		put_page(page);
+ 		count_vm_event(THP_FAULT_FALLBACK);
+ 		return VM_FAULT_FALLBACK;
+@@ -1317,7 +1316,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
+ 	}
+ 
+ 	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
+-				huge_gfp | __GFP_NORETRY, &memcg, true))) {
++					huge_gfp, &memcg, true))) {
+ 		put_page(new_page);
+ 		split_huge_pmd(vma, vmf->pmd, vmf->address);
+ 		if (page)
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 214e614b62b0..b7e2268dfc9a 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -960,9 +960,7 @@ static void collapse_huge_page(struct mm_struct *mm,
+ 		goto out_nolock;
+ 	}
+ 
+-	/* Do not oom kill for khugepaged charges */
+-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+-					   &memcg, true))) {
++	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+ 		result = SCAN_CGROUP_CHARGE_FAIL;
+ 		goto out_nolock;
+ 	}
+@@ -1321,9 +1319,7 @@ static void collapse_shmem(struct mm_struct *mm,
+ 		goto out;
+ 	}
+ 
+-	/* Do not oom kill for khugepaged charges */
+-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+-					   &memcg, true))) {
++	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+ 		result = SCAN_CGROUP_CHARGE_FAIL;
+ 		goto out;
+ 	}
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index d1a917b5b7b7..08accbcd1a18 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -1493,7 +1493,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
+ 
+ static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
+ {
+-	if (!current->memcg_may_oom)
++	if (!current->memcg_may_oom || order > PAGE_ALLOC_COSTLY_ORDER)
+ 		return;
+ 	/*
+ 	 * We are in the middle of the charge context here, so we
+-- 
+2.16.3
+
+
+-- 
+Michal Hocko
+SUSE Labs
diff --git a/a/content_digest b/N1/content_digest
index 15104df..a81c49a 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -47,6 +47,112 @@
  "quick fix, we shouldn't keep it longterm for 4.17+ IMHO.\n"
  "\n"
  "Does you ack apply to that patch as well?\n"
- ---
+ "---\n"
+ ">From 3e1b127dd6107a0e51654e90d9faef7f196f5a10 Mon Sep 17 00:00:00 2001\n"
+ "From: Michal Hocko <mhocko@suse.com>\n"
+ "Date: Wed, 21 Mar 2018 10:10:37 +0100\n"
+ "Subject: [PATCH] memcg, thp: do not invoke oom killer on thp charges\n"
+ "\n"
+ "David has noticed that THP memcg charge can trigger the oom killer\n"
+ "since 2516035499b9 (\"mm, thp: remove __GFP_NORETRY from khugepaged and\n"
+ "madvised allocations\"). We have used an explicit __GFP_NORETRY\n"
+ "previously which ruled the OOM killer automagically.\n"
+ "\n"
+ "Memcg charge path should be semantically compliant with the allocation\n"
+ "path and that means that if we do not trigger the OOM killer for\n"
+ "costly orders which should do the same in the memcg charge path as\n"
+ "well.  Otherwise we are forcing callers to distinguish the two and use\n"
+ "different gfp masks which is both non-intuitive and bug prone. As soon\n"
+ "as we get a costly high order kmalloc user we even do not have any means\n"
+ "to tell the memcg specific gfp mask to prevent from OOM because the\n"
+ "charging is deep within guts of the slab allocator.\n"
+ "\n"
+ "The unexpected memcg OOM on THP has already been fixed upstream by\n"
+ "9d3c3354bb85 (\"mm, thp: do not cause memcg oom for thp\") but this is\n"
+ "one-off fix rather than a generic solution. Teach mem_cgroup_oom to bail\n"
+ "out on costly order requests to fix the THP issue as well as any other\n"
+ "costly OOM eligible allocations to be added in future.\n"
+ "\n"
+ "Also revert 9d3c3354bb85 because special gfp for THP is no longer\n"
+ "needed.\n"
+ "\n"
+ "Fixes: 2516035499b9 (\"mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations\")\n"
+ "Reported-by: David Rientjes <rientjes@google.com>\n"
+ "Signed-off-by: Michal Hocko <mhocko@suse.com>\n"
+ "---\n"
+ " mm/huge_memory.c | 5 ++---\n"
+ " mm/khugepaged.c  | 8 ++------\n"
+ " mm/memcontrol.c  | 2 +-\n"
+ " 3 files changed, 5 insertions(+), 10 deletions(-)\n"
+ "\n"
+ "diff --git a/mm/huge_memory.c b/mm/huge_memory.c\n"
+ "index 2297dd9cc7c3..0cc62405de9c 100644\n"
+ "--- a/mm/huge_memory.c\n"
+ "+++ b/mm/huge_memory.c\n"
+ "@@ -555,8 +555,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,\n"
+ " \n"
+ " \tVM_BUG_ON_PAGE(!PageCompound(page), page);\n"
+ " \n"
+ "-\tif (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,\n"
+ "-\t\t\t\t  true)) {\n"
+ "+\tif (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {\n"
+ " \t\tput_page(page);\n"
+ " \t\tcount_vm_event(THP_FAULT_FALLBACK);\n"
+ " \t\treturn VM_FAULT_FALLBACK;\n"
+ "@@ -1317,7 +1316,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)\n"
+ " \t}\n"
+ " \n"
+ " \tif (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,\n"
+ "-\t\t\t\thuge_gfp | __GFP_NORETRY, &memcg, true))) {\n"
+ "+\t\t\t\t\thuge_gfp, &memcg, true))) {\n"
+ " \t\tput_page(new_page);\n"
+ " \t\tsplit_huge_pmd(vma, vmf->pmd, vmf->address);\n"
+ " \t\tif (page)\n"
+ "diff --git a/mm/khugepaged.c b/mm/khugepaged.c\n"
+ "index 214e614b62b0..b7e2268dfc9a 100644\n"
+ "--- a/mm/khugepaged.c\n"
+ "+++ b/mm/khugepaged.c\n"
+ "@@ -960,9 +960,7 @@ static void collapse_huge_page(struct mm_struct *mm,\n"
+ " \t\tgoto out_nolock;\n"
+ " \t}\n"
+ " \n"
+ "-\t/* Do not oom kill for khugepaged charges */\n"
+ "-\tif (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,\n"
+ "-\t\t\t\t\t   &memcg, true))) {\n"
+ "+\tif (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {\n"
+ " \t\tresult = SCAN_CGROUP_CHARGE_FAIL;\n"
+ " \t\tgoto out_nolock;\n"
+ " \t}\n"
+ "@@ -1321,9 +1319,7 @@ static void collapse_shmem(struct mm_struct *mm,\n"
+ " \t\tgoto out;\n"
+ " \t}\n"
+ " \n"
+ "-\t/* Do not oom kill for khugepaged charges */\n"
+ "-\tif (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,\n"
+ "-\t\t\t\t\t   &memcg, true))) {\n"
+ "+\tif (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {\n"
+ " \t\tresult = SCAN_CGROUP_CHARGE_FAIL;\n"
+ " \t\tgoto out;\n"
+ " \t}\n"
+ "diff --git a/mm/memcontrol.c b/mm/memcontrol.c\n"
+ "index d1a917b5b7b7..08accbcd1a18 100644\n"
+ "--- a/mm/memcontrol.c\n"
+ "+++ b/mm/memcontrol.c\n"
+ "@@ -1493,7 +1493,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)\n"
+ " \n"
+ " static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)\n"
+ " {\n"
+ "-\tif (!current->memcg_may_oom)\n"
+ "+\tif (!current->memcg_may_oom || order > PAGE_ALLOC_COSTLY_ORDER)\n"
+ " \t\treturn;\n"
+ " \t/*\n"
+ " \t * We are in the middle of the charge context here, so we\n"
+ "-- \n"
+ "2.16.3\n"
+ "\n"
+ "\n"
+ "-- \n"
+ "Michal Hocko\n"
+ SUSE Labs
 
-4ef6a0ee3f2e32a82208301149e4d28044ad1342a595b2f0a109c94b103a6d43
+257e9db6757556f78336d7b578ce1b75fa3d3768f0405400d22e6b1be1d43c75

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.