diff for duplicates of <20170502051452.GA27264@bbox> diff --git a/a/1.txt b/N1/1.txt index b882a42..f897683 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -1,3 +1,158 @@ Oops, forgot to add lkml and linux-mm. Sorry for that. Send it again. + +>From 8ddf1c8aa15baf085bc6e8c62ce705459d57ea4c Mon Sep 17 00:00:00 2001 +From: Minchan Kim <minchan@kernel.org> +Date: Tue, 2 May 2017 12:34:05 +0900 +Subject: [PATCH] vmscan: scan pages until it founds eligible pages + +On Tue, May 02, 2017 at 01:40:38PM +0900, Minchan Kim wrote: +There are premature OOM happening. Although there are a ton of free +swap and anonymous LRU list of elgible zones, OOM happened. + +With investigation, skipping page of isolate_lru_pages makes reclaim +void because it returns zero nr_taken easily so LRU shrinking is +effectively nothing and just increases priority aggressively. +Finally, OOM happens. + +This patch makes isolate_lru_pages try to scan pages until it +encounters eligible zones's pages or too much scan happen(ie, +node's LRU size). + +balloon invoked oom-killer: gfp_mask=0x17080c0(GFP_KERNEL_ACCOUNT|__GFP_ZERO|__GFP_NOTRACK), nodemask=(null), order=0, oom_score_adj=0 +CPU: 7 PID: 1138 Comm: balloon Not tainted 4.11.0-rc6-mm1-zram-00289-ge228d67e9677-dirty #17 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +Call Trace: + dump_stack+0x65/0x87 + dump_header.isra.19+0x8f/0x20f + ? preempt_count_add+0x9e/0xb0 + ? _raw_spin_unlock_irqrestore+0x24/0x40 + oom_kill_process+0x21d/0x3f0 + ? has_capability_noaudit+0x17/0x20 + out_of_memory+0xd8/0x390 + __alloc_pages_slowpath+0xbc1/0xc50 + ? 
anon_vma_interval_tree_insert+0x84/0x90 + __alloc_pages_nodemask+0x1a5/0x1c0 + pte_alloc_one+0x20/0x50 + __pte_alloc+0x1e/0x110 + __handle_mm_fault+0x919/0x960 + handle_mm_fault+0x77/0x120 + __do_page_fault+0x27a/0x550 + trace_do_page_fault+0x43/0x150 + do_async_page_fault+0x2c/0x90 + async_page_fault+0x28/0x30 +RIP: 0033:0x7fc4636bacb8 +RSP: 002b:00007fff97c9c4c0 EFLAGS: 00010202 +RAX: 00007fc3e818d000 RBX: 00007fc4639f8760 RCX: 00007fc46372e9ca +RDX: 0000000000101002 RSI: 0000000000101000 RDI: 0000000000000000 +RBP: 0000000000100010 R08: 00000000ffffffff R09: 0000000000000000 +R10: 0000000000000022 R11: 00000000000a3901 R12: 00007fc3e818d010 +R13: 0000000000101000 R14: 00007fc4639f87b8 R15: 00007fc4639f87b8 +Mem-Info: +active_anon:424716 inactive_anon:65314 isolated_anon:0 + active_file:52 inactive_file:46 isolated_file:0 + unevictable:0 dirty:27 writeback:0 unstable:0 + slab_reclaimable:3967 slab_unreclaimable:4125 + mapped:133 shmem:43 pagetables:1674 bounce:0 + free:4637 free_pcp:225 free_cma:0 +Node 0 active_anon:1698864kB inactive_anon:261256kB active_file:208kB inactive_file:184kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:532kB dirty:108kB writeback:0kB shmem:172kB writeback_tmp:0kB unstable:0kB all_unreclaimable? 
no +DMA free:7316kB min:32kB low:44kB high:56kB active_anon:8064kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:15992kB managed:15908kB mlocked:0kB slab_reclaimable:464kB slab_unreclaimable:40kB kernel_stack:0kB pagetables:24kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB +lowmem_reserve[]: 0 992 992 1952 +DMA32 free:9088kB min:2048kB low:3064kB high:4080kB active_anon:952176kB inactive_anon:0kB active_file:36kB inactive_file:0kB unevictable:0kB writepending:88kB present:1032192kB managed:1019388kB mlocked:0kB slab_reclaimable:13532kB slab_unreclaimable:16460kB kernel_stack:3552kB pagetables:6672kB bounce:0kB free_pcp:56kB local_pcp:24kB free_cma:0kB +lowmem_reserve[]: 0 0 0 959 +Movable free:3644kB min:1980kB low:2960kB high:3940kB active_anon:738560kB inactive_anon:261340kB active_file:188kB inactive_file:640kB unevictable:0kB writepending:20kB present:1048444kB managed:1010816kB mlocked:0kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:832kB local_pcp:60kB free_cma:0kB +lowmem_reserve[]: 0 0 0 0 +DMA: 1*4kB (E) 0*8kB 18*16kB (E) 10*32kB (E) 10*64kB (E) 9*128kB (ME) 8*256kB (E) 2*512kB (E) 2*1024kB (E) 0*2048kB 0*4096kB = 7524kB +DMA32: 417*4kB (UMEH) 181*8kB (UMEH) 68*16kB (UMEH) 48*32kB (UMEH) 14*64kB (MH) 3*128kB (M) 1*256kB (H) 1*512kB (M) 2*1024kB (M) 0*2048kB 0*4096kB = 9836kB +Movable: 1*4kB (M) 1*8kB (M) 1*16kB (M) 1*32kB (M) 0*64kB 1*128kB (M) 2*256kB (M) 4*512kB (M) 1*1024kB (M) 0*2048kB 0*4096kB = 3772kB +378 total pagecache pages +17 pages in swap cache +Swap cache stats: add 17325, delete 17302, find 0/27 +Free swap = 978940kB +Total swap = 1048572kB +524157 pages RAM +0 pages HighMem/MovableOnly +12629 pages reserved +0 pages cma reserved +0 pages hwpoisoned +[ pid ] uid tgid total_vm rss nr_ptes nr_pmds swapents oom_score_adj name +[ 433] 0 433 4904 5 14 3 82 0 upstart-udev-br +[ 438] 0 438 12371 5 27 3 191 -1000 systemd-udevd +... 
+ +Signed-off-by: Minchan Kim <minchan@kernel.org> +--- + mm/vmscan.c | 33 +++++++++++++++++++++++++++++---- + 1 file changed, 29 insertions(+), 4 deletions(-) + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 2314aca47d12..1fec21d155b3 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -1488,12 +1488,20 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + unsigned long nr_taken = 0; + unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 }; + unsigned long nr_skipped[MAX_NR_ZONES] = { 0, }; ++ unsigned long total_skipped = 0; + unsigned long skipped = 0; + unsigned long scan, nr_pages; ++ unsigned long lru_size; + LIST_HEAD(pages_skipped); + ++ if (!mem_cgroup_disabled()) ++ lru_size = mem_cgroup_get_lru_size(lruvec, lru); ++ else ++ lru_size = node_page_state(lruvec_pgdat(lruvec), ++ NR_LRU_BASE + lru); ++ + for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan && +- !list_empty(src); scan++) { ++ !list_empty(src) && (scan + total_skipped < lru_size); scan++) { + struct page *page; + + page = lru_to_page(src); +@@ -1502,8 +1510,25 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + VM_BUG_ON_PAGE(!PageLRU(page), page); + + if (page_zonenum(page) > sc->reclaim_idx) { ++ if (skipped > SWAP_CLUSTER_MAX) { ++ int zid; ++ ++ list_splice_init(&pages_skipped, src); ++ for (zid = 0; zid < MAX_NR_ZONES; zid++) { ++ if (!nr_skipped[zid]) ++ continue; ++ __count_zid_vm_events(PGSCAN_SKIP, zid, ++ nr_skipped[zid]); ++ total_skipped += nr_skipped[zid]; ++ nr_skipped[zid] = 0; ++ } ++ skipped = 0; ++ } ++ + list_move(&page->lru, &pages_skipped); + nr_skipped[page_zonenum(page)]++; ++ skipped++; ++ scan--; + continue; + } + +@@ -1541,12 +1566,12 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + continue; + + __count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]); +- skipped += nr_skipped[zid]; ++ total_skipped += nr_skipped[zid]; + } + } +- *nr_scanned = scan; ++ *nr_scanned = scan + total_skipped; + 
trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, +- scan, skipped, nr_taken, mode, lru); ++ scan, total_skipped, nr_taken, mode, lru); + update_lru_sizes(lruvec, lru, nr_zone_taken); + return nr_taken; + } +-- +2.7.4 diff --git a/a/content_digest b/N1/content_digest index ab88d77..4f16188 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -6,13 +6,168 @@ "Cc\0Johannes Weiner <hannes@cmpxchg.org>" Mel Gorman <mgorman@techsingularity.net> Michal Hocko <mhocko@suse.com> - kernel-team@lge.com - linux-kernel@vger.kernel.org - " linux-mm@kvack.org\0" + <kernel-team@lge.com> + <linux-kernel@vger.kernel.org> + " <linux-mm@kvack.org>\0" "\00:1\0" "b\0" "Oops, forgot to add lkml and linux-mm.\n" "Sorry for that.\n" - Send it again. + "Send it again.\n" + "\n" + ">From 8ddf1c8aa15baf085bc6e8c62ce705459d57ea4c Mon Sep 17 00:00:00 2001\n" + "From: Minchan Kim <minchan@kernel.org>\n" + "Date: Tue, 2 May 2017 12:34:05 +0900\n" + "Subject: [PATCH] vmscan: scan pages until it founds eligible pages\n" + "\n" + "On Tue, May 02, 2017 at 01:40:38PM +0900, Minchan Kim wrote:\n" + "There are premature OOM happening. 
Although there are a ton of free\n" + "swap and anonymous LRU list of elgible zones, OOM happened.\n" + "\n" + "With investigation, skipping page of isolate_lru_pages makes reclaim\n" + "void because it returns zero nr_taken easily so LRU shrinking is\n" + "effectively nothing and just increases priority aggressively.\n" + "Finally, OOM happens.\n" + "\n" + "This patch makes isolate_lru_pages try to scan pages until it\n" + "encounters eligible zones's pages or too much scan happen(ie,\n" + "node's LRU size).\n" + "\n" + "balloon invoked oom-killer: gfp_mask=0x17080c0(GFP_KERNEL_ACCOUNT|__GFP_ZERO|__GFP_NOTRACK), nodemask=(null), order=0, oom_score_adj=0\n" + "CPU: 7 PID: 1138 Comm: balloon Not tainted 4.11.0-rc6-mm1-zram-00289-ge228d67e9677-dirty #17\n" + "Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014\n" + "Call Trace:\n" + " dump_stack+0x65/0x87\n" + " dump_header.isra.19+0x8f/0x20f\n" + " ? preempt_count_add+0x9e/0xb0\n" + " ? _raw_spin_unlock_irqrestore+0x24/0x40\n" + " oom_kill_process+0x21d/0x3f0\n" + " ? has_capability_noaudit+0x17/0x20\n" + " out_of_memory+0xd8/0x390\n" + " __alloc_pages_slowpath+0xbc1/0xc50\n" + " ? 
anon_vma_interval_tree_insert+0x84/0x90\n" + " __alloc_pages_nodemask+0x1a5/0x1c0\n" + " pte_alloc_one+0x20/0x50\n" + " __pte_alloc+0x1e/0x110\n" + " __handle_mm_fault+0x919/0x960\n" + " handle_mm_fault+0x77/0x120\n" + " __do_page_fault+0x27a/0x550\n" + " trace_do_page_fault+0x43/0x150\n" + " do_async_page_fault+0x2c/0x90\n" + " async_page_fault+0x28/0x30\n" + "RIP: 0033:0x7fc4636bacb8\n" + "RSP: 002b:00007fff97c9c4c0 EFLAGS: 00010202\n" + "RAX: 00007fc3e818d000 RBX: 00007fc4639f8760 RCX: 00007fc46372e9ca\n" + "RDX: 0000000000101002 RSI: 0000000000101000 RDI: 0000000000000000\n" + "RBP: 0000000000100010 R08: 00000000ffffffff R09: 0000000000000000\n" + "R10: 0000000000000022 R11: 00000000000a3901 R12: 00007fc3e818d010\n" + "R13: 0000000000101000 R14: 00007fc4639f87b8 R15: 00007fc4639f87b8\n" + "Mem-Info:\n" + "active_anon:424716 inactive_anon:65314 isolated_anon:0\n" + " active_file:52 inactive_file:46 isolated_file:0\n" + " unevictable:0 dirty:27 writeback:0 unstable:0\n" + " slab_reclaimable:3967 slab_unreclaimable:4125\n" + " mapped:133 shmem:43 pagetables:1674 bounce:0\n" + " free:4637 free_pcp:225 free_cma:0\n" + "Node 0 active_anon:1698864kB inactive_anon:261256kB active_file:208kB inactive_file:184kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:532kB dirty:108kB writeback:0kB shmem:172kB writeback_tmp:0kB unstable:0kB all_unreclaimable? 
no\n" + "DMA free:7316kB min:32kB low:44kB high:56kB active_anon:8064kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:15992kB managed:15908kB mlocked:0kB slab_reclaimable:464kB slab_unreclaimable:40kB kernel_stack:0kB pagetables:24kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB\n" + "lowmem_reserve[]: 0 992 992 1952\n" + "DMA32 free:9088kB min:2048kB low:3064kB high:4080kB active_anon:952176kB inactive_anon:0kB active_file:36kB inactive_file:0kB unevictable:0kB writepending:88kB present:1032192kB managed:1019388kB mlocked:0kB slab_reclaimable:13532kB slab_unreclaimable:16460kB kernel_stack:3552kB pagetables:6672kB bounce:0kB free_pcp:56kB local_pcp:24kB free_cma:0kB\n" + "lowmem_reserve[]: 0 0 0 959\n" + "Movable free:3644kB min:1980kB low:2960kB high:3940kB active_anon:738560kB inactive_anon:261340kB active_file:188kB inactive_file:640kB unevictable:0kB writepending:20kB present:1048444kB managed:1010816kB mlocked:0kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:832kB local_pcp:60kB free_cma:0kB\n" + "lowmem_reserve[]: 0 0 0 0\n" + "DMA: 1*4kB (E) 0*8kB 18*16kB (E) 10*32kB (E) 10*64kB (E) 9*128kB (ME) 8*256kB (E) 2*512kB (E) 2*1024kB (E) 0*2048kB 0*4096kB = 7524kB\n" + "DMA32: 417*4kB (UMEH) 181*8kB (UMEH) 68*16kB (UMEH) 48*32kB (UMEH) 14*64kB (MH) 3*128kB (M) 1*256kB (H) 1*512kB (M) 2*1024kB (M) 0*2048kB 0*4096kB = 9836kB\n" + "Movable: 1*4kB (M) 1*8kB (M) 1*16kB (M) 1*32kB (M) 0*64kB 1*128kB (M) 2*256kB (M) 4*512kB (M) 1*1024kB (M) 0*2048kB 0*4096kB = 3772kB\n" + "378 total pagecache pages\n" + "17 pages in swap cache\n" + "Swap cache stats: add 17325, delete 17302, find 0/27\n" + "Free swap = 978940kB\n" + "Total swap = 1048572kB\n" + "524157 pages RAM\n" + "0 pages HighMem/MovableOnly\n" + "12629 pages reserved\n" + "0 pages cma reserved\n" + "0 pages hwpoisoned\n" + "[ pid ] uid tgid total_vm rss nr_ptes nr_pmds swapents oom_score_adj name\n" + "[ 433] 0 
433 4904 5 14 3 82 0 upstart-udev-br\n" + "[ 438] 0 438 12371 5 27 3 191 -1000 systemd-udevd\n" + "...\n" + "\n" + "Signed-off-by: Minchan Kim <minchan@kernel.org>\n" + "---\n" + " mm/vmscan.c | 33 +++++++++++++++++++++++++++++----\n" + " 1 file changed, 29 insertions(+), 4 deletions(-)\n" + "\n" + "diff --git a/mm/vmscan.c b/mm/vmscan.c\n" + "index 2314aca47d12..1fec21d155b3 100644\n" + "--- a/mm/vmscan.c\n" + "+++ b/mm/vmscan.c\n" + "@@ -1488,12 +1488,20 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,\n" + " \tunsigned long nr_taken = 0;\n" + " \tunsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };\n" + " \tunsigned long nr_skipped[MAX_NR_ZONES] = { 0, };\n" + "+\tunsigned long total_skipped = 0;\n" + " \tunsigned long skipped = 0;\n" + " \tunsigned long scan, nr_pages;\n" + "+\tunsigned long lru_size;\n" + " \tLIST_HEAD(pages_skipped);\n" + " \n" + "+\tif (!mem_cgroup_disabled())\n" + "+\t\tlru_size = mem_cgroup_get_lru_size(lruvec, lru);\n" + "+\telse\n" + "+\t\tlru_size = node_page_state(lruvec_pgdat(lruvec),\n" + "+\t\t\t\t\t\tNR_LRU_BASE + lru);\n" + "+\n" + " \tfor (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&\n" + "-\t\t\t\t\t!list_empty(src); scan++) {\n" + "+\t\t!list_empty(src) && (scan + total_skipped < lru_size); scan++) {\n" + " \t\tstruct page *page;\n" + " \n" + " \t\tpage = lru_to_page(src);\n" + "@@ -1502,8 +1510,25 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,\n" + " \t\tVM_BUG_ON_PAGE(!PageLRU(page), page);\n" + " \n" + " \t\tif (page_zonenum(page) > sc->reclaim_idx) {\n" + "+\t\t\tif (skipped > SWAP_CLUSTER_MAX) {\n" + "+\t\t\t\tint zid;\n" + "+\n" + "+\t\t\t\tlist_splice_init(&pages_skipped, src);\n" + "+\t\t\t\tfor (zid = 0; zid < MAX_NR_ZONES; zid++) {\n" + "+\t\t\t\t\tif (!nr_skipped[zid])\n" + "+\t\t\t\t\t\tcontinue;\n" + "+\t\t\t\t\t__count_zid_vm_events(PGSCAN_SKIP, zid,\n" + "+\t\t\t\t\t\t\tnr_skipped[zid]);\n" + "+\t\t\t\t\ttotal_skipped += nr_skipped[zid];\n" + 
"+\t\t\t\t\tnr_skipped[zid] = 0;\n" + "+\t\t\t\t}\n" + "+\t\t\t\tskipped = 0;\n" + "+\t\t\t}\n" + "+\n" + " \t\t\tlist_move(&page->lru, &pages_skipped);\n" + " \t\t\tnr_skipped[page_zonenum(page)]++;\n" + "+\t\t\tskipped++;\n" + "+\t\t\tscan--;\n" + " \t\t\tcontinue;\n" + " \t\t}\n" + " \n" + "@@ -1541,12 +1566,12 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,\n" + " \t\t\t\tcontinue;\n" + " \n" + " \t\t\t__count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]);\n" + "-\t\t\tskipped += nr_skipped[zid];\n" + "+\t\t\ttotal_skipped += nr_skipped[zid];\n" + " \t\t}\n" + " \t}\n" + "-\t*nr_scanned = scan;\n" + "+\t*nr_scanned = scan + total_skipped;\n" + " \ttrace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,\n" + "-\t\t\t\t scan, skipped, nr_taken, mode, lru);\n" + "+\t\t\t\t scan, total_skipped, nr_taken, mode, lru);\n" + " \tupdate_lru_sizes(lruvec, lru, nr_zone_taken);\n" + " \treturn nr_taken;\n" + " }\n" + "-- \n" + 2.7.4 -85771555d42a1b590a926464788093b00bb7418d7167edfd3c1cf4fcc491c34e +01668ef05204bd7fe91b401bd5d3c3df0cbfa61a9ce848ca6a2727939516996c
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.