diff for duplicates of <20170113013724.GA23494@bbox> diff --git a/a/1.txt b/N1/1.txt index dc560b9..462ae6f 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -108,3 +108,201 @@ usecase of all lru size in that particular tracepoint. Yes, that's why I said "it's okay if you love your version". However, we can do refactoring to remove "bool trace" and even, it makes code more readable, I believe. + +>From 06eb7201d781155a8dee7e72fbb8423ec8175223 Mon Sep 17 00:00:00 2001 +From: Minchan Kim <minchan@kernel.org> +Date: Fri, 13 Jan 2017 10:13:36 +0900 +Subject: [PATCH] mm: refactoring inactive_list_is_low + +Recently, Michal Hocko added tracepoint into inactive_list_is_low +for catching why VM decided to age the active list to know +active/inacive balancing problem. With that, unfortunately, it +added "bool trace" to inactlive_list_is_low to control some place +should be prohibited tracing. It is not elegant to me so this patch +try to clean it up. + +Normally, most inactive_list_is_low is used for deciding active list +demotion but one site(i.e., get_scan_count) uses for other purpose +which reclaim file LRU forcefully. Sites for deactivation calls it +with shrink_active_list. It means inactive_list_is_low could be +located in shrink_active_list. + +One more thing this patch does is to remove "ratio" in the tracepoint +because we can get it by post processing in script via simple math. 
+ +Signed-off-by: Minchan Kim <minchan@kernel.org> +--- + include/trace/events/vmscan.h | 9 +++----- + mm/vmscan.c | 51 ++++++++++++++++++++++++------------------- + 2 files changed, 31 insertions(+), 29 deletions(-) + +diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h +index 27e8a5c..406ea95 100644 +--- a/include/trace/events/vmscan.h ++++ b/include/trace/events/vmscan.h +@@ -432,9 +432,9 @@ TRACE_EVENT(mm_vmscan_inactive_list_is_low, + TP_PROTO(int nid, int reclaim_idx, + unsigned long total_inactive, unsigned long inactive, + unsigned long total_active, unsigned long active, +- unsigned long ratio, int file), ++ int file), + +- TP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, ratio, file), ++ TP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, file), + + TP_STRUCT__entry( + __field(int, nid) +@@ -443,7 +443,6 @@ TRACE_EVENT(mm_vmscan_inactive_list_is_low, + __field(unsigned long, inactive) + __field(unsigned long, total_active) + __field(unsigned long, active) +- __field(unsigned long, ratio) + __field(int, reclaim_flags) + ), + +@@ -454,16 +453,14 @@ TRACE_EVENT(mm_vmscan_inactive_list_is_low, + __entry->inactive = inactive; + __entry->total_active = total_active; + __entry->active = active; +- __entry->ratio = ratio; + __entry->reclaim_flags = trace_shrink_flags(file) & RECLAIM_WB_LRU; + ), + +- TP_printk("nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld ratio=%ld flags=%s", ++ TP_printk("nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld flags=%s", + __entry->nid, + __entry->reclaim_idx, + __entry->total_inactive, __entry->inactive, + __entry->total_active, __entry->active, +- __entry->ratio, + show_reclaim_flags(__entry->reclaim_flags)) + ); + #endif /* _TRACE_VMSCAN_H */ +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 75cdf68..6890c21 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -150,6 +150,7 @@ unsigned long 
vm_total_pages; + + static LIST_HEAD(shrinker_list); + static DECLARE_RWSEM(shrinker_rwsem); ++static bool inactive_list_is_low(bool file, unsigned long, unsigned long); + + #ifdef CONFIG_MEMCG + static bool global_reclaim(struct scan_control *sc) +@@ -1962,6 +1963,22 @@ static void shrink_active_list(unsigned long nr_to_scan, + isolate_mode_t isolate_mode = 0; + int file = is_file_lru(lru); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); ++ unsigned long inactive, active; ++ enum lru_list inactive_lru = file * LRU_FILE; ++ enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; ++ bool deactivate; ++ ++ inactive = lruvec_lru_size_eligibe_zones(lruvec, file * LRU_FILE, ++ sc->reclaim_idx); ++ active = lruvec_lru_size_eligibe_zones(lruvec, file * LRU_FILE + ++ LRU_ACTIVE, sc->reclaim_idx); ++ deactivate = inactive_list_is_low(file, inactive, active); ++ trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, ++ sc->reclaim_idx, ++ lruvec_lru_size(lruvec, inactive_lru), inactive, ++ lruvec_lru_size(lruvec, active_lru), active, file); ++ if (!deactivate) ++ return; + + lru_add_drain(); + +@@ -2073,13 +2090,10 @@ static void shrink_active_list(unsigned long nr_to_scan, + * 1TB 101 10GB + * 10TB 320 32GB + */ +-static bool inactive_list_is_low(struct lruvec *lruvec, bool file, +- struct scan_control *sc, bool trace) ++static bool inactive_list_is_low(bool file, ++ unsigned long inactive, unsigned long active) + { + unsigned long inactive_ratio; +- unsigned long inactive, active; +- enum lru_list inactive_lru = file * LRU_FILE; +- enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; + unsigned long gb; + + /* +@@ -2089,22 +2103,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, + if (!file && !total_swap_pages) + return false; + +- inactive = lruvec_lru_size_eligibe_zones(lruvec, inactive_lru, sc->reclaim_idx); +- active = lruvec_lru_size_eligibe_zones(lruvec, active_lru, sc->reclaim_idx); +- + gb = (inactive + active) >> (30 - PAGE_SHIFT); + 
if (gb) + inactive_ratio = int_sqrt(10 * gb); + else + inactive_ratio = 1; + +- if (trace) +- trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id, +- sc->reclaim_idx, +- lruvec_lru_size(lruvec, inactive_lru), inactive, +- lruvec_lru_size(lruvec, active_lru), active, +- inactive_ratio, file); +- + return inactive * inactive_ratio < active; + } + +@@ -2112,8 +2116,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, + struct lruvec *lruvec, struct scan_control *sc) + { + if (is_active_lru(lru)) { +- if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true)) +- shrink_active_list(nr_to_scan, lruvec, sc, lru); ++ shrink_active_list(nr_to_scan, lruvec, sc, lru); + return 0; + } + +@@ -2153,6 +2156,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, + enum lru_list lru; + bool some_scanned; + int pass; ++ unsigned long inactive, active; + + /* + * If the zone or memcg is small, nr[l] can be 0. This +@@ -2243,7 +2247,11 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, + * lruvec even if it has plenty of old anonymous pages unless the + * system is under heavy pressure. + */ +- if (!inactive_list_is_low(lruvec, true, sc, false) && ++ inactive = lruvec_lru_size_eligibe_zones(lruvec, ++ LRU_FILE, sc->reclaim_idx); ++ active = lruvec_lru_size_eligibe_zones(lruvec, ++ LRU_FILE + LRU_ACTIVE, sc->reclaim_idx); ++ if (!inactive_list_is_low(true, inactive, active) && + lruvec_lru_size_eligibe_zones(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { + scan_balance = SCAN_FILE; + goto out; +@@ -2468,9 +2476,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc + * Even if we did not try to evict anon pages at all, we want to + * rebalance the anon lru active/inactive ratio. 
+ */ +- if (inactive_list_is_low(lruvec, false, sc, true)) +- shrink_active_list(SWAP_CLUSTER_MAX, lruvec, +- sc, LRU_ACTIVE_ANON); ++ shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); + } + + /* Use reclaim/compaction for costly allocs or under memory pressure */ +@@ -3118,8 +3124,7 @@ static void age_active_anon(struct pglist_data *pgdat, + do { + struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg); + +- if (inactive_list_is_low(lruvec, false, sc, true)) +- shrink_active_list(SWAP_CLUSTER_MAX, lruvec, ++ shrink_active_list(SWAP_CLUSTER_MAX, lruvec, + sc, LRU_ACTIVE_ANON); + + memcg = mem_cgroup_iter(NULL, memcg, NULL); +-- +2.7.4 diff --git a/a/content_digest b/N1/content_digest index ccfd830..d4dc791 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -126,6 +126,204 @@ "\n" "Yes, that's why I said \"it's okay if you love your version\". However,\n" "we can do refactoring to remove \"bool trace\" and even, it makes code\n" - more readable, I believe. + "more readable, I believe.\n" + "\n" + ">From 06eb7201d781155a8dee7e72fbb8423ec8175223 Mon Sep 17 00:00:00 2001\n" + "From: Minchan Kim <minchan@kernel.org>\n" + "Date: Fri, 13 Jan 2017 10:13:36 +0900\n" + "Subject: [PATCH] mm: refactoring inactive_list_is_low\n" + "\n" + "Recently, Michal Hocko added tracepoint into inactive_list_is_low\n" + "for catching why VM decided to age the active list to know\n" + "active/inacive balancing problem. With that, unfortunately, it\n" + "added \"bool trace\" to inactlive_list_is_low to control some place\n" + "should be prohibited tracing. It is not elegant to me so this patch\n" + "try to clean it up.\n" + "\n" + "Normally, most inactive_list_is_low is used for deciding active list\n" + "demotion but one site(i.e., get_scan_count) uses for other purpose\n" + "which reclaim file LRU forcefully. Sites for deactivation calls it\n" + "with shrink_active_list. 
It means inactive_list_is_low could be\n" + "located in shrink_active_list.\n" + "\n" + "One more thing this patch does is to remove \"ratio\" in the tracepoint\n" + "because we can get it by post processing in script via simple math.\n" + "\n" + "Signed-off-by: Minchan Kim <minchan@kernel.org>\n" + "---\n" + " include/trace/events/vmscan.h | 9 +++-----\n" + " mm/vmscan.c | 51 ++++++++++++++++++++++++-------------------\n" + " 2 files changed, 31 insertions(+), 29 deletions(-)\n" + "\n" + "diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h\n" + "index 27e8a5c..406ea95 100644\n" + "--- a/include/trace/events/vmscan.h\n" + "+++ b/include/trace/events/vmscan.h\n" + "@@ -432,9 +432,9 @@ TRACE_EVENT(mm_vmscan_inactive_list_is_low,\n" + " \tTP_PROTO(int nid, int reclaim_idx,\n" + " \t\tunsigned long total_inactive, unsigned long inactive,\n" + " \t\tunsigned long total_active, unsigned long active,\n" + "-\t\tunsigned long ratio, int file),\n" + "+\t\tint file),\n" + " \n" + "-\tTP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, ratio, file),\n" + "+\tTP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, file),\n" + " \n" + " \tTP_STRUCT__entry(\n" + " \t\t__field(int, nid)\n" + "@@ -443,7 +443,6 @@ TRACE_EVENT(mm_vmscan_inactive_list_is_low,\n" + " \t\t__field(unsigned long, inactive)\n" + " \t\t__field(unsigned long, total_active)\n" + " \t\t__field(unsigned long, active)\n" + "-\t\t__field(unsigned long, ratio)\n" + " \t\t__field(int, reclaim_flags)\n" + " \t),\n" + " \n" + "@@ -454,16 +453,14 @@ TRACE_EVENT(mm_vmscan_inactive_list_is_low,\n" + " \t\t__entry->inactive = inactive;\n" + " \t\t__entry->total_active = total_active;\n" + " \t\t__entry->active = active;\n" + "-\t\t__entry->ratio = ratio;\n" + " \t\t__entry->reclaim_flags = trace_shrink_flags(file) & RECLAIM_WB_LRU;\n" + " \t),\n" + " \n" + "-\tTP_printk(\"nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld 
ratio=%ld flags=%s\",\n" + "+\tTP_printk(\"nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld flags=%s\",\n" + " \t\t__entry->nid,\n" + " \t\t__entry->reclaim_idx,\n" + " \t\t__entry->total_inactive, __entry->inactive,\n" + " \t\t__entry->total_active, __entry->active,\n" + "-\t\t__entry->ratio,\n" + " \t\tshow_reclaim_flags(__entry->reclaim_flags))\n" + " );\n" + " #endif /* _TRACE_VMSCAN_H */\n" + "diff --git a/mm/vmscan.c b/mm/vmscan.c\n" + "index 75cdf68..6890c21 100644\n" + "--- a/mm/vmscan.c\n" + "+++ b/mm/vmscan.c\n" + "@@ -150,6 +150,7 @@ unsigned long vm_total_pages;\n" + " \n" + " static LIST_HEAD(shrinker_list);\n" + " static DECLARE_RWSEM(shrinker_rwsem);\n" + "+static bool inactive_list_is_low(bool file, unsigned long, unsigned long);\n" + " \n" + " #ifdef CONFIG_MEMCG\n" + " static bool global_reclaim(struct scan_control *sc)\n" + "@@ -1962,6 +1963,22 @@ static void shrink_active_list(unsigned long nr_to_scan,\n" + " \tisolate_mode_t isolate_mode = 0;\n" + " \tint file = is_file_lru(lru);\n" + " \tstruct pglist_data *pgdat = lruvec_pgdat(lruvec);\n" + "+\tunsigned long inactive, active;\n" + "+\tenum lru_list inactive_lru = file * LRU_FILE;\n" + "+\tenum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;\n" + "+\tbool deactivate;\n" + "+\n" + "+\tinactive = lruvec_lru_size_eligibe_zones(lruvec, file * LRU_FILE,\n" + "+\t\t\t\t\tsc->reclaim_idx);\n" + "+\tactive = lruvec_lru_size_eligibe_zones(lruvec, file * LRU_FILE +\n" + "+\t\t\t\t\tLRU_ACTIVE, sc->reclaim_idx);\n" + "+\tdeactivate = inactive_list_is_low(file, inactive, active);\n" + "+\ttrace_mm_vmscan_inactive_list_is_low(pgdat->node_id,\n" + "+\t\t\tsc->reclaim_idx,\n" + "+\t\t\tlruvec_lru_size(lruvec, inactive_lru), inactive,\n" + "+\t\t\tlruvec_lru_size(lruvec, active_lru), active, file);\n" + "+\tif (!deactivate)\n" + "+\t\treturn;\n" + " \n" + " \tlru_add_drain();\n" + " \n" + "@@ -2073,13 +2090,10 @@ static void shrink_active_list(unsigned long nr_to_scan,\n" 
+ " * 1TB 101 10GB\n" + " * 10TB 320 32GB\n" + " */\n" + "-static bool inactive_list_is_low(struct lruvec *lruvec, bool file,\n" + "-\t\t\t\t\t\tstruct scan_control *sc, bool trace)\n" + "+static bool inactive_list_is_low(bool file,\n" + "+\t\t\tunsigned long inactive, unsigned long active)\n" + " {\n" + " \tunsigned long inactive_ratio;\n" + "-\tunsigned long inactive, active;\n" + "-\tenum lru_list inactive_lru = file * LRU_FILE;\n" + "-\tenum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;\n" + " \tunsigned long gb;\n" + " \n" + " \t/*\n" + "@@ -2089,22 +2103,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,\n" + " \tif (!file && !total_swap_pages)\n" + " \t\treturn false;\n" + " \n" + "-\tinactive = lruvec_lru_size_eligibe_zones(lruvec, inactive_lru, sc->reclaim_idx);\n" + "-\tactive = lruvec_lru_size_eligibe_zones(lruvec, active_lru, sc->reclaim_idx);\n" + "-\n" + " \tgb = (inactive + active) >> (30 - PAGE_SHIFT);\n" + " \tif (gb)\n" + " \t\tinactive_ratio = int_sqrt(10 * gb);\n" + " \telse\n" + " \t\tinactive_ratio = 1;\n" + " \n" + "-\tif (trace)\n" + "-\t\ttrace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id,\n" + "-\t\t\t\tsc->reclaim_idx,\n" + "-\t\t\t\tlruvec_lru_size(lruvec, inactive_lru), inactive,\n" + "-\t\t\t\tlruvec_lru_size(lruvec, active_lru), active,\n" + "-\t\t\t\tinactive_ratio, file);\n" + "-\n" + " \treturn inactive * inactive_ratio < active;\n" + " }\n" + " \n" + "@@ -2112,8 +2116,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,\n" + " \t\t\t\t struct lruvec *lruvec, struct scan_control *sc)\n" + " {\n" + " \tif (is_active_lru(lru)) {\n" + "-\t\tif (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))\n" + "-\t\t\tshrink_active_list(nr_to_scan, lruvec, sc, lru);\n" + "+\t\tshrink_active_list(nr_to_scan, lruvec, sc, lru);\n" + " \t\treturn 0;\n" + " \t}\n" + " \n" + "@@ -2153,6 +2156,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup 
*memcg,\n" + " \tenum lru_list lru;\n" + " \tbool some_scanned;\n" + " \tint pass;\n" + "+\tunsigned long inactive, active;\n" + " \n" + " \t/*\n" + " \t * If the zone or memcg is small, nr[l] can be 0. This\n" + "@@ -2243,7 +2247,11 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,\n" + " \t * lruvec even if it has plenty of old anonymous pages unless the\n" + " \t * system is under heavy pressure.\n" + " \t */\n" + "-\tif (!inactive_list_is_low(lruvec, true, sc, false) &&\n" + "+\tinactive = lruvec_lru_size_eligibe_zones(lruvec,\n" + "+\t\t\t\tLRU_FILE, sc->reclaim_idx);\n" + "+\tactive = lruvec_lru_size_eligibe_zones(lruvec,\n" + "+\t\t\t\tLRU_FILE + LRU_ACTIVE, sc->reclaim_idx);\n" + "+\tif (!inactive_list_is_low(true, inactive, active) &&\n" + " \t lruvec_lru_size_eligibe_zones(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {\n" + " \t\tscan_balance = SCAN_FILE;\n" + " \t\tgoto out;\n" + "@@ -2468,9 +2476,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc\n" + " \t * Even if we did not try to evict anon pages at all, we want to\n" + " \t * rebalance the anon lru active/inactive ratio.\n" + " \t */\n" + "-\tif (inactive_list_is_low(lruvec, false, sc, true))\n" + "-\t\tshrink_active_list(SWAP_CLUSTER_MAX, lruvec,\n" + "-\t\t\t\t sc, LRU_ACTIVE_ANON);\n" + "+\tshrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON);\n" + " }\n" + " \n" + " /* Use reclaim/compaction for costly allocs or under memory pressure */\n" + "@@ -3118,8 +3124,7 @@ static void age_active_anon(struct pglist_data *pgdat,\n" + " \tdo {\n" + " \t\tstruct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);\n" + " \n" + "-\t\tif (inactive_list_is_low(lruvec, false, sc, true))\n" + "-\t\t\tshrink_active_list(SWAP_CLUSTER_MAX, lruvec,\n" + "+\t\tshrink_active_list(SWAP_CLUSTER_MAX, lruvec,\n" + " \t\t\t\t\t sc, LRU_ACTIVE_ANON);\n" + " \n" + " \t\tmemcg = mem_cgroup_iter(NULL, memcg, NULL);\n" + "-- \n" + 2.7.4 
-6fa4c2ceaa717638878af0313871bb3dfbfae7a30ee08568ae7f2e467552ead6 +8874818b0631bfbfacc053468eecee43eea48f404e2d11a13b27df8ac10d11b3
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.