stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Johannes Weiner <hannes@cmpxchg.org>,
	Shakeel Butt <shakeelb@google.com>, Roman Gushchin <guro@fb.com>,
	Michal Hocko <mhocko@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Sasha Levin <sashal@kernel.org>,
	linux-mm@kvack.org
Subject: [PATCH AUTOSEL 4.19 41/81] mm: fix inactive list balancing between NUMA nodes and cgroups
Date: Tue,  7 May 2019 01:35:12 -0400	[thread overview]
Message-ID: <20190507053554.30848-41-sashal@kernel.org> (raw)
In-Reply-To: <20190507053554.30848-1-sashal@kernel.org>

From: Johannes Weiner <hannes@cmpxchg.org>

[ Upstream commit 3b991208b897f52507168374033771a984b947b1 ]

During !CONFIG_CGROUP reclaim, we expand the inactive list size if it's
thrashing on the node that is about to be reclaimed.  But when cgroups
are enabled, we suddenly ignore the node scope and use the cgroup scope
only.  The result is that pressure bleeds between NUMA nodes depending
on whether cgroups are merely compiled into Linux.  This behavioral
difference is unexpected and undesirable.

When the refault adaptivity of the inactive list was first introduced,
there were no statistics at the lruvec level - the intersection of node
and memcg - so it was better than nothing.

But now that we have that infrastructure, use lruvec_page_state() to
make the list balancing decision always NUMA aware.

[hannes@cmpxchg.org: fix bisection hole]
  Link: http://lkml.kernel.org/r/20190417155241.GB23013@cmpxchg.org
Link: http://lkml.kernel.org/r/20190412144438.2645-1-hannes@cmpxchg.org
Fixes: 2a2e48854d70 ("mm: vmscan: fix IO/refault regression in cache workingset transition")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 mm/vmscan.c | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3830066018c1..ee545d1e9894 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2190,7 +2190,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
  *   10TB     320        32GB
  */
 static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
-				 struct mem_cgroup *memcg,
 				 struct scan_control *sc, bool actual_reclaim)
 {
 	enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
@@ -2211,16 +2210,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
 	inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
 	active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
 
-	if (memcg)
-		refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
-	else
-		refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
-
 	/*
 	 * When refaults are being observed, it means a new workingset
 	 * is being established. Disable active list protection to get
 	 * rid of the stale workingset quickly.
 	 */
+	refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
 	if (file && actual_reclaim && lruvec->refaults != refaults) {
 		inactive_ratio = 0;
 	} else {
@@ -2241,12 +2236,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
 }
 
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
-				 struct lruvec *lruvec, struct mem_cgroup *memcg,
-				 struct scan_control *sc)
+				 struct lruvec *lruvec, struct scan_control *sc)
 {
 	if (is_active_lru(lru)) {
-		if (inactive_list_is_low(lruvec, is_file_lru(lru),
-					 memcg, sc, true))
+		if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
 			shrink_active_list(nr_to_scan, lruvec, sc, lru);
 		return 0;
 	}
@@ -2346,7 +2339,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 			 * anonymous pages on the LRU in eligible zones.
 			 * Otherwise, the small LRU gets thrashed.
 			 */
-			if (!inactive_list_is_low(lruvec, false, memcg, sc, false) &&
+			if (!inactive_list_is_low(lruvec, false, sc, false) &&
 			    lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx)
 					>> sc->priority) {
 				scan_balance = SCAN_ANON;
@@ -2364,7 +2357,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 	 * lruvec even if it has plenty of old anonymous pages unless the
 	 * system is under heavy pressure.
 	 */
-	if (!inactive_list_is_low(lruvec, true, memcg, sc, false) &&
+	if (!inactive_list_is_low(lruvec, true, sc, false) &&
 	    lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
 		scan_balance = SCAN_FILE;
 		goto out;
@@ -2517,7 +2510,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
 				nr[lru] -= nr_to_scan;
 
 				nr_reclaimed += shrink_list(lru, nr_to_scan,
-							    lruvec, memcg, sc);
+							    lruvec, sc);
 			}
 		}
 
@@ -2584,7 +2577,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+	if (inactive_list_is_low(lruvec, false, sc, true))
 		shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
 				   sc, LRU_ACTIVE_ANON);
 }
@@ -2982,12 +2975,8 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
 		unsigned long refaults;
 		struct lruvec *lruvec;
 
-		if (memcg)
-			refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
-		else
-			refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
-
 		lruvec = mem_cgroup_lruvec(pgdat, memcg);
+		refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
 		lruvec->refaults = refaults;
 	} while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
 }
@@ -3344,7 +3333,7 @@ static void age_active_anon(struct pglist_data *pgdat,
 	do {
 		struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
 
-		if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+		if (inactive_list_is_low(lruvec, false, sc, true))
 			shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
 					   sc, LRU_ACTIVE_ANON);
 
-- 
2.20.1


  parent reply	other threads:[~2019-05-07  5:37 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-07  5:34 [PATCH AUTOSEL 4.19 01/81] iio: adc: xilinx: fix potential use-after-free on remove Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 02/81] iio: adc: xilinx: fix potential use-after-free on probe Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 03/81] iio: adc: xilinx: prevent touching unclocked h/w on remove Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 04/81] acpi/nfit: Always dump _DSM output payload Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 05/81] libnvdimm/namespace: Fix a potential NULL pointer dereference Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 06/81] HID: input: add mapping for Expose/Overview key Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 07/81] HID: input: add mapping for keyboard Brightness Up/Down/Toggle keys Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 08/81] HID: input: add mapping for "Toggle Display" key Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 09/81] libnvdimm/btt: Fix a kmemdup failure check Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 10/81] s390/dasd: Fix capacity calculation for large volumes Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 11/81] mac80211: fix unaligned access in mesh table hash function Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 12/81] mac80211: Increase MAX_MSG_LEN Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 13/81] cfg80211: Handle WMM rules in regulatory domain intersection Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 14/81] mac80211: fix memory accounting with A-MSDU aggregation Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 15/81] nl80211: Add NL80211_FLAG_CLEAR_SKB flag for other NL commands Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 16/81] Input: snvs_pwrkey - initialize necessary driver data before enabling IRQ Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 17/81] libnvdimm/pmem: fix a possible OOB access when read and write pmem Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 18/81] mac80211: Honor SW_CRYPTO_CONTROL for unicast keys in AP VLAN mode Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 19/81] s390/3270: fix lockdep false positive on view->lock Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 20/81] drm/amd/display: extending AUX SW Timeout Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 21/81] clocksource/drivers/npcm: select TIMER_OF Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 22/81] clocksource/drivers/oxnas: Fix OX820 compatible Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 23/81] selftests: fib_tests: Fix 'Command line is not complete' errors Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 24/81] mISDN: Check address length before reading address family Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 25/81] vxge: fix return of a free'd memblock on a failed dma mapping Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 26/81] qede: fix write to free'd pointer error and double free of ptp Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 27/81] afs: Unlock pages for __pagevec_release() Sasha Levin
2019-05-07  5:34 ` [PATCH AUTOSEL 4.19 28/81] drm/amd/display: If one stream full updates, full update all planes Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 29/81] s390/pkey: add one more argument space for debug feature entry Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 30/81] x86/build/lto: Fix truncated .bss with -fdata-sections Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 31/81] x86/reboot, efi: Use EFI reboot for Acer TravelMate X514-51T Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 32/81] x86/mm/tlb: Revert "x86/mm: Align TLB invalidation info" Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 33/81] KVM: x86: Raise #GP when guest vCPU do not support PMU Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 34/81] KVM: fix spectrev1 gadgets Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 35/81] KVM: x86: avoid misreporting level-triggered irqs as edge-triggered in tracing Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 36/81] tools lib traceevent: Fix missing equality check for strcmp Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 37/81] ipmi: ipmi_si_hardcode.c: init si_type array to fix a crash Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 38/81] ocelot: Don't sleep in atomic context (irqs_disabled()) Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 39/81] x86/mm/KASLR: Fix the size of the direct mapping section Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 40/81] scsi: aic7xxx: fix EISA support Sasha Levin
2019-05-07  5:35 ` Sasha Levin [this message]
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 42/81] init: initialize jump labels before command line option parsing Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 43/81] selftests: netfilter: check icmp pkttoobig errors are set as related Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 44/81] ipvs: do not schedule icmp errors from tunnels Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 45/81] netfilter: ctnetlink: don't use conntrack/expect object addresses as id Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 46/81] netfilter: nf_tables: prevent shift wrap in nft_chain_parse_hook() Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 47/81] MIPS: perf: ath79: Fix perfcount IRQ assignment Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 48/81] s390: ctcm: fix ctcm_new_device error return code Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 49/81] drm/sun4i: Set device driver data at bind time for use in unbind Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 50/81] drm/sun4i: Fix component unbinding and component master deletion Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 51/81] selftests/net: correct the return value for run_netsocktests Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 52/81] netfilter: fix nf_l4proto_log_invalid to log invalid packets Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 53/81] gpu: ipu-v3: dp: fix CSC handling Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 54/81] drm/imx: don't skip DP channel disable for background plane Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 55/81] ARM: 8856/1: NOMMU: Fix CCR register faulty initialization when MPU is disabled Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 56/81] spi: Micrel eth switch: declare missing of table Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 57/81] spi: ST ST95HF NFC: " Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 58/81] x86/mm: Fix a crash with kmemleak_scan() Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 59/81] drm/sun4i: Unbind components before releasing DRM and memory Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 60/81] Input: synaptics-rmi4 - fix possible double free Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 61/81] btrfs: Switch memory allocations in async csum calculation path to kvmalloc Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 62/81] RDMA/hns: Bugfix for mapping user db Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 63/81] mm/memory_hotplug.c: drop memory device reference after find_memory_block() Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 64/81] powerpc/smp: Fix NMI IPI timeout Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 65/81] powerpc/smp: Fix NMI IPI xmon timeout Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 66/81] net: dsa: mv88e6xxx: fix few issues in mv88e6390x_port_set_cmode Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 67/81] mm/memory.c: fix modifying of page protection by insert_pfn() Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 68/81] usb: typec: Fix unchecked return value Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 69/81] f2fs: fix to data block override node segment by mistake Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 70/81] netfilter: nf_tables: use-after-free in dynamic operations Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 71/81] netfilter: nf_tables: add missing ->release_ops() in error path of newrule() Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 72/81] net: fec: manage ahb clock in runtime pm Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 73/81] mlxsw: spectrum_switchdev: Add MDB entries in prepare phase Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 74/81] mlxsw: core: Do not use WQ_MEM_RECLAIM for EMAD workqueue Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 75/81] mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw ordered workqueue Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 76/81] mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw workqueue Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 77/81] net/tls: fix the IV leaks Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 78/81] net: strparser: partially revert "strparser: Call skb_unclone conditionally" Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 79/81] NFC: nci: Add some bounds checking in nci_hci_cmd_received() Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 80/81] nfc: nci: Potential off by one in ->pipes[] array Sasha Levin
2019-05-07  5:35 ` [PATCH AUTOSEL 4.19 81/81] x86/kprobes: Avoid kretprobe recursion bug Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190507053554.30848-41-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=guro@fb.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=shakeelb@google.com \
    --cc=stable@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).