All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace
  2021-07-01 21:03 [patch 0/5] optionally sync per-CPU vmstats counter " Marcelo Tosatti
@ 2021-07-01 21:03 ` Marcelo Tosatti
  2021-07-01 23:11     ` kernel test robot
  2021-07-02  6:50     ` kernel test robot
  0 siblings, 2 replies; 11+ messages in thread
From: Marcelo Tosatti @ 2021-07-01 21:03 UTC (permalink / raw)
  To: linux-kernel
  Cc: Christoph Lameter, Thomas Gleixner, Frederic Weisbecker,
	Juri Lelli, Nitesh Lal, Marcelo Tosatti

The logic to disable vmstat worker thread, when entering
nohz full, does not cover all scenarios. For example, it is possible
for the following to happen:

1) enter nohz_full, which calls refresh_cpu_vm_stats, syncing the stats.
2) app runs mlock, which increases counters for mlock'ed pages.
3) start -RT loop

Since refresh_cpu_vm_stats from nohz_full logic can happen _before_
the mlock, vmstat shepherd can restart vmstat worker thread on 
the CPU in question.

To fix this, optionally sync the vmstat counters when returning
from userspace, controllable by a new "vmstat_sync" isolcpus 
flags (default off).

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: linux-2.6-vmstat-update/kernel/sched/isolation.c
===================================================================
--- linux-2.6-vmstat-update.orig/kernel/sched/isolation.c
+++ linux-2.6-vmstat-update/kernel/sched/isolation.c
@@ -8,6 +8,7 @@
  *
  */
 #include "sched.h"
+#include <linux/vmstat.h>
 
 DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
 EXPORT_SYMBOL_GPL(housekeeping_overridden);
@@ -129,6 +130,9 @@ static int __init housekeeping_setup(cha
 		}
 	}
 
+	if (flags & HK_FLAG_VMSTAT_SYNC)
+		static_branch_enable(&vmstat_sync_enabled);
+
 	housekeeping_flags |= flags;
 
 	free_bootmem_cpumask_var(non_housekeeping_mask);
Index: linux-2.6-vmstat-update/include/linux/vmstat.h
===================================================================
--- linux-2.6-vmstat-update.orig/include/linux/vmstat.h
+++ linux-2.6-vmstat-update/include/linux/vmstat.h
@@ -21,6 +21,15 @@ int sysctl_vm_numa_stat_handler(struct c
 		void *buffer, size_t *length, loff_t *ppos);
 #endif
 
+DECLARE_STATIC_KEY_FALSE(vmstat_sync_enabled);
+
+extern void __sync_vmstat(void);
+static inline void sync_vmstat(void)
+{
+	if (static_branch_unlikely(&vmstat_sync_enabled))
+		__sync_vmstat();
+}
+
 struct reclaim_stat {
 	unsigned nr_dirty;
 	unsigned nr_unqueued_dirty;
Index: linux-2.6-vmstat-update/mm/vmstat.c
===================================================================
--- linux-2.6-vmstat-update.orig/mm/vmstat.c
+++ linux-2.6-vmstat-update/mm/vmstat.c
@@ -28,6 +28,7 @@
 #include <linux/mm_inline.h>
 #include <linux/page_ext.h>
 #include <linux/page_owner.h>
+#include <linux/sched/isolation.h>
 
 #include "internal.h"
 
@@ -308,6 +309,24 @@ void set_pgdat_percpu_threshold(pg_data_
 	}
 }
 
+DEFINE_STATIC_KEY_FALSE(vmstat_sync_enabled);
+static DEFINE_PER_CPU_ALIGNED(bool, vmstat_dirty);
+
+static inline void mark_vmstat_dirty(void)
+{
+	int cpu;
+
+	if (!static_branch_unlikely(&vmstat_sync_enabled))
+		return;
+
+	cpu = smp_processor_id();
+
+	if (housekeeping_cpu(cpu, HK_FLAG_VMSTAT_SYNC))
+		return;
+
+	per_cpu(vmstat_dirty, smp_processor_id()) = true;
+}
+
 /*
  * For use when we know that interrupts are disabled,
  * or when we know that preemption is disabled and that
@@ -330,6 +349,7 @@ void __mod_zone_page_state(struct zone *
 		x = 0;
 	}
 	__this_cpu_write(*p, x);
+	mark_vmstat_dirty();
 }
 EXPORT_SYMBOL(__mod_zone_page_state);
 
@@ -361,6 +381,7 @@ void __mod_node_page_state(struct pglist
 		x = 0;
 	}
 	__this_cpu_write(*p, x);
+	mark_vmstat_dirty();
 }
 EXPORT_SYMBOL(__mod_node_page_state);
 
@@ -401,6 +422,7 @@ void __inc_zone_state(struct zone *zone,
 		zone_page_state_add(v + overstep, zone, item);
 		__this_cpu_write(*p, -overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -419,6 +441,7 @@ void __inc_node_state(struct pglist_data
 		node_page_state_add(v + overstep, pgdat, item);
 		__this_cpu_write(*p, -overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -447,6 +470,7 @@ void __dec_zone_state(struct zone *zone,
 		zone_page_state_add(v - overstep, zone, item);
 		__this_cpu_write(*p, overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -465,6 +489,7 @@ void __dec_node_state(struct pglist_data
 		node_page_state_add(v - overstep, pgdat, item);
 		__this_cpu_write(*p, overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -528,6 +553,7 @@ static inline void mod_zone_state(struct
 
 	if (z)
 		zone_page_state_add(z, zone, item);
+	mark_vmstat_dirty();
 }
 
 void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
@@ -596,6 +622,7 @@ static inline void mod_node_state(struct
 
 	if (z)
 		node_page_state_add(z, pgdat, item);
+	mark_vmstat_dirty();
 }
 
 void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
@@ -2006,6 +2033,32 @@ static void vmstat_shepherd(struct work_
 		round_jiffies_relative(sysctl_stat_interval));
 }
 
+void __sync_vmstat(void)
+{
+	int cpu;
+
+	cpu = get_cpu();
+	if (per_cpu(vmstat_dirty, cpu) == false) {
+		put_cpu();
+		return;
+	}
+
+	refresh_cpu_vm_stats(false);
+	per_cpu(vmstat_dirty, cpu) = false;
+	put_cpu();
+
+	/*
+	 * If task is migrated to another CPU between put_cpu
+	 * and cancel_delayed_work_sync, the code below might
+	 * cancel vmstat_update work for a different cpu
+	 * (than the one from which the vmstats were flushed).
+	 *
+	 * However, vmstat shepherd will re-enable it later,
+	 * so its harmless.
+	 */
+	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+}
+
 static void __init start_shepherd_timer(void)
 {
 	int cpu;
Index: linux-2.6-vmstat-update/kernel/entry/common.c
===================================================================
--- linux-2.6-vmstat-update.orig/kernel/entry/common.c
+++ linux-2.6-vmstat-update/kernel/entry/common.c
@@ -6,6 +6,7 @@
 #include <linux/livepatch.h>
 #include <linux/audit.h>
 #include <linux/tick.h>
+#include <linux/vmstat.h>
 
 #include "common.h"
 
@@ -290,6 +291,7 @@ static void syscall_exit_to_user_mode_pr
  */
 static void isolation_exit_to_user_mode_prepare(void)
 {
+	sync_vmstat();
 }
 
 static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace
  2021-07-01 21:03 ` [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters " Marcelo Tosatti
@ 2021-07-01 23:11     ` kernel test robot
  2021-07-02  6:50     ` kernel test robot
  1 sibling, 0 replies; 11+ messages in thread
From: kernel test robot @ 2021-07-01 23:11 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 2034 bytes --]

Hi Marcelo,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on tip/master linus/master v5.13 next-20210701]
[cannot apply to hnaz-linux-mm/master tip/core/entry]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Marcelo-Tosatti/optionally-sync-per-CPU-vmstats-counter-on-return-to-userspace/20210702-050826
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 031e3bd8986fffe31e1ddbf5264cccfe30c9abd7
config: i386-tinyconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/b973a70c0670675073265d2cbee70a36bda3273e
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Marcelo-Tosatti/optionally-sync-per-CPU-vmstats-counter-on-return-to-userspace/20210702-050826
        git checkout b973a70c0670675073265d2cbee70a36bda3273e
        # save the attached .config to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   ld: kernel/entry/common.o: in function `syscall_exit_to_user_mode_work':
>> common.c:(.text+0x1d8): undefined reference to `vmstat_sync_enabled'
>> ld: common.c:(.text+0x1e1): undefined reference to `__sync_vmstat'
   ld: kernel/entry/common.o: in function `syscall_exit_to_user_mode':
>> common.c:(.noinstr.text+0x71): undefined reference to `vmstat_sync_enabled'
>> ld: common.c:(.noinstr.text+0x7a): undefined reference to `__sync_vmstat'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 7417 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace
@ 2021-07-01 23:11     ` kernel test robot
  0 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2021-07-01 23:11 UTC (permalink / raw)
  To: Marcelo Tosatti, linux-kernel
  Cc: kbuild-all, Christoph Lameter, Thomas Gleixner,
	Frederic Weisbecker, Juri Lelli, Nitesh Lal, Marcelo Tosatti

[-- Attachment #1: Type: text/plain, Size: 1993 bytes --]

Hi Marcelo,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on tip/master linus/master v5.13 next-20210701]
[cannot apply to hnaz-linux-mm/master tip/core/entry]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Marcelo-Tosatti/optionally-sync-per-CPU-vmstats-counter-on-return-to-userspace/20210702-050826
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 031e3bd8986fffe31e1ddbf5264cccfe30c9abd7
config: i386-tinyconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/b973a70c0670675073265d2cbee70a36bda3273e
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Marcelo-Tosatti/optionally-sync-per-CPU-vmstats-counter-on-return-to-userspace/20210702-050826
        git checkout b973a70c0670675073265d2cbee70a36bda3273e
        # save the attached .config to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   ld: kernel/entry/common.o: in function `syscall_exit_to_user_mode_work':
>> common.c:(.text+0x1d8): undefined reference to `vmstat_sync_enabled'
>> ld: common.c:(.text+0x1e1): undefined reference to `__sync_vmstat'
   ld: kernel/entry/common.o: in function `syscall_exit_to_user_mode':
>> common.c:(.noinstr.text+0x71): undefined reference to `vmstat_sync_enabled'
>> ld: common.c:(.noinstr.text+0x7a): undefined reference to `__sync_vmstat'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 7417 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace
  2021-07-01 21:03 ` [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters " Marcelo Tosatti
@ 2021-07-02  6:50     ` kernel test robot
  2021-07-02  6:50     ` kernel test robot
  1 sibling, 0 replies; 11+ messages in thread
From: kernel test robot @ 2021-07-02  6:50 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 2734 bytes --]

Hi Marcelo,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on tip/master linus/master v5.13 next-20210701]
[cannot apply to hnaz-linux-mm/master tip/core/entry]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Marcelo-Tosatti/optionally-sync-per-CPU-vmstats-counter-on-return-to-userspace/20210702-050826
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 031e3bd8986fffe31e1ddbf5264cccfe30c9abd7
config: arc-randconfig-r024-20210630 (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/b973a70c0670675073265d2cbee70a36bda3273e
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Marcelo-Tosatti/optionally-sync-per-CPU-vmstats-counter-on-return-to-userspace/20210702-050826
        git checkout b973a70c0670675073265d2cbee70a36bda3273e
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross O=build_dir ARCH=arc SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   arceb-elf-ld: kernel/sched/isolation.o: in function `housekeeping_setup':
   isolation.c:(.init.text+0xa0): undefined reference to `vmstat_sync_enabled'
>> arceb-elf-ld: isolation.c:(.init.text+0xa0): undefined reference to `vmstat_sync_enabled'
   arceb-elf-ld: lib/stackdepot.o: in function `filter_irq_stacks':
   (.text+0x5a): undefined reference to `__irqentry_text_start'
   arceb-elf-ld: (.text+0x5a): undefined reference to `__irqentry_text_start'
   arceb-elf-ld: (.text+0x62): undefined reference to `__irqentry_text_end'
   arceb-elf-ld: (.text+0x62): undefined reference to `__irqentry_text_end'
   arceb-elf-ld: (.text+0x6a): undefined reference to `__softirqentry_text_start'
   arceb-elf-ld: (.text+0x6a): undefined reference to `__softirqentry_text_start'
   arceb-elf-ld: (.text+0x72): undefined reference to `__softirqentry_text_end'
   arceb-elf-ld: (.text+0x72): undefined reference to `__softirqentry_text_end'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 32501 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace
@ 2021-07-02  6:50     ` kernel test robot
  0 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2021-07-02  6:50 UTC (permalink / raw)
  To: Marcelo Tosatti, linux-kernel
  Cc: kbuild-all, Christoph Lameter, Thomas Gleixner,
	Frederic Weisbecker, Juri Lelli, Nitesh Lal, Marcelo Tosatti

[-- Attachment #1: Type: text/plain, Size: 2685 bytes --]

Hi Marcelo,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on tip/master linus/master v5.13 next-20210701]
[cannot apply to hnaz-linux-mm/master tip/core/entry]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Marcelo-Tosatti/optionally-sync-per-CPU-vmstats-counter-on-return-to-userspace/20210702-050826
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 031e3bd8986fffe31e1ddbf5264cccfe30c9abd7
config: arc-randconfig-r024-20210630 (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/b973a70c0670675073265d2cbee70a36bda3273e
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Marcelo-Tosatti/optionally-sync-per-CPU-vmstats-counter-on-return-to-userspace/20210702-050826
        git checkout b973a70c0670675073265d2cbee70a36bda3273e
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross O=build_dir ARCH=arc SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   arceb-elf-ld: kernel/sched/isolation.o: in function `housekeeping_setup':
   isolation.c:(.init.text+0xa0): undefined reference to `vmstat_sync_enabled'
>> arceb-elf-ld: isolation.c:(.init.text+0xa0): undefined reference to `vmstat_sync_enabled'
   arceb-elf-ld: lib/stackdepot.o: in function `filter_irq_stacks':
   (.text+0x5a): undefined reference to `__irqentry_text_start'
   arceb-elf-ld: (.text+0x5a): undefined reference to `__irqentry_text_start'
   arceb-elf-ld: (.text+0x62): undefined reference to `__irqentry_text_end'
   arceb-elf-ld: (.text+0x62): undefined reference to `__irqentry_text_end'
   arceb-elf-ld: (.text+0x6a): undefined reference to `__softirqentry_text_start'
   arceb-elf-ld: (.text+0x6a): undefined reference to `__softirqentry_text_start'
   arceb-elf-ld: (.text+0x72): undefined reference to `__softirqentry_text_end'
   arceb-elf-ld: (.text+0x72): undefined reference to `__softirqentry_text_end'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 32501 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace
@ 2021-07-03  4:54 kernel test robot
  0 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2021-07-03  4:54 UTC (permalink / raw)
  To: kbuild

[-- Attachment #1: Type: text/plain, Size: 3220 bytes --]

CC: kbuild-all(a)lists.01.org
In-Reply-To: <20210701210458.350881923@fuller.cnet>
References: <20210701210458.350881923@fuller.cnet>
TO: Marcelo Tosatti <mtosatti@redhat.com>
TO: linux-kernel(a)vger.kernel.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Frederic Weisbecker <frederic@kernel.org>
CC: Juri Lelli <juri.lelli@redhat.com>
CC: Nitesh Lal <nilal@redhat.com>
CC: Marcelo Tosatti <mtosatti@redhat.com>

Hi Marcelo,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on tip/sched/core]
[also build test WARNING on tip/master linus/master v5.13 next-20210701]
[cannot apply to hnaz-linux-mm/master tip/core/entry]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Marcelo-Tosatti/optionally-sync-per-CPU-vmstats-counter-on-return-to-userspace/20210702-050826
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 031e3bd8986fffe31e1ddbf5264cccfe30c9abd7
:::::: branch date: 32 hours ago
:::::: commit date: 32 hours ago
config: x86_64-randconfig-b001-20210630 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 9eb613b2de3163686b1a4bd1160f15ac56a4b083)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # apt-get install iwyu # include-what-you-use
        # https://github.com/0day-ci/linux/commit/b973a70c0670675073265d2cbee70a36bda3273e
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Marcelo-Tosatti/optionally-sync-per-CPU-vmstats-counter-on-return-to-userspace/20210702-050826
        git checkout b973a70c0670675073265d2cbee70a36bda3273e
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross C=1 CHECK=iwyu O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


iwyu warnings: (new ones prefixed by >>)
   mm/vmstat.c:18:1: iwyu: warning: superfluous #include <linux/cpu.h>
   mm/vmstat.c:13:1: iwyu: warning: superfluous #include <linux/fs.h>
   mm/vmstat.c:28:1: iwyu: warning: superfluous #include <linux/mm_inline.h>
   mm/vmstat.c:29:1: iwyu: warning: superfluous #include <linux/page_ext.h>
   mm/vmstat.c:30:1: iwyu: warning: superfluous #include <linux/page_owner.h>
>> mm/vmstat.c:31:1: iwyu: warning: superfluous #include <linux/sched/isolation.h>

vim +31 mm/vmstat.c

b973a70c067067 Marcelo Tosatti 2021-07-01 @31  #include <linux/sched/isolation.h>
6e543d5780e36f Lisa Du         2013-09-11  32  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 41897 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace
  2021-07-09 17:37 [patch 0/5] optionally perform deferred actions " Marcelo Tosatti
@ 2021-07-09 17:37 ` Marcelo Tosatti
  2021-07-12  9:05   ` Christoph Lameter
  0 siblings, 1 reply; 11+ messages in thread
From: Marcelo Tosatti @ 2021-07-09 17:37 UTC (permalink / raw)
  To: linux-kernel
  Cc: Christoph Lameter, Thomas Gleixner, Frederic Weisbecker,
	Juri Lelli, Nitesh Lal, Peter Zijlstra, Nicolas Saenz,
	Marcelo Tosatti

The logic to disable vmstat worker thread, when entering
nohz full, does not cover all scenarios. For example, it is possible
for the following to happen:

1) enter nohz_full, which calls refresh_cpu_vm_stats, syncing the stats.
2) app runs mlock, which increases counters for mlock'ed pages.
3) start -RT loop

Since refresh_cpu_vm_stats from nohz_full logic can happen _before_
the mlock, vmstat shepherd can restart vmstat worker thread on 
the CPU in question.

To fix this, optionally sync the vmstat counters when returning
from userspace, controllable by a new "quiesce_on_exit_to_usermode" isolcpus 
flags (default off).

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: linux-2.6-vmstat-update/kernel/sched/isolation.c
===================================================================
--- linux-2.6-vmstat-update.orig/kernel/sched/isolation.c
+++ linux-2.6-vmstat-update/kernel/sched/isolation.c
@@ -8,6 +8,7 @@
  *
  */
 #include "sched.h"
+#include <linux/vmstat.h>
 
 DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
 EXPORT_SYMBOL_GPL(housekeeping_overridden);
@@ -129,6 +130,11 @@ static int __init housekeeping_setup(cha
 		}
 	}
 
+#ifdef CONFIG_SMP
+	if (flags & HK_FLAG_QUIESCE_URET)
+		static_branch_enable(&vmstat_sync_enabled);
+#endif
+
 	housekeeping_flags |= flags;
 
 	free_bootmem_cpumask_var(non_housekeeping_mask);
Index: linux-2.6-vmstat-update/include/linux/vmstat.h
===================================================================
--- linux-2.6-vmstat-update.orig/include/linux/vmstat.h
+++ linux-2.6-vmstat-update/include/linux/vmstat.h
@@ -21,6 +21,23 @@ int sysctl_vm_numa_stat_handler(struct c
 		void *buffer, size_t *length, loff_t *ppos);
 #endif
 
+#ifdef CONFIG_SMP
+DECLARE_STATIC_KEY_FALSE(vmstat_sync_enabled);
+
+extern void __sync_vmstat(void);
+static inline void sync_vmstat(void)
+{
+	if (static_branch_unlikely(&vmstat_sync_enabled))
+		__sync_vmstat();
+}
+#else
+
+static inline void sync_vmstat(void)
+{
+}
+
+#endif
+
 struct reclaim_stat {
 	unsigned nr_dirty;
 	unsigned nr_unqueued_dirty;
Index: linux-2.6-vmstat-update/mm/vmstat.c
===================================================================
--- linux-2.6-vmstat-update.orig/mm/vmstat.c
+++ linux-2.6-vmstat-update/mm/vmstat.c
@@ -28,6 +28,7 @@
 #include <linux/mm_inline.h>
 #include <linux/page_ext.h>
 #include <linux/page_owner.h>
+#include <linux/sched/isolation.h>
 
 #include "internal.h"
 
@@ -308,6 +309,24 @@ void set_pgdat_percpu_threshold(pg_data_
 	}
 }
 
+DEFINE_STATIC_KEY_FALSE(vmstat_sync_enabled);
+static DEFINE_PER_CPU_ALIGNED(bool, vmstat_dirty);
+
+static inline void mark_vmstat_dirty(void)
+{
+	int cpu;
+
+	if (!static_branch_unlikely(&vmstat_sync_enabled))
+		return;
+
+	cpu = smp_processor_id();
+
+	if (housekeeping_cpu(cpu, HK_FLAG_QUIESCE_URET))
+		return;
+
+	per_cpu(vmstat_dirty, smp_processor_id()) = true;
+}
+
 /*
  * For use when we know that interrupts are disabled,
  * or when we know that preemption is disabled and that
@@ -330,6 +349,7 @@ void __mod_zone_page_state(struct zone *
 		x = 0;
 	}
 	__this_cpu_write(*p, x);
+	mark_vmstat_dirty();
 }
 EXPORT_SYMBOL(__mod_zone_page_state);
 
@@ -361,6 +381,7 @@ void __mod_node_page_state(struct pglist
 		x = 0;
 	}
 	__this_cpu_write(*p, x);
+	mark_vmstat_dirty();
 }
 EXPORT_SYMBOL(__mod_node_page_state);
 
@@ -401,6 +422,7 @@ void __inc_zone_state(struct zone *zone,
 		zone_page_state_add(v + overstep, zone, item);
 		__this_cpu_write(*p, -overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -419,6 +441,7 @@ void __inc_node_state(struct pglist_data
 		node_page_state_add(v + overstep, pgdat, item);
 		__this_cpu_write(*p, -overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -447,6 +470,7 @@ void __dec_zone_state(struct zone *zone,
 		zone_page_state_add(v - overstep, zone, item);
 		__this_cpu_write(*p, overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -465,6 +489,7 @@ void __dec_node_state(struct pglist_data
 		node_page_state_add(v - overstep, pgdat, item);
 		__this_cpu_write(*p, overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -528,6 +553,7 @@ static inline void mod_zone_state(struct
 
 	if (z)
 		zone_page_state_add(z, zone, item);
+	mark_vmstat_dirty();
 }
 
 void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
@@ -596,6 +622,7 @@ static inline void mod_node_state(struct
 
 	if (z)
 		node_page_state_add(z, pgdat, item);
+	mark_vmstat_dirty();
 }
 
 void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
@@ -2006,6 +2033,32 @@ static void vmstat_shepherd(struct work_
 		round_jiffies_relative(sysctl_stat_interval));
 }
 
+void __sync_vmstat(void)
+{
+	int cpu;
+
+	cpu = get_cpu();
+	if (per_cpu(vmstat_dirty, cpu) == false) {
+		put_cpu();
+		return;
+	}
+
+	refresh_cpu_vm_stats(false);
+	per_cpu(vmstat_dirty, cpu) = false;
+	put_cpu();
+
+	/*
+	 * If task is migrated to another CPU between put_cpu
+	 * and cancel_delayed_work_sync, the code below might
+	 * cancel vmstat_update work for a different cpu
+	 * (than the one from which the vmstats were flushed).
+	 *
+	 * However, vmstat shepherd will re-enable it later,
+	 * so its harmless.
+	 */
+	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+}
+
 static void __init start_shepherd_timer(void)
 {
 	int cpu;
Index: linux-2.6-vmstat-update/kernel/entry/common.c
===================================================================
--- linux-2.6-vmstat-update.orig/kernel/entry/common.c
+++ linux-2.6-vmstat-update/kernel/entry/common.c
@@ -6,6 +6,7 @@
 #include <linux/livepatch.h>
 #include <linux/audit.h>
 #include <linux/tick.h>
+#include <linux/vmstat.h>
 
 #include "common.h"
 
@@ -290,6 +291,7 @@ static void syscall_exit_to_user_mode_pr
  */
 static void isolation_exit_to_user_mode_prepare(void)
 {
+	sync_vmstat();
 }
 
 static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace
  2021-07-09 17:37 ` [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters " Marcelo Tosatti
@ 2021-07-12  9:05   ` Christoph Lameter
  2021-07-12 10:30     ` Marcelo Tosatti
  2021-07-13 19:30     ` Marcelo Tosatti
  0 siblings, 2 replies; 11+ messages in thread
From: Christoph Lameter @ 2021-07-12  9:05 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: linux-kernel, Thomas Gleixner, Frederic Weisbecker, Juri Lelli,
	Nitesh Lal, Peter Zijlstra, Nicolas Saenz

On Fri, 9 Jul 2021, Marcelo Tosatti wrote:

> +
> +	if (!static_branch_unlikely(&vmstat_sync_enabled))
> +		return;
> +
> +	cpu = smp_processor_id();
> +
> +	if (housekeeping_cpu(cpu, HK_FLAG_QUIESCE_URET))
> +		return;
> +
> +	per_cpu(vmstat_dirty, smp_processor_id()) = true;
> +}

And you are going to insert this into all the performance critical VM
statistics handling. Inline?

And why do you need to do such things as to determine the processor? At
mininum do this using this cpu operations like the vmstat functions
currently do. And, lucky us, now we also have
more issues why we should disable preemption etc etc while handling vm
counters.



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace
  2021-07-12  9:05   ` Christoph Lameter
@ 2021-07-12 10:30     ` Marcelo Tosatti
  2021-07-13 19:30     ` Marcelo Tosatti
  1 sibling, 0 replies; 11+ messages in thread
From: Marcelo Tosatti @ 2021-07-12 10:30 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: linux-kernel, Thomas Gleixner, Frederic Weisbecker, Juri Lelli,
	Nitesh Lal, Peter Zijlstra, Nicolas Saenz

On Mon, Jul 12, 2021 at 11:05:58AM +0200, Christoph Lameter wrote:
> On Fri, 9 Jul 2021, Marcelo Tosatti wrote:
> 
> > +
> > +	if (!static_branch_unlikely(&vmstat_sync_enabled))
> > +		return;
> > +
> > +	cpu = smp_processor_id();
> > +
> > +	if (housekeeping_cpu(cpu, HK_FLAG_QUIESCE_URET))
> > +		return;
> > +
> > +	per_cpu(vmstat_dirty, smp_processor_id()) = true;
> > +}
> 
> And you are going to insert this into all the performance critical VM
> statistics handling. Inline?

Yes, this is what the patch below is supposed to do (maybe it missed
some statistics?).

The alternative would be some equivalent of need_update on return to
userspace (for all system call returns) (when the HK_FLAG_QUIESCE_URET 
flag is enabled).

> And why do you need to do such things as to determine the processor? At
> mininum do this using this cpu operations like the vmstat functions
> currently do.

OK, will do that and resend.

> And, lucky us, now we also have
> more issues why we should disable preemption etc etc while handling vm
> counters.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace
  2021-07-12  9:05   ` Christoph Lameter
  2021-07-12 10:30     ` Marcelo Tosatti
@ 2021-07-13 19:30     ` Marcelo Tosatti
  1 sibling, 0 replies; 11+ messages in thread
From: Marcelo Tosatti @ 2021-07-13 19:30 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: linux-kernel, Thomas Gleixner, Frederic Weisbecker, Juri Lelli,
	Nitesh Lal, Peter Zijlstra, Nicolas Saenz

On Mon, Jul 12, 2021 at 11:05:58AM +0200, Christoph Lameter wrote:
> On Fri, 9 Jul 2021, Marcelo Tosatti wrote:
> 
> > +
> > +	if (!static_branch_unlikely(&vmstat_sync_enabled))
> > +		return;
> > +
> > +	cpu = smp_processor_id();
> > +
> > +	if (housekeeping_cpu(cpu, HK_FLAG_QUIESCE_URET))
> > +		return;
> > +
> > +	per_cpu(vmstat_dirty, smp_processor_id()) = true;
> > +}
> 
> And you are going to insert this into all the performance critical VM
> statistics handling. Inline?
> 
> And why do you need to do such things as to determine the processor? At
> mininum do this using this cpu operations like the vmstat functions
> currently do. And, lucky us, now we also have
> more issues why we should disable preemption etc etc while handling vm
> counters.

OK, hopefully this is what you mean.

Any other comments?

Index: linux-2.6-vmstat-update/kernel/sched/isolation.c
===================================================================
--- linux-2.6-vmstat-update.orig/kernel/sched/isolation.c
+++ linux-2.6-vmstat-update/kernel/sched/isolation.c
@@ -8,6 +8,7 @@
  *
  */
 #include "sched.h"
+#include <linux/vmstat.h>
 
 DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
 EXPORT_SYMBOL_GPL(housekeeping_overridden);
@@ -129,6 +130,11 @@ static int __init housekeeping_setup(cha
 		}
 	}
 
+#ifdef CONFIG_SMP
+	if (flags & HK_FLAG_QUIESCE_URET)
+		static_branch_enable(&vmstat_sync_enabled);
+#endif
+
 	housekeeping_flags |= flags;
 
 	free_bootmem_cpumask_var(non_housekeeping_mask);
Index: linux-2.6-vmstat-update/include/linux/vmstat.h
===================================================================
--- linux-2.6-vmstat-update.orig/include/linux/vmstat.h
+++ linux-2.6-vmstat-update/include/linux/vmstat.h
@@ -21,6 +21,23 @@ int sysctl_vm_numa_stat_handler(struct c
 		void *buffer, size_t *length, loff_t *ppos);
 #endif
 
+#ifdef CONFIG_SMP
+DECLARE_STATIC_KEY_FALSE(vmstat_sync_enabled);
+
+extern void __sync_vmstat(void);
+static inline void sync_vmstat(void)
+{
+	if (static_branch_unlikely(&vmstat_sync_enabled))
+		__sync_vmstat();
+}
+#else
+
+static inline void sync_vmstat(void)
+{
+}
+
+#endif
+
 struct reclaim_stat {
 	unsigned nr_dirty;
 	unsigned nr_unqueued_dirty;
Index: linux-2.6-vmstat-update/mm/vmstat.c
===================================================================
--- linux-2.6-vmstat-update.orig/mm/vmstat.c
+++ linux-2.6-vmstat-update/mm/vmstat.c
@@ -28,6 +28,7 @@
 #include <linux/mm_inline.h>
 #include <linux/page_ext.h>
 #include <linux/page_owner.h>
+#include <linux/sched/isolation.h>
 
 #include "internal.h"
 
@@ -308,6 +309,17 @@ void set_pgdat_percpu_threshold(pg_data_
 	}
 }
 
+DEFINE_STATIC_KEY_FALSE(vmstat_sync_enabled);
+static DEFINE_PER_CPU_ALIGNED(bool, vmstat_dirty);
+
+static inline void mark_vmstat_dirty(void)
+{
+	if (!static_branch_unlikely(&vmstat_sync_enabled))
+		return;
+
+	raw_cpu_write(vmstat_dirty, true);
+}
+
 /*
  * For use when we know that interrupts are disabled,
  * or when we know that preemption is disabled and that
@@ -330,6 +342,7 @@ void __mod_zone_page_state(struct zone *
 		x = 0;
 	}
 	__this_cpu_write(*p, x);
+	mark_vmstat_dirty();
 }
 EXPORT_SYMBOL(__mod_zone_page_state);
 
@@ -361,6 +374,7 @@ void __mod_node_page_state(struct pglist
 		x = 0;
 	}
 	__this_cpu_write(*p, x);
+	mark_vmstat_dirty();
 }
 EXPORT_SYMBOL(__mod_node_page_state);
 
@@ -401,6 +415,7 @@ void __inc_zone_state(struct zone *zone,
 		zone_page_state_add(v + overstep, zone, item);
 		__this_cpu_write(*p, -overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -419,6 +434,7 @@ void __inc_node_state(struct pglist_data
 		node_page_state_add(v + overstep, pgdat, item);
 		__this_cpu_write(*p, -overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -447,6 +463,7 @@ void __dec_zone_state(struct zone *zone,
 		zone_page_state_add(v - overstep, zone, item);
 		__this_cpu_write(*p, overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -465,6 +482,7 @@ void __dec_node_state(struct pglist_data
 		node_page_state_add(v - overstep, pgdat, item);
 		__this_cpu_write(*p, overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -528,6 +546,7 @@ static inline void mod_zone_state(struct
 
 	if (z)
 		zone_page_state_add(z, zone, item);
+	mark_vmstat_dirty();
 }
 
 void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
@@ -596,6 +615,7 @@ static inline void mod_node_state(struct
 
 	if (z)
 		node_page_state_add(z, pgdat, item);
+	mark_vmstat_dirty();
 }
 
 void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
@@ -2006,6 +2026,32 @@ static void vmstat_shepherd(struct work_
 		round_jiffies_relative(sysctl_stat_interval));
 }
 
+void __sync_vmstat(void)
+{
+	int cpu;
+
+	cpu = get_cpu();
+	if (raw_cpu_read(vmstat_dirty) == false) {
+		put_cpu();
+		return;
+	}
+
+	refresh_cpu_vm_stats(false);
+	raw_cpu_write(vmstat_dirty, false);
+	put_cpu();
+
+	/*
+	 * If task is migrated to another CPU between put_cpu
+	 * and cancel_delayed_work_sync, the code below might
+	 * cancel vmstat_update work for a different cpu
+	 * (than the one from which the vmstats were flushed).
+	 *
+	 * However, vmstat shepherd will re-enable it later,
+	 * so its harmless.
+	 */
+	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+}
+
 static void __init start_shepherd_timer(void)
 {
 	int cpu;
Index: linux-2.6-vmstat-update/kernel/entry/common.c
===================================================================
--- linux-2.6-vmstat-update.orig/kernel/entry/common.c
+++ linux-2.6-vmstat-update/kernel/entry/common.c
@@ -6,6 +6,7 @@
 #include <linux/livepatch.h>
 #include <linux/audit.h>
 #include <linux/tick.h>
+#include <linux/vmstat.h>
 
 #include "common.h"
 
@@ -290,6 +291,7 @@ static void syscall_exit_to_user_mode_pr
  */
 static void isolation_exit_to_user_mode_prepare(void)
 {
+	sync_vmstat();
 }
 
 static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace
  2021-07-14 20:42 [patch 0/5] optionally perform deferred actions on return to userspace (v3) Marcelo Tosatti
@ 2021-07-14 20:42 ` Marcelo Tosatti
  0 siblings, 0 replies; 11+ messages in thread
From: Marcelo Tosatti @ 2021-07-14 20:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: Christoph Lameter, Thomas Gleixner, Frederic Weisbecker,
	Juri Lelli, Nitesh Lal, Peter Zijlstra, Nicolas Saenz,
	Marcelo Tosatti

The logic to disable vmstat worker thread, when entering
nohz full, does not cover all scenarios. For example, it is possible
for the following to happen:

1) enter nohz_full, which calls refresh_cpu_vm_stats, syncing the stats.
2) app runs mlock, which increases counters for mlock'ed pages.
3) start -RT loop

Since refresh_cpu_vm_stats from nohz_full logic can happen _before_
the mlock, vmstat shepherd can restart vmstat worker thread on 
the CPU in question.

To fix this, optionally sync the vmstat counters when returning
from userspace, controllable by a new "quiesce_on_exit_to_usermode" isolcpus 
flags (default off).

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: linux-2.6-vmstat-update/kernel/sched/isolation.c
===================================================================
--- linux-2.6-vmstat-update.orig/kernel/sched/isolation.c
+++ linux-2.6-vmstat-update/kernel/sched/isolation.c
@@ -8,6 +8,7 @@
  *
  */
 #include "sched.h"
+#include <linux/vmstat.h>
 
 DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
 EXPORT_SYMBOL_GPL(housekeeping_overridden);
@@ -129,6 +130,11 @@ static int __init housekeeping_setup(cha
 		}
 	}
 
+#ifdef CONFIG_SMP
+	if (flags & HK_FLAG_QUIESCE_URET)
+		static_branch_enable(&vmstat_sync_enabled);
+#endif
+
 	housekeeping_flags |= flags;
 
 	free_bootmem_cpumask_var(non_housekeeping_mask);
Index: linux-2.6-vmstat-update/include/linux/vmstat.h
===================================================================
--- linux-2.6-vmstat-update.orig/include/linux/vmstat.h
+++ linux-2.6-vmstat-update/include/linux/vmstat.h
@@ -21,6 +21,23 @@ int sysctl_vm_numa_stat_handler(struct c
 		void *buffer, size_t *length, loff_t *ppos);
 #endif
 
+#ifdef CONFIG_SMP
+DECLARE_STATIC_KEY_FALSE(vmstat_sync_enabled);
+
+extern void __sync_vmstat(void);
+static inline void sync_vmstat(void)
+{
+	if (static_branch_unlikely(&vmstat_sync_enabled))
+		__sync_vmstat();
+}
+#else
+
+static inline void sync_vmstat(void)
+{
+}
+
+#endif
+
 struct reclaim_stat {
 	unsigned nr_dirty;
 	unsigned nr_unqueued_dirty;
Index: linux-2.6-vmstat-update/mm/vmstat.c
===================================================================
--- linux-2.6-vmstat-update.orig/mm/vmstat.c
+++ linux-2.6-vmstat-update/mm/vmstat.c
@@ -28,6 +28,7 @@
 #include <linux/mm_inline.h>
 #include <linux/page_ext.h>
 #include <linux/page_owner.h>
+#include <linux/sched/isolation.h>
 
 #include "internal.h"
 
@@ -308,6 +309,17 @@ void set_pgdat_percpu_threshold(pg_data_
 	}
 }
 
+DEFINE_STATIC_KEY_FALSE(vmstat_sync_enabled);
+static DEFINE_PER_CPU_ALIGNED(bool, vmstat_dirty);
+
+static inline void mark_vmstat_dirty(void)
+{
+	if (!static_branch_unlikely(&vmstat_sync_enabled))
+		return;
+
+	raw_cpu_write(vmstat_dirty, true);
+}
+
 /*
  * For use when we know that interrupts are disabled,
  * or when we know that preemption is disabled and that
@@ -330,6 +342,7 @@ void __mod_zone_page_state(struct zone *
 		x = 0;
 	}
 	__this_cpu_write(*p, x);
+	mark_vmstat_dirty();
 }
 EXPORT_SYMBOL(__mod_zone_page_state);
 
@@ -361,6 +374,7 @@ void __mod_node_page_state(struct pglist
 		x = 0;
 	}
 	__this_cpu_write(*p, x);
+	mark_vmstat_dirty();
 }
 EXPORT_SYMBOL(__mod_node_page_state);
 
@@ -401,6 +415,7 @@ void __inc_zone_state(struct zone *zone,
 		zone_page_state_add(v + overstep, zone, item);
 		__this_cpu_write(*p, -overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -419,6 +434,7 @@ void __inc_node_state(struct pglist_data
 		node_page_state_add(v + overstep, pgdat, item);
 		__this_cpu_write(*p, -overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -447,6 +463,7 @@ void __dec_zone_state(struct zone *zone,
 		zone_page_state_add(v - overstep, zone, item);
 		__this_cpu_write(*p, overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -465,6 +482,7 @@ void __dec_node_state(struct pglist_data
 		node_page_state_add(v - overstep, pgdat, item);
 		__this_cpu_write(*p, overstep);
 	}
+	mark_vmstat_dirty();
 }
 
 void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -528,6 +546,7 @@ static inline void mod_zone_state(struct
 
 	if (z)
 		zone_page_state_add(z, zone, item);
+	mark_vmstat_dirty();
 }
 
 void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
@@ -596,6 +615,7 @@ static inline void mod_node_state(struct
 
 	if (z)
 		node_page_state_add(z, pgdat, item);
+	mark_vmstat_dirty();
 }
 
 void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
@@ -2006,6 +2026,37 @@ static void vmstat_shepherd(struct work_
 		round_jiffies_relative(sysctl_stat_interval));
 }
 
+void __sync_vmstat(void)
+{
+	int cpu;
+
+	cpu = get_cpu();
+	if (housekeeping_cpu(cpu, HK_FLAG_QUIESCE_URET)) {
+		put_cpu();
+		return;
+	}
+
+	if (!raw_cpu_read(vmstat_dirty)) {
+		put_cpu();
+		return;
+	}
+
+	refresh_cpu_vm_stats(false);
+	raw_cpu_write(vmstat_dirty, false);
+	put_cpu();
+
+	/*
+	 * If task is migrated to another CPU between put_cpu
+	 * and cancel_delayed_work_sync, the code below might
+	 * cancel vmstat_update work for a different cpu
+	 * (than the one from which the vmstats were flushed).
+	 *
+	 * However, vmstat shepherd will re-enable it later,
+	 * so its harmless.
+	 */
+	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+}
+
 static void __init start_shepherd_timer(void)
 {
 	int cpu;
Index: linux-2.6-vmstat-update/kernel/entry/common.c
===================================================================
--- linux-2.6-vmstat-update.orig/kernel/entry/common.c
+++ linux-2.6-vmstat-update/kernel/entry/common.c
@@ -6,6 +6,7 @@
 #include <linux/livepatch.h>
 #include <linux/audit.h>
 #include <linux/tick.h>
+#include <linux/vmstat.h>
 
 #include "common.h"
 
@@ -290,6 +291,7 @@ static void syscall_exit_to_user_mode_pr
  */
 static void isolation_exit_to_user_mode_prepare(void)
 {
+	sync_vmstat();
 }
 
 static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)



^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-07-14 20:43 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-07-03  4:54 [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace kernel test robot
  -- strict thread matches above, loose matches on Subject: below --
2021-07-14 20:42 [patch 0/5] optionally perform deferred actions on return to userspace (v3) Marcelo Tosatti
2021-07-14 20:42 ` [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters on return to userspace Marcelo Tosatti
2021-07-09 17:37 [patch 0/5] optionally perform deferred actions " Marcelo Tosatti
2021-07-09 17:37 ` [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters " Marcelo Tosatti
2021-07-12  9:05   ` Christoph Lameter
2021-07-12 10:30     ` Marcelo Tosatti
2021-07-13 19:30     ` Marcelo Tosatti
2021-07-01 21:03 [patch 0/5] optionally sync per-CPU vmstats counter " Marcelo Tosatti
2021-07-01 21:03 ` [patch 3/5] mm: vmstat: optionally flush per-CPU vmstat counters " Marcelo Tosatti
2021-07-01 23:11   ` kernel test robot
2021-07-01 23:11     ` kernel test robot
2021-07-02  6:50   ` kernel test robot
2021-07-02  6:50     ` kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.