All of lore.kernel.org
 help / color / mirror / Atom feed
From: Joe Damato <jdamato@fastly.com>
To: x86@kernel.org, linux-mm@kvack.org,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Andy Lutomirski <luto@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Daniel Bristot de Oliveira <bristot@redhat.com>,
	Valentin Schneider <vschneid@redhat.com>,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: Joe Damato <jdamato@fastly.com>
Subject: [RFC 1/1] mm: Add per-task struct tlb counters
Date: Tue, 13 Sep 2022 18:51:09 -0700	[thread overview]
Message-ID: <1663120270-2673-2-git-send-email-jdamato@fastly.com> (raw)
In-Reply-To: <1663120270-2673-1-git-send-email-jdamato@fastly.com>

TLB shootdowns are tracked globally, but on a busy system it can be
difficult to disambiguate the source of TLB shootdowns.

Add two counter fields:
	- nrtlbflush: number of TLB flush events received
	- ngtlbflush: number of TLB flush events generated

Expose those fields in /proc/[pid]/stat so that they can be analyzed
alongside similar metrics (e.g. min_flt and maj_flt).

Signed-off-by: Joe Damato <jdamato@fastly.com>
---
 arch/x86/mm/tlb.c            | 2 ++
 fs/proc/array.c              | 9 +++++++++
 include/linux/sched.h        | 6 ++++++
 include/linux/sched/signal.h | 1 +
 kernel/exit.c                | 6 ++++++
 kernel/fork.c                | 1 +
 6 files changed, 25 insertions(+)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c1e31e9..58f7c59 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -745,6 +745,7 @@ static void flush_tlb_func(void *info)
 	if (!local) {
 		inc_irq_stat(irq_tlb_count);
 		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+		current->nrtlbflush++;
 
 		/* Can only happen on remote CPUs */
 		if (f->mm && f->mm != loaded_mm)
@@ -895,6 +896,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
 	 * would not happen.
 	 */
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+	current->ngtlbflush++;
 	if (info->end == TLB_FLUSH_ALL)
 		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
 	else
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 49283b81..435afdc 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -469,6 +469,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	unsigned long long start_time;
 	unsigned long cmin_flt = 0, cmaj_flt = 0;
 	unsigned long  min_flt = 0,  maj_flt = 0;
+	unsigned long ngtlbflush = 0, nrtlbflush = 0;
 	u64 cutime, cstime, utime, stime;
 	u64 cgtime, gtime;
 	unsigned long rsslim = 0;
@@ -530,11 +531,15 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
+				ngtlbflush += t->ngtlbflush;
+				nrtlbflush += t->nrtlbflush;
 				gtime += task_gtime(t);
 			} while_each_thread(task, t);
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
+			ngtlbflush += sig->ngtlbflush;
+			nrtlbflush += sig->nrtlbflush;
 			thread_group_cputime_adjusted(task, &utime, &stime);
 			gtime += sig->gtime;
 
@@ -554,6 +559,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	if (!whole) {
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
+		nrtlbflush = task->nrtlbflush;
+		ngtlbflush = task->ngtlbflush;
 		task_cputime_adjusted(task, &utime, &stime);
 		gtime = task_gtime(task);
 	}
@@ -643,6 +650,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	else
 		seq_puts(m, " 0");
 
+	seq_put_decimal_ull(m, " ", ngtlbflush);
+	seq_put_decimal_ull(m, " ", nrtlbflush);
 	seq_putc(m, '\n');
 	if (mm)
 		mmput(mm);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5cdf746..2a0d879 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1047,6 +1047,12 @@ struct task_struct {
 	unsigned long			min_flt;
 	unsigned long			maj_flt;
 
+	/* Number of TLB flushes generated by this task */
+	unsigned long			ngtlbflush;
+
+	/* Number of TLB flushes received by this task */
+	unsigned long			nrtlbflush;
+
 	/* Empty if CONFIG_POSIX_CPUTIMERS=n */
 	struct posix_cputimers		posix_cputimers;
 
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 2009926..4e0b09c 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -189,6 +189,7 @@ struct signal_struct {
 	struct prev_cputime prev_cputime;
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
+	unsigned long ngtlbflush, nrtlbflush;
 	unsigned long inblock, oublock, cinblock, coublock;
 	unsigned long maxrss, cmaxrss;
 	struct task_io_accounting ioac;
diff --git a/kernel/exit.c b/kernel/exit.c
index 35e0a31..5a72755 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -141,6 +141,8 @@ static void __exit_signal(struct task_struct *tsk)
 	sig->gtime += task_gtime(tsk);
 	sig->min_flt += tsk->min_flt;
 	sig->maj_flt += tsk->maj_flt;
+	sig->ngtlbflush += tsk->ngtlbflush;
+	sig->nrtlbflush += tsk->nrtlbflush;
 	sig->nvcsw += tsk->nvcsw;
 	sig->nivcsw += tsk->nivcsw;
 	sig->inblock += task_io_get_inblock(tsk);
@@ -1095,6 +1097,10 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 			p->min_flt + sig->min_flt + sig->cmin_flt;
 		psig->cmaj_flt +=
 			p->maj_flt + sig->maj_flt + sig->cmaj_flt;
+		psig->ngtlbflush +=
+			p->ngtlbflush + sig->ngtlbflush;
+		psig->nrtlbflush +=
+			p->nrtlbflush + sig->nrtlbflush;
 		psig->cnvcsw +=
 			p->nvcsw + sig->nvcsw + sig->cnvcsw;
 		psig->cnivcsw +=
diff --git a/kernel/fork.c b/kernel/fork.c
index b339918..5fa9f64 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1555,6 +1555,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 	struct mm_struct *mm, *oldmm;
 
 	tsk->min_flt = tsk->maj_flt = 0;
+	tsk->ngtlbflush = tsk->nrtlbflush = 0;
 	tsk->nvcsw = tsk->nivcsw = 0;
 #ifdef CONFIG_DETECT_HUNG_TASK
 	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
-- 
2.7.4


  reply	other threads:[~2022-09-14  1:53 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-14  1:51 [RFC 0/1] mm: Track per-task tlb events Joe Damato
2022-09-14  1:51 ` Joe Damato [this message]
2022-09-14  7:40   ` [RFC 1/1] mm: Add per-task struct tlb counters Dave Hansen
2022-09-14 11:58     ` Peter Zijlstra
2022-09-14 14:23       ` Joe Damato
2022-09-14 14:15     ` Joe Damato
2022-09-14 14:25       ` Joe Damato
2022-09-15  8:50       ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1663120270-2673-2-git-send-email-jdamato@fastly.com \
    --to=jdamato@fastly.com \
    --cc=bp@alien8.de \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=hpa@zytor.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.