All of lore.kernel.org
 help / color / mirror / Atom feed
From: Frederic Weisbecker <frederic@kernel.org>
To: Peter Zijlstra <peterz@infradead.org>, Ingo Molnar <mingo@kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Jacek Anaszewski <jacek.anaszewski@gmail.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	"Rafael J . Wysocki" <rjw@rjwysocki.net>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Rik van Riel <riel@surriel.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Yauheni Kaliuta <yauheni.kaliuta@redhat.com>,
	Viresh Kumar <viresh.kumar@linaro.org>,
	Pavel Machek <pavel@ucw.cz>
Subject: [PATCH 2/6] sched/vtime: Bring up complete kcpustat accessor
Date: Thu, 21 Nov 2019 03:44:26 +0100	[thread overview]
Message-ID: <20191121024430.19938-3-frederic@kernel.org> (raw)
In-Reply-To: <20191121024430.19938-1-frederic@kernel.org>

Many callsites want to fetch the values of system, user, user_nice, guest
or guest_nice kcpustat fields altogether or at least a pair of these.

In that case calling kcpustat_field() for each requested field brings
unecessary overhead when we could fetch all of them in a row.

So provide kcpustat_cpu_fetch() that fetches the whole kcpustat array
in a vtime safe way under the same RCU and seqcount block.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Rik van Riel <riel@surriel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kernel_stat.h |   7 ++
 kernel/sched/cputime.c      | 136 ++++++++++++++++++++++++++++++------
 2 files changed, 123 insertions(+), 20 deletions(-)

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 79781196eb25..89f0745c096d 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -81,12 +81,19 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern u64 kcpustat_field(struct kernel_cpustat *kcpustat,
 			  enum cpu_usage_stat usage, int cpu);
+extern void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu);
 #else
 static inline u64 kcpustat_field(struct kernel_cpustat *kcpustat,
 				 enum cpu_usage_stat usage, int cpu)
 {
 	return kcpustat->cpustat[usage];
 }
+
+static inline void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
+{
+	*dst = kcpustat_cpu(cpu);
+}
+
 #endif
 
 extern void account_user_time(struct task_struct *, u64);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 27b5406222fc..d43318a489f2 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -912,6 +912,30 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
 }
 
+static int vtime_state_check(struct vtime *vtime, int cpu)
+{
+	/*
+	 * We raced against a context switch, fetch the
+	 * kcpustat task again.
+	 */
+	if (vtime->cpu != cpu && vtime->cpu != -1)
+		return -EAGAIN;
+
+	/*
+	 * Two possible things here:
+	 * 1) We are seeing the scheduling out task (prev) or any past one.
+	 * 2) We are seeing the scheduling in task (next) but it hasn't
+	 *    passed though vtime_task_switch() yet so the pending
+	 *    cputime of the prev task may not be flushed yet.
+	 *
+	 * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
+	 */
+	if (vtime->state == VTIME_INACTIVE)
+		return -EAGAIN;
+
+	return 0;
+}
+
 static u64 kcpustat_user_vtime(struct vtime *vtime)
 {
 	if (vtime->state == VTIME_USER)
@@ -933,26 +957,9 @@ static int kcpustat_field_vtime(u64 *cpustat,
 	do {
 		seq = read_seqcount_begin(&vtime->seqcount);
 
-		/*
-		 * We raced against context switch, fetch the
-		 * kcpustat task again.
-		 */
-		if (vtime->cpu != cpu && vtime->cpu != -1)
-			return -EAGAIN;
-
-		/*
-		 * Two possible things here:
-		 * 1) We are seeing the scheduling out task (prev) or any past one.
-		 * 2) We are seeing the scheduling in task (next) but it hasn't
-		 *    passed though vtime_task_switch() yet so the pending
-		 *    cputime of the prev task may not be flushed yet.
-		 *
-		 * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
-		 */
-		if (vtime->state == VTIME_INACTIVE)
-			return -EAGAIN;
-
-		err = 0;
+		err = vtime_state_check(vtime, cpu);
+		if (err < 0)
+			return err;
 
 		*val = cpustat[usage];
 
@@ -1025,4 +1032,93 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat,
 	}
 }
 EXPORT_SYMBOL_GPL(kcpustat_field);
+
+static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
+				    const struct kernel_cpustat *src,
+				    struct task_struct *tsk, int cpu)
+{
+	struct vtime *vtime = &tsk->vtime;
+	unsigned int seq;
+	int err;
+
+	do {
+		u64 *cpustat;
+		u64 delta;
+
+		seq = read_seqcount_begin(&vtime->seqcount);
+
+		err = vtime_state_check(vtime, cpu);
+		if (err < 0)
+			return err;
+
+		*dst = *src;
+		cpustat = dst->cpustat;
+
+		/* Task is sleeping, dead or idle, nothing to add */
+		if (vtime->state < VTIME_SYS)
+			continue;
+
+		delta = vtime_delta(vtime);
+
+		/*
+		 * Task runs either in user (including guest) or kernel space,
+		 * add pending nohz time to the right place.
+		 */
+		if (vtime->state == VTIME_SYS) {
+			cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
+		} else if (vtime->state == VTIME_USER) {
+			if (task_nice(tsk) > 0)
+				cpustat[CPUTIME_NICE] += vtime->utime + delta;
+			else
+				cpustat[CPUTIME_USER] += vtime->utime + delta;
+		} else {
+			WARN_ON_ONCE(vtime->state != VTIME_GUEST);
+			if (task_nice(tsk) > 0) {
+				cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
+				cpustat[CPUTIME_NICE] += vtime->gtime + delta;
+			} else {
+				cpustat[CPUTIME_GUEST] += vtime->gtime + delta;
+				cpustat[CPUTIME_USER] += vtime->gtime + delta;
+			}
+		}
+	} while (read_seqcount_retry(&vtime->seqcount, seq));
+
+	return err;
+}
+
+void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
+{
+	const struct kernel_cpustat *src = &kcpustat_cpu(cpu);
+	struct rq *rq;
+	int err;
+
+	if (!vtime_accounting_enabled_cpu(cpu)) {
+		*dst = *src;
+		return;
+	}
+
+	rq = cpu_rq(cpu);
+
+	for (;;) {
+		struct task_struct *curr;
+
+		rcu_read_lock();
+		curr = rcu_dereference(rq->curr);
+		if (WARN_ON_ONCE(!curr)) {
+			rcu_read_unlock();
+			*dst = *src;
+			return;
+		}
+
+		err = kcpustat_cpu_fetch_vtime(dst, src, curr, cpu);
+		rcu_read_unlock();
+
+		if (!err)
+			return;
+
+		cpu_relax();
+	}
+}
+EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);
+
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-- 
2.23.0


  parent reply	other threads:[~2019-11-21  2:44 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-21  2:44 [PATCH 0/6] sched/nohz: Make the rest of kcpustat vtime aware v3 Frederic Weisbecker
2019-11-21  2:44 ` [PATCH 1/6] sched/cputime: Support other fields on kcpustat_field() Frederic Weisbecker
2019-11-21 16:48   ` [tip: sched/core] " tip-bot2 for Frederic Weisbecker
2019-11-21  2:44 ` Frederic Weisbecker [this message]
2019-11-21 16:48   ` [tip: sched/core] sched/vtime: Bring up complete kcpustat accessor tip-bot2 for Frederic Weisbecker
2019-12-28 20:56   ` [PATCH 2/6] " Chris Wilson
2019-12-30  1:08     ` Frederic Weisbecker
2019-12-30  9:04       ` [PATCH] sched/vtime: Prevent unstable evaluation of WARN(vtime->state) Chris Wilson
2019-12-30 17:39         ` Frederic Weisbecker
2019-11-21  2:44 ` [PATCH 3/6] procfs: Use all-in-one vtime aware kcpustat accessor Frederic Weisbecker
2019-11-21 16:48   ` [tip: sched/core] " tip-bot2 for Frederic Weisbecker
2019-12-08 15:57   ` [PATCH 3/6] " Paul Orlyk
2019-12-09 15:50     ` Frederic Weisbecker
2019-11-21  2:44 ` [PATCH 4/6] cpufreq: Use vtime aware kcpustat accessors for user time Frederic Weisbecker
2019-11-21 16:48   ` [tip: sched/core] " tip-bot2 for Frederic Weisbecker
2019-11-21  2:44 ` [PATCH 5/6] leds: Use all-in-one vtime aware kcpustat accessor Frederic Weisbecker
2019-11-21  6:58   ` Ingo Molnar
2019-11-21 14:14     ` Frederic Weisbecker
2019-11-21 16:49       ` Ingo Molnar
2019-11-21 16:48   ` [tip: sched/core] " tip-bot2 for Frederic Weisbecker
2019-11-21  2:44 ` [PATCH 6/6] rackmeter: Use " Frederic Weisbecker
2019-11-21 16:48   ` [tip: sched/core] " tip-bot2 for Frederic Weisbecker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191121024430.19938-3-frederic@kernel.org \
    --to=frederic@kernel.org \
    --cc=benh@kernel.crashing.org \
    --cc=jacek.anaszewski@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=pavel@ucw.cz \
    --cc=peterz@infradead.org \
    --cc=riel@surriel.com \
    --cc=rjw@rjwysocki.net \
    --cc=tglx@linutronix.de \
    --cc=viresh.kumar@linaro.org \
    --cc=wanpengli@tencent.com \
    --cc=yauheni.kaliuta@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.