Linux cgroups development
 help / color / mirror / Atom feed
From: Qais Yousef <qyousef@layalina.io>
To: stable@vger.kernel.org
Cc: Juri Lelli <juri.lelli@redhat.com>,
	Waiman Long <longman@redhat.com>, Tejun Heo <tj@kernel.org>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Ingo Molnar <mingo@kernel.org>, Hao Luo <haoluo@google.com>,
	John Stultz <jstultz@google.com>,
	cgroups@vger.kernel.org, linux-kernel@vger.kernel.org,
	Qais Yousef <qyousef@layalina.io>
Subject: [PATCH 3/6] sched/cpuset: Keep track of SCHED_DEADLINE task in cpusets
Date: Sun, 20 Aug 2023 16:22:55 +0100	[thread overview]
Message-ID: <20230820152258.518128-4-qyousef@layalina.io> (raw)
In-Reply-To: <20230820152258.518128-1-qyousef@layalina.io>

From: Juri Lelli <juri.lelli@redhat.com>

commit 6c24849f5515e4966d94fa5279bdff4acf2e9489 upstream.

Qais reported that iterating over all tasks when rebuilding root domains,
in order to find out which ones are DEADLINE and need their bandwidth
correctly restored on such root domains, can be a costly operation (10+
ms delays on suspend-resume).

To fix the problem, keep track of the number of DEADLINE tasks belonging
to each cpuset and then use this information (followup patch) to only
perform the above iteration if DEADLINE tasks are actually present in
the cpuset for which a corresponding root domain is being rebuilt.

Reported-by: Qais Yousef (Google) <qyousef@layalina.io>
Link: https://lore.kernel.org/lkml/20230206221428.2125324-1-qyousef@layalina.io/
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Reviewed-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
(cherry picked from commit 6c24849f5515e4966d94fa5279bdff4acf2e9489)
[Conflict in kernel/cgroup/cpuset.c and kernel/sched/deadline.c due to
pulling new code. Reject new code/fields.]
Signed-off-by: Qais Yousef (Google) <qyousef@layalina.io>
---
 include/linux/cpuset.h  |  4 ++++
 kernel/cgroup/cgroup.c  |  4 ++++
 kernel/cgroup/cpuset.c  | 25 +++++++++++++++++++++++++
 kernel/sched/deadline.c | 13 +++++++++++++
 4 files changed, 46 insertions(+)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 5a94f5143924..82fb7e24d1cb 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -56,6 +56,8 @@ extern void cpuset_init_smp(void);
 extern void cpuset_force_rebuild(void);
 extern void cpuset_update_active_cpus(void);
 extern void cpuset_wait_for_hotplug(void);
+extern void inc_dl_tasks_cs(struct task_struct *task);
+extern void dec_dl_tasks_cs(struct task_struct *task);
 extern void cpuset_lock(void);
 extern void cpuset_unlock(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
@@ -179,6 +181,8 @@ static inline void cpuset_update_active_cpus(void)
 
 static inline void cpuset_wait_for_hotplug(void) { }
 
+static inline void inc_dl_tasks_cs(struct task_struct *task) { }
+static inline void dec_dl_tasks_cs(struct task_struct *task) { }
 static inline void cpuset_lock(void) { }
 static inline void cpuset_unlock(void) { }
 
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 6ccdbce17399..be467aea457e 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -56,6 +56,7 @@
 #include <linux/file.h>
 #include <linux/fs_parser.h>
 #include <linux/sched/cputime.h>
+#include <linux/sched/deadline.h>
 #include <linux/psi.h>
 #include <net/sock.h>
 
@@ -6467,6 +6468,9 @@ void cgroup_exit(struct task_struct *tsk)
 	list_add_tail(&tsk->cg_list, &cset->dying_tasks);
 	cset->nr_tasks--;
 
+	if (dl_task(tsk))
+		dec_dl_tasks_cs(tsk);
+
 	WARN_ON_ONCE(cgroup_task_frozen(tsk));
 	if (unlikely(!(tsk->flags & PF_KTHREAD) &&
 		     test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 0eff9fb4e6d3..00821ed3d300 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -162,6 +162,12 @@ struct cpuset {
 	int use_parent_ecpus;
 	int child_ecpus_count;
 
+	/*
+	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
+	 * know when to rebuild associated root domain bandwidth information.
+	 */
+	int nr_deadline_tasks;
+
 	/* Handle for cpuset.cpus.partition */
 	struct cgroup_file partition_file;
 };
@@ -209,6 +215,20 @@ static inline struct cpuset *parent_cs(struct cpuset *cs)
 	return css_cs(cs->css.parent);
 }
 
+void inc_dl_tasks_cs(struct task_struct *p)
+{
+	struct cpuset *cs = task_cs(p);
+
+	cs->nr_deadline_tasks++;
+}
+
+void dec_dl_tasks_cs(struct task_struct *p)
+{
+	struct cpuset *cs = task_cs(p);
+
+	cs->nr_deadline_tasks--;
+}
+
 /* bits in struct cpuset flags field */
 typedef enum {
 	CS_ONLINE,
@@ -2210,6 +2230,11 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
 		ret = security_task_setscheduler(task);
 		if (ret)
 			goto out_unlock;
+
+		if (dl_task(task)) {
+			cs->nr_deadline_tasks++;
+			cpuset_attach_old_cs->nr_deadline_tasks--;
+		}
 	}
 
 	/*
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b3e206498395..fced55d6e8da 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -17,6 +17,7 @@
  */
 #include "sched.h"
 #include "pelt.h"
+#include <linux/cpuset.h>
 
 struct dl_bandwidth def_dl_bandwidth;
 
@@ -2446,6 +2447,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
 	if (task_on_rq_queued(p) && p->dl.dl_runtime)
 		task_non_contending(p);
 
+	/*
+	 * In case a task is setscheduled out from SCHED_DEADLINE we need to
+	 * keep track of that on its cpuset (for correct bandwidth tracking).
+	 */
+	dec_dl_tasks_cs(p);
+
 	if (!task_on_rq_queued(p)) {
 		/*
 		 * Inactive timer is armed. However, p is leaving DEADLINE and
@@ -2486,6 +2493,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
 		put_task_struct(p);
 
+	/*
+	 * In case a task is setscheduled to SCHED_DEADLINE we need to keep
+	 * track of that on its cpuset (for correct bandwidth tracking).
+	 */
+	inc_dl_tasks_cs(p);
+
 	/* If p is not queued we will update its parameters at next wakeup. */
 	if (!task_on_rq_queued(p)) {
 		add_rq_bw(&p->dl, &rq->dl);
-- 
2.34.1


  parent reply	other threads:[~2023-08-20 15:22 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-20 15:22 [PATCH 0/6] Backport rework of deadline bandwidth restoration for 5.15.y Qais Yousef
2023-08-20 15:22 ` [PATCH 2/6] sched/cpuset: Bring back cpuset_mutex Qais Yousef
2023-08-20 15:22 ` Qais Yousef [this message]
     [not found] ` <20230820152258.518128-1-qyousef-wp2msK0BRk8tq7phqP6ubQ@public.gmane.org>
2023-08-20 15:22   ` [PATCH 1/6] cgroup/cpuset: Rename functions dealing with DEADLINE accounting Qais Yousef
2023-08-20 15:22   ` [PATCH 4/6] cgroup/cpuset: Iterate only if DEADLINE tasks are present Qais Yousef
2023-08-20 15:22   ` [PATCH 5/6] sched/deadline: Create DL BW alloc, free & check overflow interface Qais Yousef
2023-08-20 15:22   ` [PATCH 6/6] cgroup/cpuset: Free DL BW in case can_attach() fails Qais Yousef
  -- strict thread matches above, loose matches on Subject: below --
2023-08-21 22:19 [PATCH 0/6] Backport rework of deadline bandwidth restoration for 6.4.y Qais Yousef
     [not found] ` <20230821221956.698117-1-qyousef-wp2msK0BRk8tq7phqP6ubQ@public.gmane.org>
2023-08-21 22:19   ` [PATCH 3/6] sched/cpuset: Keep track of SCHED_DEADLINE task in cpusets Qais Yousef
2023-08-20 15:24 [PATCH 0/6] Backport rework of deadline bandwidth restoration for 6.1.y Qais Yousef
2023-08-20 15:24 ` [PATCH 3/6] sched/cpuset: Keep track of SCHED_DEADLINE task in cpusets Qais Yousef
2023-08-20 15:21 [PATCH 0/6] Backport rework of deadline bandwidth restoration for 5.10.y Qais Yousef
2023-08-20 15:21 ` [PATCH 3/6] sched/cpuset: Keep track of SCHED_DEADLINE task in cpusets Qais Yousef
2023-03-29 12:55 [PATCH 0/6] sched/deadline: cpuset: Rework DEADLINE bandwidth restoration Juri Lelli
2023-03-29 12:55 ` [PATCH 3/6] sched/cpuset: Keep track of SCHED_DEADLINE task in cpusets Juri Lelli
     [not found]   ` <20230329125558.255239-4-juri.lelli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2023-04-04 20:06     ` Qais Yousef
2023-10-09 11:43   ` Xia Fukun
2023-10-09 15:26     ` Waiman Long

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230820152258.518128-4-qyousef@layalina.io \
    --to=qyousef@layalina.io \
    --cc=cgroups@vger.kernel.org \
    --cc=dietmar.eggemann@arm.com \
    --cc=haoluo@google.com \
    --cc=jstultz@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox