public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: torvalds@linux-foundation.org, mingo@redhat.com,
	peterz@infradead.org, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
	bristot@redhat.com, vschneid@redhat.com, ast@kernel.org,
	daniel@iogearbox.net, andrii@kernel.org, martin.lau@kernel.org,
	joshdon@google.com, brho@google.com, pjt@google.com,
	derkling@google.com, haoluo@google.com, dvernet@meta.com,
	dschatzberg@meta.com, dskarlat@cs.cmu.edu, riel@surriel.com
Cc: linux-kernel@vger.kernel.org, bpf@vger.kernel.org,
	kernel-team@meta.com, Tejun Heo <tj@kernel.org>
Subject: [PATCH 04/32] sched: Allow sched_cgroup_fork() to fail and introduce sched_cancel_fork()
Date: Fri, 17 Mar 2023 11:33:05 -1000	[thread overview]
Message-ID: <20230317213333.2174969-5-tj@kernel.org> (raw)
In-Reply-To: <20230317213333.2174969-1-tj@kernel.org>

A new BPF extensible sched_class will need more control over the forking
process. It wants to be able to fail from sched_cgroup_fork() after the new
task's sched_task_group is initialized so that the loaded BPF program can
prepare the task with its cgroup association established and reject the fork
if e.g. allocation fails.

Allow sched_cgroup_fork() to fail by making it return int instead of void
and adding sched_cancel_fork() to undo sched_fork() in the error path.

sched_cgroup_fork() doesn't fail yet and this patch shouldn't cause any
behavior changes.

v2: Patch description updated to detail the expected use.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: David Vernet <dvernet@meta.com>
Acked-by: Josh Don <joshdon@google.com>
Acked-by: Hao Luo <haoluo@google.com>
Acked-by: Barret Rhoden <brho@google.com>
---
 include/linux/sched/task.h |  3 ++-
 kernel/fork.c              | 15 ++++++++++-----
 kernel/sched/core.c        |  8 +++++++-
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 357e0068497c..dcff721170c3 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -58,7 +58,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
 extern void init_idle(struct task_struct *idle, int cpu);
 
 extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
-extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs);
+extern int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs);
+extern void sched_cancel_fork(struct task_struct *p);
 extern void sched_post_fork(struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index f68954d05e89..0d166537a1a3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2242,7 +2242,7 @@ static __latent_entropy struct task_struct *copy_process(
 
 	retval = perf_event_init_task(p, clone_flags);
 	if (retval)
-		goto bad_fork_cleanup_policy;
+		goto bad_fork_sched_cancel_fork;
 	retval = audit_alloc(p);
 	if (retval)
 		goto bad_fork_cleanup_perf;
@@ -2383,7 +2383,9 @@ static __latent_entropy struct task_struct *copy_process(
 	 * cgroup specific, it unconditionally needs to place the task on a
 	 * runqueue.
 	 */
-	sched_cgroup_fork(p, args);
+	retval = sched_cgroup_fork(p, args);
+	if (retval)
+		goto bad_fork_cancel_cgroup;
 
 	/*
 	 * From this point on we must avoid any synchronous user-space
@@ -2429,13 +2431,13 @@ static __latent_entropy struct task_struct *copy_process(
 	/* Don't start children in a dying pid namespace */
 	if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) {
 		retval = -ENOMEM;
-		goto bad_fork_cancel_cgroup;
+		goto bad_fork_core_free;
 	}
 
 	/* Let kill terminate clone/fork in the middle */
 	if (fatal_signal_pending(current)) {
 		retval = -EINTR;
-		goto bad_fork_cancel_cgroup;
+		goto bad_fork_core_free;
 	}
 
 	/* No more failure paths after this point. */
@@ -2510,10 +2512,11 @@ static __latent_entropy struct task_struct *copy_process(
 
 	return p;
 
-bad_fork_cancel_cgroup:
+bad_fork_core_free:
 	sched_core_free(p);
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
+bad_fork_cancel_cgroup:
 	cgroup_cancel_fork(p, args);
 bad_fork_put_pidfd:
 	if (clone_flags & CLONE_PIDFD) {
@@ -2552,6 +2555,8 @@ static __latent_entropy struct task_struct *copy_process(
 	audit_free(p);
 bad_fork_cleanup_perf:
 	perf_event_free_task(p);
+bad_fork_sched_cancel_fork:
+	sched_cancel_fork(p);
 bad_fork_cleanup_policy:
 	lockdep_free_task(p);
 #ifdef CONFIG_NUMA
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index efac96fd6cfd..fdf4dba12a7e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4768,7 +4768,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	return 0;
 }
 
-void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
+int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
 {
 	unsigned long flags;
 
@@ -4795,6 +4795,12 @@ void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
 	if (p->sched_class->task_fork)
 		p->sched_class->task_fork(p);
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+	return 0;
+}
+
+void sched_cancel_fork(struct task_struct *p)
+{
 }
 
 void sched_post_fork(struct task_struct *p)
-- 
2.39.2


  parent reply	other threads:[~2023-03-17 21:36 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-17 21:33 [PATCHSET v3] sched: Implement BPF extensible scheduler class Tejun Heo
2023-03-17 21:33 ` [PATCH 01/32] cgroup: Implement cgroup_show_cftypes() Tejun Heo
2023-03-17 21:33 ` [PATCH 02/32] sched: Encapsulate task attribute change sequence into a helper macro Tejun Heo
2023-03-17 21:33 ` [PATCH 03/32] sched: Restructure sched_class order sanity checks in sched_init() Tejun Heo
2023-03-17 21:33 ` Tejun Heo [this message]
2023-03-17 21:33 ` [PATCH 05/32] sched: Add sched_class->reweight_task() Tejun Heo
2023-03-17 21:33 ` [PATCH 06/32] sched: Add sched_class->switching_to() and expose check_class_changing/changed() Tejun Heo
2023-03-17 21:33 ` [PATCH 07/32] sched: Factor out cgroup weight conversion functions Tejun Heo
2023-03-17 21:33 ` [PATCH 08/32] sched: Expose css_tg(), __setscheduler_prio() and SCHED_CHANGE_BLOCK() Tejun Heo
2023-03-17 21:33 ` [PATCH 09/32] sched: Enumerate CPU cgroup file types Tejun Heo
2023-03-17 21:33 ` [PATCH 10/32] sched: Add @reason to sched_class->rq_{on|off}line() Tejun Heo
2023-03-17 21:33 ` [PATCH 11/32] sched: Add normal_policy() Tejun Heo
2023-03-17 21:33 ` [PATCH 12/32] sched_ext: Add boilerplate for extensible scheduler class Tejun Heo
2023-03-17 21:33 ` [PATCH 13/32] sched_ext: Implement BPF " Tejun Heo
2023-03-17 21:33 ` [PATCH 14/32] sched_ext: Add scx_example_simple and scx_example_qmap example schedulers Tejun Heo
2023-03-17 21:33 ` [PATCH 15/32] sched_ext: Add sysrq-S which disables the BPF scheduler Tejun Heo
2023-03-17 21:33 ` [PATCH 16/32] sched_ext: Implement runnable task stall watchdog Tejun Heo
2023-03-17 21:33 ` [PATCH 17/32] sched_ext: Allow BPF schedulers to disallow specific tasks from joining SCHED_EXT Tejun Heo
2023-03-17 21:33 ` [PATCH 18/32] sched_ext: Allow BPF schedulers to switch all eligible tasks into sched_ext Tejun Heo
2023-03-17 21:33 ` [PATCH 19/32] sched_ext: Implement scx_bpf_kick_cpu() and task preemption support Tejun Heo
2023-03-17 21:33 ` [PATCH 20/32] sched_ext: Make watchdog handle ops.dispatch() looping stall Tejun Heo
2023-03-17 21:33 ` [PATCH 21/32] sched_ext: Add task state tracking operations Tejun Heo
2023-03-17 21:33 ` [PATCH 22/32] sched_ext: Implement tickless support Tejun Heo
2023-03-17 21:33 ` [PATCH 23/32] sched_ext: Track tasks that are subjects of the in-flight SCX operation Tejun Heo
2023-03-17 21:33 ` [PATCH 24/32] sched_ext: Add cgroup support Tejun Heo
2023-04-20 20:02   ` Andrea Righi
2023-04-21 14:32     ` Tejun Heo
2023-03-17 21:33 ` [PATCH 25/32] sched_ext: Implement SCX_KICK_WAIT Tejun Heo
2023-03-17 21:33 ` [PATCH 26/32] sched_ext: Implement sched_ext_ops.cpu_acquire/release() Tejun Heo
2023-03-17 21:33 ` [PATCH 27/32] sched_ext: Implement sched_ext_ops.cpu_online/offline() Tejun Heo
2023-03-17 21:33 ` [PATCH 28/32] sched_ext: Implement core-sched support Tejun Heo
2023-04-20 19:56   ` Andrea Righi
2023-04-21 14:31     ` Tejun Heo
2023-03-17 21:33 ` [PATCH 29/32] sched_ext: Add vtime-ordered priority queue to dispatch_q's Tejun Heo
2023-03-17 21:33 ` [PATCH 30/32] sched_ext: Documentation: scheduler: Document extensible scheduler class Tejun Heo
2023-03-18  2:05   ` Bagas Sanjaya
2023-03-17 21:33 ` [PATCH 31/32] sched_ext: Add a basic, userland vruntime scheduler Tejun Heo
2023-03-17 21:33 ` [PATCH 32/32] sched_ext: Add a rust userspace hybrid example scheduler Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230317213333.2174969-5-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=brho@google.com \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=daniel@iogearbox.net \
    --cc=derkling@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=dschatzberg@meta.com \
    --cc=dskarlat@cs.cmu.edu \
    --cc=dvernet@meta.com \
    --cc=haoluo@google.com \
    --cc=joshdon@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=martin.lau@kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=riel@surriel.com \
    --cc=rostedt@goodmis.org \
    --cc=torvalds@linux-foundation.org \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox