From: Djalal Harouni <tixxdz@gmail.com>
To: tj@kernel.org, hannes@cmpxchg.org, mkoutny@suse.com,
ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
martin.lau@linux.dev, eddyz87@gmail.com, song@kernel.org,
yonghong.song@linux.dev, john.fastabend@gmail.com,
kpsingh@kernel.org, sdf@fomichev.me, haoluo@google.com,
jolsa@kernel.org, mykolal@fb.com, shuah@kernel.org,
cgroups@vger.kernel.org, bpf@vger.kernel.org,
linux-kselftest@vger.kernel.org, tixxdz@opendz.org
Cc: Djalal Harouni <tixxdz@gmail.com>
Subject: [RFC PATCH v2 bpf-next 1/3] kernfs: cgroup: support writing cgroup interfaces from a kernfs node
Date: Mon, 18 Aug 2025 10:04:22 +0100 [thread overview]
Message-ID: <20250818090424.90458-2-tixxdz@gmail.com> (raw)
In-Reply-To: <20250818090424.90458-1-tixxdz@gmail.com>
Freezing a task's cgroup from BPF is better than doing it from user space,
which could be too late and is subject to races. To achieve this, allow
writing to cgroup core interfaces from BPF by adding a new kfunc helper that
takes a kernfs node directly.
Currently, only writing to "cgroup.freeze" on the default hierarchy is
allowed. The write goes directly through a kernfs_node, which allows sharing
the same code path as if the kernfs_node had been opened from userspace.
Signed-off-by: Djalal Harouni <tixxdz@gmail.com>
---
include/linux/cgroup.h | 3 ++
kernel/cgroup/cgroup.c | 102 ++++++++++++++++++++++++++++++++++++++---
2 files changed, 99 insertions(+), 6 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b18fb5fcb38e..03a0782c94bf 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -125,6 +125,9 @@ int cgroup_rm_cftypes(struct cftype *cfts);
void cgroup_file_notify(struct cgroup_file *cfile);
void cgroup_file_show(struct cgroup_file *cfile, bool show);
+ssize_t cgroup_kn_interface_write(struct kernfs_node *kn, const char *name__str,
+ const char *buf, size_t nbytes, loff_t off);
+
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *tsk);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 312c6a8b55bb..cddd7c1d354d 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -229,6 +229,24 @@ static struct file_system_type cgroup2_fs_type;
static struct cftype cgroup_base_files[];
static struct cftype cgroup_psi_files[];
+struct cgroup_kn_cftype {
+ char name[MAX_CFTYPE_NAME];
+ unsigned int namelen;
+
+ /*
+ * write() is the write operation on a kernfs node.
+ */
+ ssize_t (*write)(struct kernfs_node *kn, const char *buf, size_t nbytes,
+ loff_t off, bool revalidate);
+};
+
+#define CGROUP_PREFIX "cgroup."
+#define CGROUP_CORE_INTERFACE_FREEZE_SUFFIX "freeze"
+#define CGROUP_CORE_INTERFACE_FREEZE (CGROUP_PREFIX CGROUP_CORE_INTERFACE_FREEZE_SUFFIX)
+#define CGROUP_CORE_INTERFACE_FREEZE_LEN (sizeof(CGROUP_CORE_INTERFACE_FREEZE) - 1)
+
+static struct cgroup_kn_cftype kn_cfts[];
+
/* cgroup optional features */
enum cgroup_opt_features {
#ifdef CONFIG_PSI
@@ -4030,29 +4048,58 @@ static int cgroup_freeze_show(struct seq_file *seq, void *v)
return 0;
}
-static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
+static bool cgroup_kn_revalidate(struct cgroup *cgrp)
+{
+ if (!cgroup_on_dfl(cgrp) || !cgroup_parent(cgrp))
+ return false;
+
+ return true;
+}
+
+static ssize_t cgroup_kn_freeze(struct kernfs_node *kn,
+ const char *buf, size_t nbytes, loff_t off,
+ bool revalidate)
{
struct cgroup *cgrp;
ssize_t ret;
int freeze;
+ char b[4] = {0};
+
+ /* Handle userspace writes +(0|1)\n and fail otherwise */
+ ret = strscpy(b, buf, sizeof(b));
+ if (ret < 0)
+ return ret;
- ret = kstrtoint(strstrip(buf), 0, &freeze);
+ nbytes = ret;
+ ret = kstrtoint(strstrip(b), 0, &freeze);
if (ret)
return ret;
if (freeze < 0 || freeze > 1)
return -ERANGE;
- cgrp = cgroup_kn_lock_live(of->kn, false);
+ cgrp = cgroup_kn_lock_live(kn, false);
if (!cgrp)
return -ENOENT;
+ if (revalidate && !cgroup_kn_revalidate(cgrp)) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
cgroup_freeze(cgrp, freeze);
- cgroup_kn_unlock(of->kn);
+ ret = nbytes;
- return nbytes;
+out:
+ cgroup_kn_unlock(kn);
+ return ret;
+}
+
+static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ return cgroup_kn_freeze(of->kn, buf, nbytes, off, false);
}
static void __cgroup_kill(struct cgroup *cgrp)
@@ -4601,6 +4648,49 @@ void cgroup_file_show(struct cgroup_file *cfile, bool show)
kernfs_put(kn);
}
+static struct cgroup_kn_cftype kn_cfts[] = {
+ {
+ .name = CGROUP_CORE_INTERFACE_FREEZE,
+ .namelen = CGROUP_CORE_INTERFACE_FREEZE_LEN,
+ .write = cgroup_kn_freeze,
+ },
+ { },
+};
+
+static const struct cgroup_kn_cftype *cgroup_kn_cft(const char *name__str)
+{
+ struct cgroup_kn_cftype *kn_cft;
+
+ for (kn_cft = kn_cfts; kn_cft && kn_cft->name[0] != '\0'; kn_cft++) {
+ if (!strncmp(name__str, kn_cft->name, kn_cft->namelen))
+ return kn_cft;
+ }
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+ssize_t cgroup_kn_interface_write(struct kernfs_node *kn, const char *name__str,
+ const char *buf, size_t nbytes, loff_t off)
+{
+ const struct cgroup_kn_cftype *kn_cft;
+
+ /* empty, do not remove */
+ if (!nbytes)
+ return 0;
+
+ if (kernfs_type(kn) != KERNFS_DIR)
+ return -ENOTDIR;
+
+ kn_cft = cgroup_kn_cft(name__str);
+ if (IS_ERR(kn_cft))
+ return PTR_ERR(kn_cft);
+
+ if (unlikely(!kn_cft->write))
+ return -EOPNOTSUPP;
+
+ return kn_cft->write(kn, buf, nbytes, off, true);
+}
+
/**
* css_next_child - find the next child of a given css
* @pos: the current position (%NULL to initiate traversal)
--
2.43.0
next prev parent reply other threads:[~2025-08-18 9:05 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-18 9:04 [RFC PATCH v2 bpf-next 0/3] bpf: cgroup: support writing and freezing cgroups from BPF Djalal Harouni
2025-08-18 9:04 ` Djalal Harouni [this message]
2025-08-18 9:04 ` [RFC PATCH v2 bpf-next 2/3] bpf: cgroup: Add BPF Kfunc to write and freeze a cgroup Djalal Harouni
2025-08-18 9:04 ` [RFC PATCH v2 bpf-next 3/3] selftests/bpf: add selftest for bpf_cgroup_write_interface Djalal Harouni
2025-08-18 17:32 ` [RFC PATCH v2 bpf-next 0/3] bpf: cgroup: support writing and freezing cgroups from BPF Tejun Heo
2025-08-19 23:31 ` Djalal Harouni
2025-08-19 23:36 ` Djalal Harouni
2025-08-20 1:14 ` Tejun Heo
2025-08-22 18:16 ` Djalal Harouni
2025-08-25 18:48 ` Tejun Heo
2025-08-26 3:45 ` Alexei Starovoitov
2025-08-26 10:23 ` Djalal Harouni
2025-08-26 14:18 ` Michal Koutný
2025-08-26 23:27 ` Djalal Harouni
2025-08-28 14:38 ` Michal Koutný
2025-09-01 19:53 ` Djalal Harouni
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250818090424.90458-2-tixxdz@gmail.com \
--to=tixxdz@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=cgroups@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=hannes@cmpxchg.org \
--cc=haoluo@google.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=martin.lau@linux.dev \
--cc=mkoutny@suse.com \
--cc=mykolal@fb.com \
--cc=sdf@fomichev.me \
--cc=shuah@kernel.org \
--cc=song@kernel.org \
--cc=tixxdz@opendz.org \
--cc=tj@kernel.org \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).