public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next 1/2] bpf: add new BPF_CGROUP_ITER_CHILDREN_ONLY control option
@ 2026-01-21 13:54 Matt Bobrowski
  2026-01-21 13:54 ` [PATCH bpf-next 2/2] bpf/selftests: cover " Matt Bobrowski
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Matt Bobrowski @ 2026-01-21 13:54 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Jiri Olsa,
	Roman Gushchin, Chuyi Zhou, Tejun Heo, Matt Bobrowski

Currently, the BPF cgroup iterator supports walking descendants in
either pre-order (BPF_CGROUP_ITER_DESCENDANTS_PRE) or post-order
(BPF_CGROUP_ITER_DESCENDANTS_POST). These modes perform an exhaustive
depth-first search (DFS) of the hierarchy. In scenarios where a BPF
program may need to inspect only the direct children of a given parent
cgroup, a full DFS is unnecessarily expensive.

This patch introduces a new BPF cgroup iterator control option,
BPF_CGROUP_ITER_CHILDREN_ONLY. This control option restricts the
traversal to the immediate children of a specified parent cgroup,
allowing for more targeted and efficient iteration, particularly when
exhaustive depth-first search (DFS) traversal is not required.

Signed-off-by: Matt Bobrowski <mattbobrowski@google.com>
---
 include/uapi/linux/bpf.h       | 16 ++++++++++++----
 kernel/bpf/cgroup_iter.c       | 26 +++++++++++++++++++++-----
 tools/include/uapi/linux/bpf.h | 16 ++++++++++++----
 3 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2a2ade4be60f..eae8f9133df2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -115,10 +115,18 @@ struct bpf_cgroup_storage_key {
 
 enum bpf_cgroup_iter_order {
 	BPF_CGROUP_ITER_ORDER_UNSPEC = 0,
-	BPF_CGROUP_ITER_SELF_ONLY,		/* process only a single object. */
-	BPF_CGROUP_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
-	BPF_CGROUP_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
-	BPF_CGROUP_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
+	BPF_CGROUP_ITER_SELF_ONLY, 		/* process only a single object. */
+	BPF_CGROUP_ITER_DESCENDANTS_PRE, 	/* walk descendants in pre-order. */
+	BPF_CGROUP_ITER_DESCENDANTS_POST, 	/* walk descendants in post-order. */
+	BPF_CGROUP_ITER_ANCESTORS_UP, 		/* walk ancestors upward. */
+	/*
+	 * Walks the immediate children of the specified parent
+	 * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE,
+	 * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP
+	 * the iterator does not include the specified parent as one of the
+	 * returned iterator elements.
+	 */
+	BPF_CGROUP_ITER_CHILDREN_ONLY,
 };
 
 union bpf_iter_link_info {
diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c
index f04a468cf6a7..bca95bbfecf0 100644
--- a/kernel/bpf/cgroup_iter.c
+++ b/kernel/bpf/cgroup_iter.c
@@ -8,12 +8,13 @@
 
 #include "../cgroup/cgroup-internal.h"  /* cgroup_mutex and cgroup_is_dead */
 
-/* cgroup_iter provides four modes of traversal to the cgroup hierarchy.
+/* cgroup_iter provides five modes of traversal to the cgroup hierarchy.
  *
  *  1. Walk the descendants of a cgroup in pre-order.
  *  2. Walk the descendants of a cgroup in post-order.
  *  3. Walk the ancestors of a cgroup.
  *  4. Show the given cgroup only.
+ *  5. Walk only the children of a given parent cgroup.
  *
  * For walking descendants, cgroup_iter can walk in either pre-order or
  * post-order. For walking ancestors, the iter walks up from a cgroup to
@@ -78,6 +79,8 @@ static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos)
 		return css_next_descendant_pre(NULL, p->start_css);
 	else if (p->order == BPF_CGROUP_ITER_DESCENDANTS_POST)
 		return css_next_descendant_post(NULL, p->start_css);
+	else if (p->order == BPF_CGROUP_ITER_CHILDREN_ONLY)
+		return css_next_child(NULL, p->start_css);
 	else /* BPF_CGROUP_ITER_SELF_ONLY and BPF_CGROUP_ITER_ANCESTORS_UP */
 		return p->start_css;
 }
@@ -113,6 +116,8 @@ static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		return css_next_descendant_post(curr, p->start_css);
 	else if (p->order == BPF_CGROUP_ITER_ANCESTORS_UP)
 		return curr->parent;
+	else if (p->order == BPF_CGROUP_ITER_CHILDREN_ONLY)
+		return css_next_child(curr, p->start_css);
 	else  /* BPF_CGROUP_ITER_SELF_ONLY */
 		return NULL;
 }
@@ -200,11 +205,16 @@ static int bpf_iter_attach_cgroup(struct bpf_prog *prog,
 	int order = linfo->cgroup.order;
 	struct cgroup *cgrp;
 
-	if (order != BPF_CGROUP_ITER_DESCENDANTS_PRE &&
-	    order != BPF_CGROUP_ITER_DESCENDANTS_POST &&
-	    order != BPF_CGROUP_ITER_ANCESTORS_UP &&
-	    order != BPF_CGROUP_ITER_SELF_ONLY)
+	switch (order) {
+	case BPF_CGROUP_ITER_DESCENDANTS_PRE:
+	case BPF_CGROUP_ITER_DESCENDANTS_POST:
+	case BPF_CGROUP_ITER_ANCESTORS_UP:
+	case BPF_CGROUP_ITER_SELF_ONLY:
+	case BPF_CGROUP_ITER_CHILDREN_ONLY:
+		break;
+	default:
 		return -EINVAL;
+	}
 
 	if (fd && id)
 		return -EINVAL;
@@ -257,6 +267,8 @@ static void bpf_iter_cgroup_show_fdinfo(const struct bpf_iter_aux_info *aux,
 		seq_puts(seq, "order: descendants_post\n");
 	else if (aux->cgroup.order == BPF_CGROUP_ITER_ANCESTORS_UP)
 		seq_puts(seq, "order: ancestors_up\n");
+	else if (aux->cgroup.order == BPF_CGROUP_ITER_CHILDREN_ONLY)
+		seq_puts(seq, "order: children_only\n");
 	else /* BPF_CGROUP_ITER_SELF_ONLY */
 		seq_puts(seq, "order: self_only\n");
 }
@@ -320,6 +332,7 @@ __bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it,
 	case BPF_CGROUP_ITER_DESCENDANTS_PRE:
 	case BPF_CGROUP_ITER_DESCENDANTS_POST:
 	case BPF_CGROUP_ITER_ANCESTORS_UP:
+	case BPF_CGROUP_ITER_CHILDREN_ONLY:
 		break;
 	default:
 		return -EINVAL;
@@ -345,6 +358,9 @@ __bpf_kfunc struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *i
 	case BPF_CGROUP_ITER_DESCENDANTS_POST:
 		kit->pos = css_next_descendant_post(kit->pos, kit->start);
 		break;
+	case BPF_CGROUP_ITER_CHILDREN_ONLY:
+		kit->pos = css_next_child(kit->pos, kit->start);
+		break;
 	case BPF_CGROUP_ITER_ANCESTORS_UP:
 		kit->pos = kit->pos ? kit->pos->parent : kit->start;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b816bc53d2e1..fa0686bbc638 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -115,10 +115,18 @@ struct bpf_cgroup_storage_key {
 
 enum bpf_cgroup_iter_order {
 	BPF_CGROUP_ITER_ORDER_UNSPEC = 0,
-	BPF_CGROUP_ITER_SELF_ONLY,		/* process only a single object. */
-	BPF_CGROUP_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
-	BPF_CGROUP_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
-	BPF_CGROUP_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
+	BPF_CGROUP_ITER_SELF_ONLY, 		/* process only a single object. */
+	BPF_CGROUP_ITER_DESCENDANTS_PRE, 	/* walk descendants in pre-order. */
+	BPF_CGROUP_ITER_DESCENDANTS_POST, 	/* walk descendants in post-order. */
+	BPF_CGROUP_ITER_ANCESTORS_UP, 		/* walk ancestors upward. */
+	/*
+	 * Walks the immediate children of the specified parent
+	 * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE,
+	 * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP
+	 * the iterator does not include the specified parent as one of the
+	 * returned iterator elements.
+	 */
+	BPF_CGROUP_ITER_CHILDREN_ONLY,
 };
 
 union bpf_iter_link_info {
-- 
2.52.0.457.g6b5491de43-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2026-01-27  8:28 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2026-01-21 13:54 [PATCH bpf-next 1/2] bpf: add new BPF_CGROUP_ITER_CHILDREN_ONLY control option Matt Bobrowski
2026-01-21 13:54 ` [PATCH bpf-next 2/2] bpf/selftests: cover " Matt Bobrowski
2026-01-21 19:14 ` [PATCH bpf-next 1/2] bpf: add new " Song Liu
2026-01-22 12:31   ` Matt Bobrowski
2026-01-23  4:26 ` Alexei Starovoitov
2026-01-23 11:06   ` Matt Bobrowski
2026-01-23 17:17     ` Alexei Starovoitov
2026-01-26  9:03       ` Matt Bobrowski
2026-01-27  2:26         ` Alexei Starovoitov
2026-01-27  8:28           ` Matt Bobrowski
2026-01-23 18:50     ` Tejun Heo
2026-01-26  9:14       ` Matt Bobrowski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox