Linux cgroups development
 help / color / mirror / Atom feed
From: Tao Cui <cuitao@kylinos.cn>
To: tj@kernel.org, hannes@cmpxchg.org, mkoutny@suse.com,
	cgroups@vger.kernel.org
Cc: Tao Cui <cuitao@kylinos.cn>
Subject: [PATCH v3 3/4] cgroup/rdma: add rdma.events.local for per-cgroup allocation failure attribution
Date: Thu, 14 May 2026 14:50:33 +0800	[thread overview]
Message-ID: <20260514065034.387197-4-cuitao@kylinos.cn> (raw)
In-Reply-To: <20260514065034.387197-1-cuitao@kylinos.cn>

Add per-cgroup local event counters to track RDMA resource limit
exhaustion from the perspective of individual cgroups. The
rdma.events.local file reports two per-resource counters:

- max: number of times this cgroup's limit was the one that blocked
  an allocation in the subtree
- alloc_fail: number of allocation attempts originating from this
  cgroup that failed due to its own or an ancestor's limit

This mirrors the design of pids.events.local, where max events are
attributed to the cgroup whose limit was hit, not necessarily the
cgroup where the allocation was attempted.

Also extend rdma.events with a hierarchical alloc_fail counter that
tracks allocation failures propagating upward from the requesting
cgroup, complementing the existing max counter, so that rdma.events
and rdma.events.local share the same output format.

Signed-off-by: Tao Cui <cuitao@kylinos.cn>
---
 include/linux/cgroup_rdma.h |   3 +-
 kernel/cgroup/rdma.c        | 143 +++++++++++++++++++++++++++---------
 2 files changed, 109 insertions(+), 37 deletions(-)

diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h
index ac691fe7d3f5..404e746552ca 100644
--- a/include/linux/cgroup_rdma.h
+++ b/include/linux/cgroup_rdma.h
@@ -25,8 +25,9 @@ struct rdma_cgroup {
 	 */
 	struct list_head		rpools;
 
-	/* Handle for rdma.events */
+	/* Handles for rdma.events[.local] */
 	struct cgroup_file		events_file;
+	struct cgroup_file		events_local_file;
 };
 
 struct rdmacg_device {
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c
index 927bbf1eb949..7c238a9d64d4 100644
--- a/kernel/cgroup/rdma.c
+++ b/kernel/cgroup/rdma.c
@@ -82,8 +82,11 @@ struct rdmacg_resource_pool {
 	/* total number counts which are set to max */
 	int			num_max_cnt;
 
-	/* per-resource hierarchical max event counters */
+	/* per-resource event counters */
 	u64			events_max[RDMACG_RESOURCE_MAX];
+	u64			events_alloc_fail[RDMACG_RESOURCE_MAX];
+	u64			events_local_max[RDMACG_RESOURCE_MAX];
+	u64			events_local_alloc_fail[RDMACG_RESOURCE_MAX];
 };
 
 static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css)
@@ -131,6 +134,26 @@ static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool)
 	kfree(rpool);
 }
 
+static bool rpool_has_persistent_state(struct rdmacg_resource_pool *rpool)
+{
+	int i;
+
+	/*
+	 * Keep the rpool alive if any peak value is non-zero,
+	 * so that rdma.peak persists as a historical high-
+	 * watermark even after all resources are freed.
+	 */
+	for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
+		if (rpool->resources[i].peak ||
+		    READ_ONCE(rpool->events_max[i]) ||
+		    READ_ONCE(rpool->events_local_max[i]) ||
+		    READ_ONCE(rpool->events_alloc_fail[i]) ||
+		    READ_ONCE(rpool->events_local_alloc_fail[i]))
+			return true;
+	}
+	return false;
+}
+
 static struct rdmacg_resource_pool *
 find_cg_rpool_locked(struct rdma_cgroup *cg,
 		     struct rdmacg_device *device)
@@ -209,37 +232,30 @@ uncharge_cg_locked(struct rdma_cgroup *cg,
 	rpool->usage_sum--;
 	if (rpool->usage_sum == 0 &&
 	    rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
-		int i;
-
-		/*
-		 * Keep the rpool alive if any peak value is non-zero,
-		 * so that rdma.peak persists as a historical high-
-		 * watermark even after all resources are freed.
-		 */
-		for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
-			if (rpool->resources[i].peak ||
-			    READ_ONCE(rpool->events_max[i]))
-				return;
+		if (!rpool_has_persistent_state(rpool)) {
+			/*
+			 * No user of the rpool and all entries are set to max, so
+			 * safe to delete this rpool.
+			 */
+			free_cg_rpool_locked(rpool);
 		}
-		/*
-		 * No user of the rpool and all entries are set to max, so
-		 * safe to delete this rpool.
-		 */
-		free_cg_rpool_locked(rpool);
 	}
 }
 
 /**
- * rdmacg_event_locked - fire hierarchical max event when resource limit is hit
+ * rdmacg_event_locked - fire event when resource allocation exceeds limit
+ * @cg: requesting cgroup
  * @over_cg: cgroup whose limit was exceeded
  * @device: rdma device
  * @index: resource type index
  *
- * Must be called under rdmacg_mutex. Propagates max event counts
- * from @over_cg (including itself) upward to all ancestors with
- * an rpool and notifies userspace.
+ * Must be called under rdmacg_mutex. Updates event counters in the
+ * resource pools of @cg and @over_cg, propagates hierarchical max
+ * events from @over_cg (including itself) upward, and notifies
+ * userspace via cgroup_file_notify().
  */
-static void rdmacg_event_locked(struct rdma_cgroup *over_cg,
+static void rdmacg_event_locked(struct rdma_cgroup *cg,
+				struct rdma_cgroup *over_cg,
 				struct rdmacg_device *device,
 				enum rdmacg_resource_type index)
 {
@@ -248,6 +264,21 @@ static void rdmacg_event_locked(struct rdma_cgroup *over_cg,
 
 	lockdep_assert_held(&rdmacg_mutex);
 
+	/* Increment local alloc_fail in requesting cgroup */
+	rpool = find_cg_rpool_locked(cg, device);
+	if (rpool) {
+		rpool->events_local_alloc_fail[index]++;
+		cgroup_file_notify(&cg->events_local_file);
+	}
+
+	/* Increment local max in the over-limit cgroup */
+	rpool = find_cg_rpool_locked(over_cg, device);
+	if (rpool) {
+		rpool->events_local_max[index]++;
+		cgroup_file_notify(&over_cg->events_local_file);
+	}
+
+	/* Propagate hierarchical max events upward */
 	for (p = over_cg; parent_rdmacg(p); p = parent_rdmacg(p)) {
 		rpool = get_cg_rpool_locked(p, device);
 		if (!IS_ERR(rpool)) {
@@ -255,6 +286,14 @@ static void rdmacg_event_locked(struct rdma_cgroup *over_cg,
 			cgroup_file_notify(&p->events_file);
 		}
 	}
+	/* Propagate hierarchical alloc_fail from requesting cgroup upward */
+	for (p = cg; parent_rdmacg(p); p = parent_rdmacg(p)) {
+		rpool = get_cg_rpool_locked(p, device);
+		if (!IS_ERR(rpool)) {
+			rpool->events_alloc_fail[index]++;
+			cgroup_file_notify(&p->events_file);
+		}
+	}
 }
 
 /**
@@ -368,7 +407,7 @@ int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
 
 err:
 	if (ret == -EAGAIN)
-		rdmacg_event_locked(p, device, index);
+		rdmacg_event_locked(cg, p, device, index);
 	mutex_unlock(&rdmacg_mutex);
 	rdmacg_uncharge_hierarchy(cg, device, p, index);
 	return ret;
@@ -525,18 +564,13 @@ static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of,
 
 	if (rpool->usage_sum == 0 &&
 	    rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
-		int i;
-
-		for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
-			if (rpool->resources[i].peak ||
-			    READ_ONCE(rpool->events_max[i]))
-				goto dev_err;
+		if (!rpool_has_persistent_state(rpool)) {
+			/*
+			 * No user of the rpool and all entries are set to max, so
+			 * safe to delete this rpool.
+			 */
+			free_cg_rpool_locked(rpool);
 		}
-		/*
-		 * No user of the rpool and all entries are set to max, so
-		 * safe to delete this rpool.
-		 */
-		free_cg_rpool_locked(rpool);
 	}
 
 dev_err:
@@ -618,9 +652,40 @@ static int rdmacg_events_show(struct seq_file *sf, void *v)
 
 		seq_printf(sf, "%s ", device->name);
 		for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
-			seq_printf(sf, "%s.max=%llu",
+			seq_printf(sf, "%s.max=%llu %s.alloc_fail=%llu",
+				   rdmacg_resource_names[i],
+				   rpool ? READ_ONCE(rpool->events_max[i]) : 0ULL,
+				   rdmacg_resource_names[i],
+				   rpool ? READ_ONCE(rpool->events_alloc_fail[i]) : 0ULL);
+			if (i < RDMACG_RESOURCE_MAX - 1)
+				seq_putc(sf, ' ');
+		}
+		seq_putc(sf, '\n');
+	}
+
+	mutex_unlock(&rdmacg_mutex);
+	return 0;
+}
+
+static int rdmacg_events_local_show(struct seq_file *sf, void *v)
+{
+	struct rdma_cgroup *cg = css_rdmacg(seq_css(sf));
+	struct rdmacg_resource_pool *rpool;
+	struct rdmacg_device *device;
+	int i;
+
+	mutex_lock(&rdmacg_mutex);
+
+	list_for_each_entry(device, &rdmacg_devices, dev_node) {
+		rpool = find_cg_rpool_locked(cg, device);
+
+		seq_printf(sf, "%s ", device->name);
+		for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
+			seq_printf(sf, "%s.max=%llu %s.alloc_fail=%llu",
+				   rdmacg_resource_names[i],
+				   rpool ? READ_ONCE(rpool->events_local_max[i]) : 0ULL,
 				   rdmacg_resource_names[i],
-				   rpool ? READ_ONCE(rpool->events_max[i]) : 0ULL);
+				   rpool ? READ_ONCE(rpool->events_local_alloc_fail[i]) : 0ULL);
 			if (i < RDMACG_RESOURCE_MAX - 1)
 				seq_putc(sf, ' ');
 		}
@@ -657,6 +722,12 @@ static struct cftype rdmacg_files[] = {
 		.file_offset = offsetof(struct rdma_cgroup, events_file),
 		.flags = CFTYPE_NOT_ON_ROOT,
 	},
+	{
+		.name = "events.local",
+		.seq_show = rdmacg_events_local_show,
+		.file_offset = offsetof(struct rdma_cgroup, events_local_file),
+		.flags = CFTYPE_NOT_ON_ROOT,
+	},
 	{ }	/* terminate */
 };
 
-- 
2.43.0


  parent reply	other threads:[~2026-05-14  6:51 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-14  6:50 [PATCH v3 0/4] cgroup/rdma: add rdma.peak and rdma.events[.local] Tao Cui
2026-05-14  6:50 ` [PATCH v3 1/4] cgroup/rdma: add rdma.peak for per-device peak usage tracking Tao Cui
2026-05-14  6:50 ` [PATCH v3 2/4] cgroup/rdma: add rdma.events to track resource limit exhaustion Tao Cui
2026-05-14  6:50 ` Tao Cui [this message]
2026-05-14  6:50 ` [PATCH v3 4/4] cgroup/rdma: document rdma.peak, rdma.events and rdma.events.local Tao Cui
2026-05-14 21:26 ` [PATCH v3 0/4] cgroup/rdma: add rdma.peak and rdma.events[.local] Tejun Heo
2026-05-15  0:48   ` Tao Cui
2026-05-14 21:27 ` Tejun Heo
2026-05-14 21:27 ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260514065034.387197-4-cuitao@kylinos.cn \
    --to=cuitao@kylinos.cn \
    --cc=cgroups@vger.kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=mkoutny@suse.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox