Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Sobin Thomas <sobin.thomas@intel.com>
To: igt-dev@lists.freedesktop.org
Cc: zbigniew.kempczynski@intel.com, kamil.konieczny@intel.com,
	Sobin Thomas <sobin.thomas@intel.com>
Subject: [PATCH i-g-t] RFC: Add disk usage quota carryover for igt_runner
Date: Thu, 11 Jun 2026 13:18:55 +0000	[thread overview]
Message-ID: <20260611131855.368185-1-sobin.thomas@intel.com> (raw)

Add a quota carryover mechanism to the disk usage limit enforcement.
Instead of applying a hard per-test limit, unused disk budget from
frugal tests is accumulated in a quota pool and made available to
subsequent tests that need more space.

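For each test (L is the configured per-test disk usage limit):
  B = MIN(quota, L)       -- borrow from pool, capped at L
  quota -= B              -- debit the borrowed amount from the pool
  effective_limit = L + B -- usable limit for this test
  quota += MAX(effective_limit - actual_used, 0) -- return leftover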

This allows tests that write less than L bytes to subsidise tests
that occasionally need more, without raising the global limit.

The quota pool is seeded with L so the first test always receives
exactly 2xL as its limit.

Suggested-by: Zbigniew Kempczynski <zbigniew.kempczynski@intel.com>
Signed-off-by: Sobin Thomas <sobin.thomas@intel.com>
---
 runner/executor.c | 68 ++++++++++++++++++++++++++++++++++++++---------
 runner/executor.h |  1 +
 2 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/runner/executor.c b/runner/executor.c
index a8907c575..c1775095a 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -821,14 +821,13 @@ static const char *show_kernel_task_state(const char *msg)
 	return msg;
 }
 
-static bool disk_usage_limit_exceeded(struct settings *settings,
-				      size_t disk_usage)
+static bool disk_usage_limit_exceeded(size_t limit, size_t disk_usage)
 {
-	return settings->disk_usage_limit != 0 &&
-		disk_usage > settings->disk_usage_limit;
+	return limit != 0 && disk_usage > limit;
 }
 
 static const char *need_to_timeout(struct settings *settings,
+				   size_t effective_limit,
 				   int killed,
 				   unsigned long taints,
 				   double time_since_activity,
@@ -897,7 +896,7 @@ static const char *need_to_timeout(struct settings *settings,
 		return show_kernel_task_state("Inactivity timeout exceeded. Killing the current test with SIGQUIT.\n");
 	}
 
-	if (disk_usage_limit_exceeded(settings, disk_usage))
+	if (disk_usage_limit_exceeded(effective_limit, disk_usage))
 		return "Disk usage limit exceeded.\n";
 
 	return NULL;
@@ -961,6 +960,8 @@ static int monitor_output(pid_t child,
 			  int *outputs,
 			  double *time_spent,
 			  struct settings *settings,
+			  size_t effective_limit,
+			  size_t *total_disk_used,
 			  char **abortreason,
 			  bool *abort_already_written)
 {
@@ -981,6 +982,7 @@ static int monitor_output(pid_t child,
 	unsigned long taints = 0;
 	bool aborting = false;
 	size_t disk_usage = 0;
+	size_t total_disk_usage = 0;
 	size_t dmsg_chunk_size = 4096 * max_t(size_t, sysconf(_SC_NPROCESSORS_ONLN), 16);
 	long dmesgwritten;
 	bool socket_comms_used = false; /* whether the test actually uses comms */
@@ -1070,6 +1072,7 @@ static int monitor_output(pid_t child,
 
 			write(outputs[_F_OUT], buf, s);
 			disk_usage += s;
+			total_disk_usage += s;
 			if (settings->sync) {
 				fdatasync(outputs[_F_OUT]);
 			}
@@ -1162,6 +1165,7 @@ static int monitor_output(pid_t child,
 			} else {
 				write(outputs[_F_ERR], buf, s);
 				disk_usage += s;
+				total_disk_usage += s;
 				if (settings->sync) {
 					fdatasync(outputs[_F_ERR]);
 				}
@@ -1253,6 +1257,7 @@ static int monitor_output(pid_t child,
 
 				write_packet_with_canary(outputs[_F_SOCKET], packet, settings->sync);
 				disk_usage += packet->size;
+				total_disk_usage += packet->size;
 
 				if (packet->type == PACKETTYPE_SUBTEST_RESULT ||
 				    packet->type == PACKETTYPE_DYNAMIC_SUBTEST_RESULT)
@@ -1319,6 +1324,7 @@ static int monitor_output(pid_t child,
 				kmsgfd = -1;
 			} else {
 				disk_usage += dmesgwritten;
+				total_disk_usage += dmesgwritten;
 			}
 		}
 
@@ -1463,7 +1469,8 @@ static int monitor_output(pid_t child,
 				 * Same goes for stopping because we
 				 * exceeded the disk usage limit.
 				 */
-				if (killed && disk_usage_limit_exceeded(settings, disk_usage)) {
+				if (killed &&
+				    disk_usage_limit_exceeded(effective_limit, disk_usage)) {
 					timeoutresult = false;
 
 					if (socket_comms_used) {
@@ -1474,7 +1481,7 @@ static int monitor_output(pid_t child,
 							 "runner: This test was killed due to exceeding disk usage limit. "
 							 "(Used %zd bytes, limit %zd)\n",
 							 disk_usage,
-							 settings->disk_usage_limit);
+							 effective_limit);
 						message = runnerpacket_log(STDOUT_FILENO, killmsg);
 						write_packet_with_canary(outputs[_F_SOCKET], message, settings->sync);
 						free(message);
@@ -1483,7 +1490,7 @@ static int monitor_output(pid_t child,
 							"\nrunner: This test was killed due to exceeding disk usage limit. "
 							"(Used %zd bytes, limit %zd)\n",
 							disk_usage,
-							settings->disk_usage_limit);
+							effective_limit);
 						if (settings->sync)
 							fdatasync(outputs[_F_OUT]);
 					}
@@ -1533,7 +1540,7 @@ static int monitor_output(pid_t child,
 		}
 
 		igt_kernel_tainted(&taints);
-		timeout_reason = need_to_timeout(settings, killed,
+		timeout_reason = need_to_timeout(settings, effective_limit, killed,
 						 taints,
 						 igt_time_elapsed(&time_last_activity, &time_now),
 						 igt_time_elapsed(&time_last_subtest, &time_now),
@@ -1553,7 +1560,8 @@ static int monitor_output(pid_t child,
 					asprintf(abortreason, "Child refuses to die, tainted 0x%lx.", taints);
 				}
 
-				dmsg_chunk_size = calc_last_dmesg_chunk(settings->disk_usage_limit, disk_usage);
+				dmsg_chunk_size = calc_last_dmesg_chunk(effective_limit,
+									disk_usage);
 				dump_dmesg(kmsgfd, outputs[_F_DMESG], dmsg_chunk_size);
 				if (settings->sync)
 					fdatasync(outputs[_F_DMESG]);
@@ -1590,10 +1598,13 @@ static int monitor_output(pid_t child,
 		fdatasync(outputs[_F_DMESG]);
 	if (dmesgwritten > 0) {
 		disk_usage += dmesgwritten;
-		if (settings->disk_usage_limit && disk_usage > settings->disk_usage_limit) {
+		total_disk_usage += dmesgwritten;
+		if (effective_limit && disk_usage > effective_limit) {
 			char disk[1024];
 
-			snprintf(disk, sizeof(disk), "igt_runner: disk limit exceeded at dmesg dump, %zu > %zu\n", disk_usage, settings->disk_usage_limit);
+			snprintf(disk, sizeof(disk),
+				 "igt_runner: disk limit exceeded at dmesg dump, %zu > %zu\n",
+				 disk_usage, effective_limit);
 			if (settings->log_level >= LOG_LEVEL_NORMAL) {
 				outf("%s", disk);
 				fflush(stdout);
@@ -1610,6 +1621,8 @@ static int monitor_output(pid_t child,
 	close(socketfd);
 	close(kmsgfd);
 
+	*total_disk_used = total_disk_usage;
+
 	if (aborting)
 		return -1;
 
@@ -1770,6 +1783,8 @@ static int execute_next_entry(struct execute_state *state,
 			      struct job_list_entry *entry,
 			      int testdirfd, int resdirfd,
 			      int sigfd, sigset_t *sigmask,
+			      size_t effective_limit,
+			      size_t *disk_used_out,
 			      char **abortreason,
 			      bool *abort_already_written)
 {
@@ -1893,6 +1908,7 @@ static int execute_next_entry(struct execute_state *state,
 	result = monitor_output(child, outfd, errfd, socketfd,
 				kmsgfd, sigfd,
 				outputs, time_spent, settings,
+				effective_limit, disk_used_out,
 				abortreason, abort_already_written);
 
 out_kmsgfd:
@@ -2134,6 +2150,8 @@ bool initialize_execute_state_from_resume(int dirfd,
 	}
 
 	init_time_left(state, settings);
+	/* Seed with one limit so first test can borrow. */
+	state->quota = settings->disk_usage_limit;
 
 	for (i = list->size; i >= 0; i--) {
 		char name[32];
@@ -2214,6 +2232,8 @@ bool initialize_execute_state(struct execute_state *state,
 	init_time_left(state, settings);
 
 	state->dry = settings->dry_run;
+	/* Seed with one limit so first test can borrow. */
+	state->quota = settings->disk_usage_limit;
 
 	return true;
 }
@@ -2644,6 +2664,23 @@ bool execute(struct execute_state *state,
 		}
 
 		if (reason == NULL) {
+			size_t disk_used = 0;
+			size_t effective_limit = settings->disk_usage_limit;
+
+			/*
+			 * Quota carryover: borrow unused budget from previous
+			 * tests (up to L) so frugal tests subsidise heavy ones.
+			 *   B = MIN(Q, L)        -- borrow from quota
+			 *   U = L + B            -- usable limit for this test
+			 *   Q += (U - actual)    -- return leftover to quota
+			 */
+			if (settings->disk_usage_limit != 0) {
+				size_t borrow = min_t(size_t, state->quota,
+						       settings->disk_usage_limit);
+				state->quota -= borrow;
+				effective_limit = settings->disk_usage_limit + borrow;
+			}
+
 			result = execute_next_entry(state,
 						    job_list->size,
 						    &time_spent,
@@ -2651,8 +2688,15 @@ bool execute(struct execute_state *state,
 						    &job_list->entries[state->next],
 						    testdirfd, resdirfd,
 						    sigfd, &sigmask,
+						    effective_limit, &disk_used,
 						    &reason, &already_written);
 
+			if (settings->disk_usage_limit != 0) {
+				size_t leftover = (disk_used < effective_limit) ?
+					effective_limit - disk_used : 0;
+				state->quota += leftover;
+			}
+
 			if (settings->cov_results_per_test) {
 				code_coverage_stop(settings, job_name, sigfd, &reason);
 				free(job_name);
diff --git a/runner/executor.h b/runner/executor.h
index bc6ac80dc..e2210dac9 100644
--- a/runner/executor.h
+++ b/runner/executor.h
@@ -14,6 +14,7 @@ struct execute_state
 	 */
 	double time_left;
 	bool dry;
+	size_t quota; /* accumulated unused disk budget from previous tests */
 };
 
 enum {
-- 
2.43.0


             reply	other threads:[~2026-06-11 13:19 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-11 13:18 Sobin Thomas [this message]
2026-06-11 16:21 ` ✓ i915.CI.BAT: success for RFC: Add disk usage quota carryover for igt_runner Patchwork
2026-06-11 17:13 ` ✓ Xe.CI.BAT: " Patchwork
2026-06-12  6:48 ` ✗ Xe.CI.FULL: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260611131855.368185-1-sobin.thomas@intel.com \
    --to=sobin.thomas@intel.com \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=kamil.konieczny@intel.com \
    --cc=zbigniew.kempczynski@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox