From: Sobin Thomas <sobin.thomas@intel.com>
To: igt-dev@lists.freedesktop.org
Cc: zbigniew.kempczynski@intel.com, kamil.konieczny@intel.com,
Sobin Thomas <sobin.thomas@intel.com>
Subject: [PATCH i-g-t] RFC: Add disk usage quota carryover for igt_runner
Date: Thu, 11 Jun 2026 13:18:55 +0000 [thread overview]
Message-ID: <20260611131855.368185-1-sobin.thomas@intel.com> (raw)
Add a quota carryover mechanism to the disk usage limit enforcement.
Instead of applying a hard per-test limit, unused disk budget from
frugal tests is accumulated in a quota pool and made available to
subsequent tests that need more space.
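For each test, where L is the configured disk usage limit:
B = MIN(quota, L) -- borrow from pool, capped at L
quota -= B -- take the borrowed amount out of the pool
effective_limit = L + B -- usable limit for this test
quota += MAX(effective_limit - actual_used, 0) -- return leftover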
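This allows tests that write less than L bytes to subsidise tests
that occasionally need more, without raising the global limit (L).
Because B is capped at L, no single test can get a limit above 2 x L.
The quota pool is seeded with L, so the first test always receives
exactly 2 x L as its limit. A limit of 0 keeps the check disabled and
leaves the quota pool untouched.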
Suggested-by: Zbigniew Kempczynski <zbigniew.kempczynski@intel.com>
Signed-off-by: Sobin Thomas <sobin.thomas@intel.com>
---
runner/executor.c | 68 ++++++++++++++++++++++++++++++++++++++---------
runner/executor.h | 1 +
2 files changed, 57 insertions(+), 12 deletions(-)
diff --git a/runner/executor.c b/runner/executor.c
index a8907c575..c1775095a 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -821,14 +821,13 @@ static const char *show_kernel_task_state(const char *msg)
return msg;
}
-static bool disk_usage_limit_exceeded(struct settings *settings,
- size_t disk_usage)
+static bool disk_usage_limit_exceeded(size_t limit, size_t disk_usage)
{
- return settings->disk_usage_limit != 0 &&
- disk_usage > settings->disk_usage_limit;
+ return limit != 0 && disk_usage > limit;
}
static const char *need_to_timeout(struct settings *settings,
+ size_t effective_limit,
int killed,
unsigned long taints,
double time_since_activity,
@@ -897,7 +896,7 @@ static const char *need_to_timeout(struct settings *settings,
return show_kernel_task_state("Inactivity timeout exceeded. Killing the current test with SIGQUIT.\n");
}
- if (disk_usage_limit_exceeded(settings, disk_usage))
+ if (disk_usage_limit_exceeded(effective_limit, disk_usage))
return "Disk usage limit exceeded.\n";
return NULL;
@@ -961,6 +960,8 @@ static int monitor_output(pid_t child,
int *outputs,
double *time_spent,
struct settings *settings,
+ size_t effective_limit,
+ size_t *total_disk_used,
char **abortreason,
bool *abort_already_written)
{
@@ -981,6 +982,7 @@ static int monitor_output(pid_t child,
unsigned long taints = 0;
bool aborting = false;
size_t disk_usage = 0;
+ size_t total_disk_usage = 0;
size_t dmsg_chunk_size = 4096 * max_t(size_t, sysconf(_SC_NPROCESSORS_ONLN), 16);
long dmesgwritten;
bool socket_comms_used = false; /* whether the test actually uses comms */
@@ -1070,6 +1072,7 @@ static int monitor_output(pid_t child,
write(outputs[_F_OUT], buf, s);
disk_usage += s;
+ total_disk_usage += s;
if (settings->sync) {
fdatasync(outputs[_F_OUT]);
}
@@ -1162,6 +1165,7 @@ static int monitor_output(pid_t child,
} else {
write(outputs[_F_ERR], buf, s);
disk_usage += s;
+ total_disk_usage += s;
if (settings->sync) {
fdatasync(outputs[_F_ERR]);
}
@@ -1253,6 +1257,7 @@ static int monitor_output(pid_t child,
write_packet_with_canary(outputs[_F_SOCKET], packet, settings->sync);
disk_usage += packet->size;
+ total_disk_usage += packet->size;
if (packet->type == PACKETTYPE_SUBTEST_RESULT ||
packet->type == PACKETTYPE_DYNAMIC_SUBTEST_RESULT)
@@ -1319,6 +1324,7 @@ static int monitor_output(pid_t child,
kmsgfd = -1;
} else {
disk_usage += dmesgwritten;
+ total_disk_usage += dmesgwritten;
}
}
@@ -1463,7 +1469,8 @@ static int monitor_output(pid_t child,
* Same goes for stopping because we
* exceeded the disk usage limit.
*/
- if (killed && disk_usage_limit_exceeded(settings, disk_usage)) {
+ if (killed &&
+ disk_usage_limit_exceeded(effective_limit, disk_usage)) {
timeoutresult = false;
if (socket_comms_used) {
@@ -1474,7 +1481,7 @@ static int monitor_output(pid_t child,
"runner: This test was killed due to exceeding disk usage limit. "
"(Used %zd bytes, limit %zd)\n",
disk_usage,
- settings->disk_usage_limit);
+ effective_limit);
message = runnerpacket_log(STDOUT_FILENO, killmsg);
write_packet_with_canary(outputs[_F_SOCKET], message, settings->sync);
free(message);
@@ -1483,7 +1490,7 @@ static int monitor_output(pid_t child,
"\nrunner: This test was killed due to exceeding disk usage limit. "
"(Used %zd bytes, limit %zd)\n",
disk_usage,
- settings->disk_usage_limit);
+ effective_limit);
if (settings->sync)
fdatasync(outputs[_F_OUT]);
}
@@ -1533,7 +1540,7 @@ static int monitor_output(pid_t child,
}
igt_kernel_tainted(&taints);
- timeout_reason = need_to_timeout(settings, killed,
+ timeout_reason = need_to_timeout(settings, effective_limit, killed,
taints,
igt_time_elapsed(&time_last_activity, &time_now),
igt_time_elapsed(&time_last_subtest, &time_now),
@@ -1553,7 +1560,8 @@ static int monitor_output(pid_t child,
asprintf(abortreason, "Child refuses to die, tainted 0x%lx.", taints);
}
- dmsg_chunk_size = calc_last_dmesg_chunk(settings->disk_usage_limit, disk_usage);
+ dmsg_chunk_size = calc_last_dmesg_chunk(effective_limit,
+ disk_usage);
dump_dmesg(kmsgfd, outputs[_F_DMESG], dmsg_chunk_size);
if (settings->sync)
fdatasync(outputs[_F_DMESG]);
@@ -1590,10 +1598,13 @@ static int monitor_output(pid_t child,
fdatasync(outputs[_F_DMESG]);
if (dmesgwritten > 0) {
disk_usage += dmesgwritten;
- if (settings->disk_usage_limit && disk_usage > settings->disk_usage_limit) {
+ total_disk_usage += dmesgwritten;
+ if (effective_limit && disk_usage > effective_limit) {
char disk[1024];
- snprintf(disk, sizeof(disk), "igt_runner: disk limit exceeded at dmesg dump, %zu > %zu\n", disk_usage, settings->disk_usage_limit);
+ snprintf(disk, sizeof(disk),
+ "igt_runner: disk limit exceeded at dmesg dump, %zu > %zu\n",
+ disk_usage, effective_limit);
if (settings->log_level >= LOG_LEVEL_NORMAL) {
outf("%s", disk);
fflush(stdout);
@@ -1610,6 +1621,8 @@ static int monitor_output(pid_t child,
close(socketfd);
close(kmsgfd);
+ *total_disk_used = total_disk_usage;
+
if (aborting)
return -1;
@@ -1770,6 +1783,8 @@ static int execute_next_entry(struct execute_state *state,
struct job_list_entry *entry,
int testdirfd, int resdirfd,
int sigfd, sigset_t *sigmask,
+ size_t effective_limit,
+ size_t *disk_used_out,
char **abortreason,
bool *abort_already_written)
{
@@ -1893,6 +1908,7 @@ static int execute_next_entry(struct execute_state *state,
result = monitor_output(child, outfd, errfd, socketfd,
kmsgfd, sigfd,
outputs, time_spent, settings,
+ effective_limit, disk_used_out,
abortreason, abort_already_written);
out_kmsgfd:
@@ -2134,6 +2150,8 @@ bool initialize_execute_state_from_resume(int dirfd,
}
init_time_left(state, settings);
+ /* Seed with one limit so first test can borrow. */
+ state->quota = settings->disk_usage_limit;
for (i = list->size; i >= 0; i--) {
char name[32];
@@ -2214,6 +2232,8 @@ bool initialize_execute_state(struct execute_state *state,
init_time_left(state, settings);
state->dry = settings->dry_run;
+ /* Seed with one limit so first test can borrow. */
+ state->quota = settings->disk_usage_limit;
return true;
}
@@ -2644,6 +2664,23 @@ bool execute(struct execute_state *state,
}
if (reason == NULL) {
+ size_t disk_used = 0;
+ size_t effective_limit = settings->disk_usage_limit;
+
+ /*
+ * Quota carryover: borrow unused budget from previous
+ * tests (up to L) so frugal tests subsidise heavy ones.
+ * B = MIN(Q, L) -- borrow from quota
+ * U = L + B -- usable limit for this test
+ * Q += (U - actual) -- return leftover to quota
+ */
+ if (settings->disk_usage_limit != 0) {
+ size_t borrow = min_t(size_t, state->quota,
+ settings->disk_usage_limit);
+ state->quota -= borrow;
+ effective_limit = settings->disk_usage_limit + borrow;
+ }
+
result = execute_next_entry(state,
job_list->size,
&time_spent,
@@ -2651,8 +2688,15 @@ bool execute(struct execute_state *state,
&job_list->entries[state->next],
testdirfd, resdirfd,
sigfd, &sigmask,
+ effective_limit, &disk_used,
&reason, &already_written);
+ if (settings->disk_usage_limit != 0) {
+ size_t leftover = (disk_used < effective_limit) ?
+ effective_limit - disk_used : 0;
+ state->quota += leftover;
+ }
+
if (settings->cov_results_per_test) {
code_coverage_stop(settings, job_name, sigfd, &reason);
free(job_name);
diff --git a/runner/executor.h b/runner/executor.h
index bc6ac80dc..e2210dac9 100644
--- a/runner/executor.h
+++ b/runner/executor.h
@@ -14,6 +14,7 @@ struct execute_state
*/
double time_left;
bool dry;
+ size_t quota; /* accumulated unused disk budget from previous tests */
};
enum {
--
2.43.0
next reply other threads:[~2026-06-11 13:19 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-11 13:18 Sobin Thomas [this message]
2026-06-11 16:21 ` ✓ i915.CI.BAT: success for RFC: Add disk usage quota carryover for igt_runner Patchwork
2026-06-11 17:13 ` ✓ Xe.CI.BAT: " Patchwork
2026-06-12 6:48 ` ✗ Xe.CI.FULL: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260611131855.368185-1-sobin.thomas@intel.com \
--to=sobin.thomas@intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=kamil.konieczny@intel.com \
--cc=zbigniew.kempczynski@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox