public inbox for igt-dev@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Petri Latvala <petri.latvala@intel.com>
To: igt-dev@lists.freedesktop.org
Cc: Petri Latvala <petri.latvala@intel.com>,
	Chris Wilson <chris@chris-wilson.co.uk>
Subject: [igt-dev] [PATCH i-g-t v4] runner: Correctly handle abort before first test
Date: Wed, 21 Dec 2022 13:42:13 +0200	[thread overview]
Message-ID: <20221221114213.2913884-1-petri.latvala@intel.com> (raw)

Don't leave the execution in a "please resume me" state if bootup
causes an abort condition. In particular, correctly handle an abort
on bootup when resuming, so that the runner doesn't attempt to run a
test on a tainted kernel if it has been explicitly configured not to
execute when there's a taint.

v2: Fudge the results directory instead to get the desired results:
    the runner exits with a nonzero status, and resuming exits with
    "all done" instead of executing anything.

v3: Use faccessat instead of open+close, use less magic strings,
    remember to close fds (Chris)

v4: Use GRACEFUL_EXITCODE in monitor_output, remove the 'resuming'
    field (why was it a double?!). (Ryszard)
    Stop trying to execute if all tests are already run, to avoid a
    crash in environment validation.

Signed-off-by: Petri Latvala <petri.latvala@intel.com>
Cc: Arkadiusz Hiler <arek@hiler.eu>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kamil Konieczny <kamil.konieczny@linux.intel.com>
Cc: Ryszard Knop <ryszard.knop@intel.com>
---
 runner/executor.c | 57 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 3 deletions(-)

diff --git a/runner/executor.c b/runner/executor.c
index d2253082..e954c23e 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -37,6 +37,7 @@
 
 #define KMSG_HEADER "[IGT] "
 #define KMSG_WARN 4
+#define GRACEFUL_EXITCODE -SIGHUP
 
 static struct {
 	int *fds;
@@ -1249,7 +1250,7 @@ static int monitor_output(pid_t child,
 					} else {
 						dprintf(outputs[_F_JOURNAL], "%s%d (%.3fs)\n",
 							EXECUTOR_EXIT,
-							-SIGHUP, 0.0);
+							GRACEFUL_EXITCODE, 0.0);
 						if (settings->sync)
 							fdatasync(outputs[_F_JOURNAL]);
 					}
@@ -1720,6 +1721,41 @@ out_dirfd:
 	return result;
 }
 
+static void fill_results_directory_with_notruns(struct job_list *list,
+						int resdirfd)
+{
+	int outputs[_F_LAST];
+	char name[32];
+	int dirfd;
+	size_t i;
+
+	for (i = 0; i < list->size; i++) {
+		snprintf(name, sizeof(name), "%zd", i);
+
+		if (faccessat(resdirfd, name, F_OK, 0) == 0)
+			continue;
+
+		mkdirat(resdirfd, name, 0777);
+		dirfd = openat(resdirfd, name, O_DIRECTORY | O_RDONLY);
+		if (dirfd < 0) {
+			errf("Error accessing individual test result directory\n");
+			return;
+		}
+
+		if (!open_output_files(dirfd, outputs, true)) {
+			errf("Error opening output files\n");
+			close(dirfd);
+			return;
+		}
+
+		dprintf(outputs[_F_OUT], "Forced notrun result because of abort condition on bootup\n");
+		dprintf(outputs[_F_JOURNAL], "%s%d (%.3fs)\n", EXECUTOR_EXIT, GRACEFUL_EXITCODE, 0.0);
+
+		close_outputs(outputs);
+		close(dirfd);
+	}
+}
+
 static int remove_file(int dirfd, const char *name)
 {
 	return unlinkat(dirfd, name, 0) && errno != ENOENT;
@@ -1845,7 +1881,6 @@ bool initialize_execute_state_from_resume(int dirfd,
 	clear_settings(settings);
 	free_job_list(list);
 	memset(state, 0, sizeof(*state));
-	state->resuming = true;
 
 	if (!read_settings_from_dir(settings, dirfd) ||
 	    !read_job_list(list, dirfd)) {
@@ -2183,6 +2218,11 @@ bool execute(struct execute_state *state,
 		return true;
 	}
 
+	if (state->next >= job_list->size) {
+		outf("All tests already executed.\n");
+		return true;
+	}
+
 	igt_list_for_each_entry(env_var, &settings->env_vars, link) {
 		setenv(env_var->key, env_var->value, 1);
 	}
@@ -2271,7 +2311,7 @@ bool execute(struct execute_state *state,
 	close(unamefd);
 
 	/* Check if we're already in abort-state at bootup */
-	if (!state->resuming) {
+	{
 		char *reason;
 
 		if ((reason = need_to_abort(settings)) != NULL) {
@@ -2280,6 +2320,17 @@ bool execute(struct execute_state *state,
 			free(reason);
 			free(nexttest);
 
+			/*
+			 * If an abort condition happened at bootup,
+			 * assume that it happens on every boot,
+			 * making this test execution impossible.
+			 * Write stuff to the results directory
+			 * indicating this so resuming immediately
+			 * finishes instead of getting stuck in an
+			 * infinite reboot loop.
+			 */
+			fill_results_directory_with_notruns(job_list, resdirfd);
+
 			status = false;
 
 			goto end;
-- 
2.30.2

             reply	other threads:[~2022-12-21 11:41 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-21 11:42 Petri Latvala [this message]
2022-12-21 13:30 ` [igt-dev] ✓ Fi.CI.BAT: success for runner: Correctly handle abort before first test (rev4) Patchwork
2022-12-21 15:24 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2022-12-22 12:11 ` [igt-dev] [PATCH i-g-t v4] runner: Correctly handle abort before first test Kamil Konieczny
2023-01-05 19:25 ` Kamil Konieczny
2023-01-09  9:49   ` Petri Latvala

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221221114213.2913884-1-petri.latvala@intel.com \
    --to=petri.latvala@intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=igt-dev@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox