public inbox for igt-dev@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Petri Latvala <petri.latvala@intel.com>
To: igt-dev@lists.freedesktop.org
Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>,
	Petri Latvala <petri.latvala@intel.com>,
	Daniel Vetter <daniel@ffwll.ch>
Subject: [igt-dev] [PATCH i-g-t 2/4] runner: Add support for aborting on network failure
Date: Mon,  9 Sep 2019 14:38:07 +0300	[thread overview]
Message-ID: <20190909113809.1229-2-petri.latvala@intel.com> (raw)
In-Reply-To: <20190909113809.1229-1-petri.latvala@intel.com>

If the network goes down while testing, CI tends to interpret that as
the device being down, cutting its power after a while. This causes an
innocent test to be reported as incomplete, increasing noise in the
results.

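A new condition for --abort-on-monitored-error, "ping", uses liboping
to ping a host configured in .igtrc (or in the IGT_PING_HOSTNAME
environment variable) after each test execution. Each attempt uses a
1 second timeout, attempts are repeated for up to 20 seconds, and the
run is aborted if no reply arrives within that hardcoded deadline.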

v2:
 - Use a higher timeout
 - Allow hostname configuration from environment
v3:
 - Use runner_c_args for holding c args for runner
 - Handle runner's meson options in runner/meson.build
 - Instead of one ping with 20 second timeout, ping with 1 second timeout
   for a duration of 20 seconds
v4:
 - Rebase
 - Use now-exported igt_load_igtrc instead of copypaste code
 - Use define for timeout, clearer var name for single attempt timeout

Signed-off-by: Petri Latvala <petri.latvala@intel.com>
Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Cc: Martin Peres <martin.peres@linux.intel.com>
Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
---
 meson_options.txt  |   4 ++
 runner/executor.c  | 129 +++++++++++++++++++++++++++++++++++++++++++++
 runner/meson.build |  14 ++++-
 runner/settings.c  |   4 ++
 runner/settings.h  |   5 +-
 5 files changed, 153 insertions(+), 3 deletions(-)

diff --git a/meson_options.txt b/meson_options.txt
index 8e2e1cf0..ff3abf08 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -42,6 +42,10 @@ option('runner',
        type : 'feature',
        description : 'Build test runner')
 
+option('oping',
+       type : 'feature',
+       description : 'Build test runner with liboping for pinging support')
+
 option('use_rpath',
        type : 'boolean',
        value : false,
diff --git a/runner/executor.c b/runner/executor.c
index 52fee7d1..6ac95663 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -1,6 +1,10 @@
 #include <errno.h>
 #include <fcntl.h>
+#include <glib.h>
 #include <linux/watchdog.h>
+#if HAVE_OPING
+#include <oping.h>
+#endif
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -17,6 +21,7 @@
 #include <time.h>
 #include <unistd.h>
 
+#include "igt_aux.h"
 #include "igt_core.h"
 #include "executor.h"
 #include "output_strings.h"
@@ -130,6 +135,129 @@ static void ping_watchdogs(void)
 	}
 }
 
+#if HAVE_OPING
+static pingobj_t *pingobj = NULL;
+
+static bool load_ping_config_from_file(void)
+{
+	GError *error = NULL;
+	GKeyFile *key_file = NULL;
+	const char *ping_hostname;
+
+	/* Load igt config file */
+	key_file = igt_load_igtrc();
+	if (!key_file)
+		return false;
+
+	ping_hostname =
+		g_key_file_get_string(key_file, "DUT",
+				      "PingHostName", &error);
+
+	g_clear_error(&error);
+	g_key_file_free(key_file);
+
+	if (!ping_hostname)
+		return false;
+
+	if (ping_host_add(pingobj, ping_hostname)) {
+		fprintf(stderr,
+			"abort on ping: Cannot use hostname from config file\n");
+		return false;
+	}
+
+	return true;
+}
+
+static bool load_ping_config_from_env(void)
+{
+	const char *ping_hostname;
+
+	ping_hostname = getenv("IGT_PING_HOSTNAME");
+	if (!ping_hostname)
+		return false;
+
+	if (ping_host_add(pingobj, ping_hostname)) {
+		fprintf(stderr,
+			"abort on ping: Cannot use hostname from environment\n");
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * On some hosts, getting network back up after suspend takes
+ * upwards of 10 seconds. 20 seconds should be enough to see
+ * if network comes back at all, and hopefully not too long to
+ * make external monitoring freak out.
+ */
+#define PING_ABORT_DEADLINE 20
+
+static bool can_ping(void)
+{
+	igt_until_timeout(PING_ABORT_DEADLINE) {
+		pingobj_iter_t *iter;
+
+		ping_send(pingobj);
+
+		for (iter = ping_iterator_get(pingobj);
+		     iter != NULL;
+		     iter = ping_iterator_next(iter)) {
+			double latency;
+			size_t len = sizeof(latency);
+
+			ping_iterator_get_info(iter,
+					       PING_INFO_LATENCY,
+					       &latency,
+					       &len);
+			if (latency >= 0.0)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+#endif
+
+static void ping_config(void)
+{
+#if HAVE_OPING
+	double single_attempt_timeout = 1.0;
+
+	if (pingobj)
+		return;
+
+	pingobj = ping_construct();
+
+	/* Try env first, then config file */
+	if (!load_ping_config_from_env() && !load_ping_config_from_file()) {
+		fprintf(stderr,
+			"abort on ping: No host to ping configured\n");
+		ping_destroy(pingobj);
+		pingobj = NULL;
+		return;
+	}
+
+	ping_setopt(pingobj, PING_OPT_TIMEOUT, &single_attempt_timeout);
+#endif
+}
+
+static char *handle_ping(void)
+{
+#if HAVE_OPING
+	if (pingobj && !can_ping()) {
+		char *reason;
+
+		asprintf(&reason,
+			 "Ping host did not respond to ping, network down");
+		return reason;
+	}
+#endif
+
+	return NULL;
+}
+
 static char *handle_lockdep(void)
 {
 	const char *header = "Lockdep not active\n\n/proc/lockdep_stats contents:\n";
@@ -219,6 +347,7 @@ static const struct {
 } abort_handlers[] = {
 	{ ABORT_LOCKDEP, handle_lockdep },
 	{ ABORT_TAINT, handle_taint },
+	{ ABORT_PING, handle_ping },
 	{ 0, 0 },
 };
 
diff --git a/runner/meson.build b/runner/meson.build
index 86521f94..6d8d3ab2 100644
--- a/runner/meson.build
+++ b/runner/meson.build
@@ -13,6 +13,14 @@ runner_test_sources = [ 'runner_tests.c' ]
 runner_json_test_sources = [ 'runner_json_tests.c' ]
 
 jsonc = dependency('json-c', required: build_runner)
+runner_deps = [jsonc, glib]
+runner_c_args = []
+
+liboping = dependency('liboping', required: get_option('oping'))
+if liboping.found()
+	runner_deps += liboping
+	runner_c_args += '-DHAVE_OPING=1'
+endif
 
 if not build_tests and jsonc.found()
 	error('Building test runner requires building tests')
@@ -23,7 +31,8 @@ if jsonc.found()
 
 	runnerlib = static_library('igt_runner', runnerlib_sources,
 				   include_directories : inc,
-				   dependencies : [jsonc, glib])
+				   c_args : runner_c_args,
+				   dependencies : runner_deps)
 
 	runner = executable('igt_runner', runner_sources,
 			    link_with : runnerlib,
@@ -61,6 +70,9 @@ if jsonc.found()
 	test('runner_json', runner_json_test)
 
 	build_info += 'Build test runner: true'
+	if liboping.found()
+		build_info += 'Build test runner with oping: true'
+	endif
 else
 	build_info += 'Build test runner: false'
 endif
diff --git a/runner/settings.c b/runner/settings.c
index 8b39c063..d601cd11 100644
--- a/runner/settings.c
+++ b/runner/settings.c
@@ -51,6 +51,7 @@ static struct {
 } abort_conditions[] = {
 	{ ABORT_TAINT, "taint" },
 	{ ABORT_LOCKDEP, "lockdep" },
+	{ ABORT_PING, "ping" },
 	{ ABORT_ALL, "all" },
 	{ 0, 0 },
 };
@@ -140,6 +141,9 @@ static const char *usage_str =
 	"                        Possible conditions:\n"
 	"                         lockdep - abort when kernel lockdep has been angered.\n"
 	"                         taint   - abort when kernel becomes fatally tainted.\n"
+	"                         ping    - abort when a host configured in .igtrc or\n"
+	"                                   environment variable IGT_PING_HOSTNAME does\n"
+	"                                   not respond to ping.\n"
 	"                         all     - abort for all of the above.\n"
 	"  -s, --sync            Sync results to disk after every test\n"
 	"  -l {quiet,verbose,dummy}, --log-level {quiet,verbose,dummy}\n"
diff --git a/runner/settings.h b/runner/settings.h
index 6dcfa8c5..13409f04 100644
--- a/runner/settings.h
+++ b/runner/settings.h
@@ -15,9 +15,10 @@ enum {
 
 #define ABORT_TAINT   (1 << 0)
 #define ABORT_LOCKDEP (1 << 1)
-#define ABORT_ALL     (ABORT_TAINT | ABORT_LOCKDEP)
+#define ABORT_PING    (1 << 2)
+#define ABORT_ALL     (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING)
 
-_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP), "ABORT_ALL must be all conditions bitwise or'd");
+_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING), "ABORT_ALL must be all conditions bitwise or'd");
 
 struct regex_list {
 	char **regex_strings;
-- 
2.19.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

  reply	other threads:[~2019-09-09 11:38 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-09 11:38 [igt-dev] [PATCH i-g-t 1/4] lib: Export a function for loading igtrc Petri Latvala
2019-09-09 11:38 ` Petri Latvala [this message]
2019-09-10 12:43   ` [igt-dev] [PATCH i-g-t 2/4] runner: Add support for aborting on network failure Arkadiusz Hiler
2019-09-09 11:38 ` [igt-dev] [PATCH i-g-t 3/4] HAX: Check all conditions to abort Petri Latvala
2019-09-09 11:38 ` [igt-dev] [PATCH i-g-t 4/4] HAX: Print ping times to stderr Petri Latvala
2019-09-09 11:50   ` Petri Latvala
2019-09-09 12:12 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/4] lib: Export a function for loading igtrc Patchwork
2019-09-09 12:29 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/4] lib: Export a function for loading igtrc (rev2) Patchwork
2019-09-09 17:40 ` [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/4] lib: Export a function for loading igtrc Patchwork
2019-09-09 18:31 ` [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/4] lib: Export a function for loading igtrc (rev2) Patchwork
2019-09-10 12:41 ` [igt-dev] [PATCH i-g-t 1/4] lib: Export a function for loading igtrc Arkadiusz Hiler
2019-09-13  9:26   ` Petri Latvala
2019-09-13 10:50     ` Arkadiusz Hiler
2019-09-13 12:48       ` Petri Latvala
2019-09-16 13:00         ` Arkadiusz Hiler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190909113809.1229-2-petri.latvala@intel.com \
    --to=petri.latvala@intel.com \
    --cc=daniel@ffwll.ch \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=tomi.p.sarvela@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox