All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint
@ 2017-11-29 12:30 Chris Wilson
  2017-11-29 12:40 ` Chris Wilson
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Chris Wilson @ 2017-11-29 12:30 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

Checking for a tainted kernel is a convenient way to see if the test
generated a critical error such as a oops, or machine check.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
---
 lib/Makefile.sources   |  2 ++
 lib/igt_core.c         | 19 +++++++++-
 lib/igt_kernel_taint.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/igt_kernel_taint.h | 34 ++++++++++++++++++
 4 files changed, 149 insertions(+), 1 deletion(-)
 create mode 100644 lib/igt_kernel_taint.c
 create mode 100644 lib/igt_kernel_taint.h

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 6e968d9f..15215390 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -22,6 +22,8 @@ lib_source_list =	 	\
 	igt_gt.h		\
 	igt_gvt.c		\
 	igt_gvt.h		\
+	igt_kernel_taint.c	\
+	igt_kernel_taint.h	\
 	igt_primes.c		\
 	igt_primes.h		\
 	igt_rand.c		\
diff --git a/lib/igt_core.c b/lib/igt_core.c
index 6ce83bec..7e656323 100644
--- a/lib/igt_core.c
+++ b/lib/igt_core.c
@@ -63,6 +63,7 @@
 #include "intel_chipset.h"
 #include "intel_io.h"
 #include "igt_debugfs.h"
+#include "igt_kernel_taint.h"
 #include "version.h"
 #include "config.h"
 
@@ -258,6 +259,7 @@ static bool list_subtests = false;
 static char *run_single_subtest = NULL;
 static bool run_single_subtest_found = false;
 static const char *in_subtest = NULL;
+static unsigned long saved_kernel_taint;
 static struct timespec subtest_time;
 static clockid_t igt_clock = (clockid_t)-1;
 static bool in_fixture = false;
@@ -1002,6 +1004,8 @@ bool __igt_run_subtest(const char *subtest_name)
 		return false;
 	}
 
+	saved_kernel_taint = igt_read_kernel_taint();
+
 	kmsg(KERN_INFO "[IGT] %s: starting subtest %s\n", command_str, subtest_name);
 	igt_debug("Starting subtest: %s\n", subtest_name);
 
@@ -1148,8 +1152,21 @@ void __igt_skip_check(const char *file, const int line,
 void igt_success(void)
 {
 	succeeded_one = true;
-	if (in_subtest)
+	if (in_subtest) {
+		unsigned long new_kernel_taints =
+			igt_read_kernel_taint() & ~saved_kernel_taint;
+		unsigned int tainted = igt_kernel_tainted(new_kernel_taints);
+
+		if (tainted) {
+			igt_kernel_taint_print(new_kernel_taints);
+			if (tainted & TAINT_ERROR)
+				exit_subtest("FAIL");
+			else
+				exit_subtest("WARN");
+		}
+
 		exit_subtest("SUCCESS");
+	}
 }
 
 /**
diff --git a/lib/igt_kernel_taint.c b/lib/igt_kernel_taint.c
new file mode 100644
index 00000000..86d9cd20
--- /dev/null
+++ b/lib/igt_kernel_taint.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "igt.h"
+#include "igt_kernel_taint.h"
+#include "igt_sysfs.h"
+
+#define BIT(x) (1ul << (x))
+
+static const struct kernel_taint {
+	const char *msg;
+	unsigned int flags;
+} taints[] = {
+	{ "Non-GPL module loaded" },
+	{ "Forced module load" },
+	{ "Unsafe SMP processor" },
+	{ "Forced module unload" },
+	{ "Machine Check Exception", TAINT_WARN },
+	{ "Bad page detected", TAINT_ERROR },
+	{ "Tainted by user request", TAINT_WARN },
+	{ "System is on fire", TAINT_ERROR },
+	{ "ACPI DSDT has been overridden by user" },
+	{ "OOPS", TAINT_ERROR },
+	{ "Staging driver loaded; are you mad?" },
+	{ "Severe firmware bug workaround active", TAINT_WARN },
+	{ "Out-of-tree module loaded" },
+	{ "Unsigned module loaded" },
+	{ "Soft-lockup detected", TAINT_WARN },
+	{ "Kernel has been live patched" },
+};
+
+unsigned long igt_read_kernel_taint(void)
+{
+	unsigned long taint = 0;
+	int dir;
+
+	dir = open("/proc/sys/kernel", O_RDONLY);
+	if (dir != -1) {
+		igt_sysfs_scanf(dir, "tainted", "%lu", &taint);
+		close(dir);
+	}
+
+	return taint;
+}
+
+void igt_kernel_taint_print(unsigned long taint)
+{
+	igt_info("Kernel taint: %08lx\n", taint);
+	for (long bit = 0; bit < ARRAY_SIZE(taints); bit++) {
+		if (!(taint & BIT(bit)))
+			continue;
+
+		if (taints[bit].flags & (TAINT_WARN | TAINT_ERROR))
+			igt_warn("\t%08lx - %s\n", BIT(bit), taints[bit].msg);
+		else
+			igt_info("\t%08lx - %s \n", BIT(bit), taints[bit].msg);
+	}
+}
+
+unsigned int igt_kernel_tainted(unsigned long taint)
+{
+	unsigned int result = 0;
+
+	for (long bit = 0; bit < ARRAY_SIZE(taints); bit++) {
+		if (!(taint & BIT(bit)))
+			continue;
+
+		result |= taints[bit].flags & (TAINT_WARN | TAINT_ERROR);
+	}
+
+	return result;
+}
diff --git a/lib/igt_kernel_taint.h b/lib/igt_kernel_taint.h
new file mode 100644
index 00000000..2240fe79
--- /dev/null
+++ b/lib/igt_kernel_taint.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef IGT_KERNEL_TAINT_H
+#define IGT_KERNEL_TAINT_H
+
+#define TAINT_WARN 0x1
+#define TAINT_ERROR 0x2
+
+unsigned long igt_read_kernel_taint(void);
+void igt_kernel_taint_print(unsigned long taint);
+unsigned int igt_kernel_tainted(unsigned long taint);
+
+#endif /* IGT_KERNEL_TAINT_H */
-- 
2.15.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint
  2017-11-29 12:30 [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint Chris Wilson
@ 2017-11-29 12:40 ` Chris Wilson
  2017-11-29 13:14   ` Szwichtenberg, Radoslaw
  2017-11-29 13:43 ` Chris Wilson
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2017-11-29 12:40 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

Quoting Chris Wilson (2017-11-29 12:30:23)
> Checking for a tainted kernel is a convenient way to see if the test
> generated a critical error such as a oops, or machine check.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
> ---
> diff --git a/lib/igt_kernel_taint.c b/lib/igt_kernel_taint.c
> new file mode 100644
> index 00000000..86d9cd20
> --- /dev/null
> +++ b/lib/igt_kernel_taint.c
> @@ -0,0 +1,95 @@
> +/*
> + * Copyright 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <unistd.h>
> +#include <fcntl.h>
> +
> +#include "igt.h"
> +#include "igt_kernel_taint.h"
> +#include "igt_sysfs.h"
> +
> +#define BIT(x) (1ul << (x))
> +
> +static const struct kernel_taint {
> +       const char *msg;
> +       unsigned int flags;
> +} taints[] = {
> +       { "Non-GPL module loaded" },
> +       { "Forced module load" },
> +       { "Unsafe SMP processor" },
> +       { "Forced module unload" },
> +       { "Machine Check Exception", TAINT_WARN },
> +       { "Bad page detected", TAINT_ERROR },
> +       { "Tainted by user request", TAINT_WARN },

Since unsafe modparams generate these and we are still using them
extensively, we should probably ignore this one.

> +       { "System is on fire", TAINT_ERROR },
> +       { "ACPI DSDT has been overridden by user" },
> +       { "OOPS", TAINT_ERROR },
> +       { "Staging driver loaded; are you mad?" },
> +       { "Severe firmware bug workaround active", TAINT_WARN },
> +       { "Out-of-tree module loaded" },
> +       { "Unsigned module loaded" },
> +       { "Soft-lockup detected", TAINT_WARN },
> +       { "Kernel has been live patched" },
> +};
> +
> +unsigned long igt_read_kernel_taint(void)

One thing I haven't checked is whether we can clear the kernel taints.
At the moment, once we see an oops, we never report a second test
generating another oops.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint
  2017-11-29 12:40 ` Chris Wilson
@ 2017-11-29 13:14   ` Szwichtenberg, Radoslaw
  2017-11-29 13:23     ` Chris Wilson
  0 siblings, 1 reply; 12+ messages in thread
From: Szwichtenberg, Radoslaw @ 2017-11-29 13:14 UTC (permalink / raw)
  To: intel-gfx@lists.freedesktop.org, chris@chris-wilson.co.uk
  Cc: daniel.vetter@ffwll.ch

On Wed, 2017-11-29 at 12:40 +0000, Chris Wilson wrote:
> Quoting Chris Wilson (2017-11-29 12:30:23)
> > Checking for a tainted kernel is a convenient way to see if the test
> > generated a critical error such as a oops, or machine check.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
> > ---
> > diff --git a/lib/igt_kernel_taint.c b/lib/igt_kernel_taint.c
> > new file mode 100644
> > index 00000000..86d9cd20
> > --- /dev/null
> > +++ b/lib/igt_kernel_taint.c
> > @@ -0,0 +1,95 @@
> > +/*
> > + * Copyright 2017 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the
> > "Software"),
> > + * to deal in the Software without restriction, including without
> > limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> > next
> > + * paragraph) shall be included in all copies or substantial portions of
> > the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
> > OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> > OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include <unistd.h>
> > +#include <fcntl.h>
> > +
> > +#include "igt.h"
> > +#include "igt_kernel_taint.h"
> > +#include "igt_sysfs.h"
> > +
> > +#define BIT(x) (1ul << (x))
> > +
> > +static const struct kernel_taint {
> > +       const char *msg;
> > +       unsigned int flags;
> > +} taints[] = {
> > +       { "Non-GPL module loaded" },
> > +       { "Forced module load" },
> > +       { "Unsafe SMP processor" },
> > +       { "Forced module unload" },
> > +       { "Machine Check Exception", TAINT_WARN },
> > +       { "Bad page detected", TAINT_ERROR },
> > +       { "Tainted by user request", TAINT_WARN },
> 
> Since unsafe modparams generate these and we are still using them
> extensively, we should probably ignore this one.
> 
> > +       { "System is on fire", TAINT_ERROR },
> > +       { "ACPI DSDT has been overridden by user" },
> > +       { "OOPS", TAINT_ERROR },
> > +       { "Staging driver loaded; are you mad?" },
> > +       { "Severe firmware bug workaround active", TAINT_WARN },
> > +       { "Out-of-tree module loaded" },
> > +       { "Unsigned module loaded" },
> > +       { "Soft-lockup detected", TAINT_WARN },
> > +       { "Kernel has been live patched" },
> > +};
> > +
> > +unsigned long igt_read_kernel_taint(void)
> 
> One thing I haven't checked is whether we can clear the kernel taints.
> At the moment, once we see an oops, we never report a second test
> generating another oops.
> -Chris

I guess that clearing kernel taints is not needed when you hit oops - you
probably should stop executing tests and reboot the machine, right?

Radek
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint
  2017-11-29 13:14   ` Szwichtenberg, Radoslaw
@ 2017-11-29 13:23     ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2017-11-29 13:23 UTC (permalink / raw)
  To: Szwichtenberg, Radoslaw, intel-gfx@lists.freedesktop.org
  Cc: daniel.vetter@ffwll.ch

Quoting Szwichtenberg, Radoslaw (2017-11-29 13:14:52)
> On Wed, 2017-11-29 at 12:40 +0000, Chris Wilson wrote:
> > Quoting Chris Wilson (2017-11-29 12:30:23)
> > > Checking for a tainted kernel is a convenient way to see if the test
> > > generated a critical error such as a oops, or machine check.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
> > > ---
> > > diff --git a/lib/igt_kernel_taint.c b/lib/igt_kernel_taint.c
> > > new file mode 100644
> > > index 00000000..86d9cd20
> > > --- /dev/null
> > > +++ b/lib/igt_kernel_taint.c
> > > @@ -0,0 +1,95 @@
> > > +/*
> > > + * Copyright 2017 Intel Corporation
> > > + *
> > > + * Permission is hereby granted, free of charge, to any person obtaining a
> > > + * copy of this software and associated documentation files (the
> > > "Software"),
> > > + * to deal in the Software without restriction, including without
> > > limitation
> > > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > > + * and/or sell copies of the Software, and to permit persons to whom the
> > > + * Software is furnished to do so, subject to the following conditions:
> > > + *
> > > + * The above copyright notice and this permission notice (including the
> > > next
> > > + * paragraph) shall be included in all copies or substantial portions of
> > > the
> > > + * Software.
> > > + *
> > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
> > > OR
> > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> > > OTHER
> > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > > DEALINGS
> > > + * IN THE SOFTWARE.
> > > + */
> > > +
> > > +#include <unistd.h>
> > > +#include <fcntl.h>
> > > +
> > > +#include "igt.h"
> > > +#include "igt_kernel_taint.h"
> > > +#include "igt_sysfs.h"
> > > +
> > > +#define BIT(x) (1ul << (x))
> > > +
> > > +static const struct kernel_taint {
> > > +       const char *msg;
> > > +       unsigned int flags;
> > > +} taints[] = {
> > > +       { "Non-GPL module loaded" },
> > > +       { "Forced module load" },
> > > +       { "Unsafe SMP processor" },
> > > +       { "Forced module unload" },
> > > +       { "Machine Check Exception", TAINT_WARN },
> > > +       { "Bad page detected", TAINT_ERROR },
> > > +       { "Tainted by user request", TAINT_WARN },
> > 
> > Since unsafe modparams generate these and we are still using them
> > extensively, we should probably ignore this one.
> > 
> > > +       { "System is on fire", TAINT_ERROR },
> > > +       { "ACPI DSDT has been overridden by user" },
> > > +       { "OOPS", TAINT_ERROR },
> > > +       { "Staging driver loaded; are you mad?" },
> > > +       { "Severe firmware bug workaround active", TAINT_WARN },
> > > +       { "Out-of-tree module loaded" },
> > > +       { "Unsigned module loaded" },
> > > +       { "Soft-lockup detected", TAINT_WARN },
> > > +       { "Kernel has been live patched" },
> > > +};
> > > +
> > > +unsigned long igt_read_kernel_taint(void)
> > 
> > One thing I haven't checked is whether we can clear the kernel taints.
> > At the moment, once we see an oops, we never report a second test
> > generating another oops.
> > -Chris
> 
> I guess that clearing kernel taints is not needed when you hit oops - you
> probably should stop executing tests and reboot the machine, right?

Oops in the driver tends to stop igt pretty hard. A good rule of thumb
is indeed to abandon all hope and reboot. I'm thinking that with this
sort of early-warning detection in place, we can use the kernel_taint
when we do detect a persistent error, e.g. abandon the run if one flip
times out, or if we fail to park or reset the GPU. All to make that
catastrophic error stand out and not pollute other test results.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint
  2017-11-29 12:30 [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint Chris Wilson
  2017-11-29 12:40 ` Chris Wilson
@ 2017-11-29 13:43 ` Chris Wilson
  2017-11-29 14:34 ` ✓ Fi.CI.BAT: success for " Patchwork
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2017-11-29 13:43 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

Quoting Chris Wilson (2017-11-29 12:30:23)
> +static const struct kernel_taint {
> +       const char *msg;
> +       unsigned int flags;
> +} taints[] = {
> +       { "Non-GPL module loaded" },
> +       { "Forced module load" },
> +       { "Unsafe SMP processor" },
> +       { "Forced module unload" },
> +       { "Machine Check Exception", TAINT_WARN },
> +       { "Bad page detected", TAINT_ERROR },
> +       { "Tainted by user request", TAINT_WARN },
> +       { "System is on fire", TAINT_ERROR },
> +       { "ACPI DSDT has been overridden by user" },
> +       { "OOPS", TAINT_ERROR },
> +       { "Staging driver loaded; are you mad?" },
> +       { "Severe firmware bug workaround active", TAINT_WARN },
> +       { "Out-of-tree module loaded" },
> +       { "Unsigned module loaded" },
> +       { "Soft-lockup detected", TAINT_WARN },
> +       { "Kernel has been live patched" },

There's now also TAINT_AUX, which is unused in the kernel.
	{ "Some other reason!" }

Maybe should also add
	const char *name;
for "AUX" etc.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* ✓ Fi.CI.BAT: success for lib: Check and report if a subtest triggers a new kernel taint
  2017-11-29 12:30 [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint Chris Wilson
  2017-11-29 12:40 ` Chris Wilson
  2017-11-29 13:43 ` Chris Wilson
@ 2017-11-29 14:34 ` Patchwork
  2017-11-29 17:46 ` ✓ Fi.CI.IGT: " Patchwork
  2017-12-04 13:46 ` [PATCH igt] " Joonas Lahtinen
  4 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2017-11-29 14:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: lib: Check and report if a subtest triggers a new kernel taint
URL   : https://patchwork.freedesktop.org/series/34616/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
643dc097156fa9a0ab9286c7c159459cfbe3079e Revert "lib/igt_aux: Make procps optional"

with latest DRM-Tip kernel build CI_DRM_3407
05ece2ad7959 drm-tip: 2017y-11m-29d-13h-15m-55s UTC integration manifest

No testlist changes.

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:442s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:442s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:389s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:522s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:284s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:505s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:512s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:492s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:476s
fi-elk-e7500     total:288  pass:229  dwarn:0   dfail:0   fail:0   skip:59  time:431s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:548s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:357s
fi-hsw-4770r     total:288  pass:224  dwarn:0   dfail:0   fail:0   skip:64  time:261s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:483s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:485s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:535s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:477s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:534s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:592s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:454s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:543s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:568s
fi-skl-6700k     total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:523s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:519s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:446s
fi-snb-2520m     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:553s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:417s
Blacklisted hosts:
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:605s
fi-glk-dsi       total:63   pass:55   dwarn:0   dfail:0   fail:0   skip:7  

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_564/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* ✓ Fi.CI.IGT: success for lib: Check and report if a subtest triggers a new kernel taint
  2017-11-29 12:30 [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint Chris Wilson
                   ` (2 preceding siblings ...)
  2017-11-29 14:34 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2017-11-29 17:46 ` Patchwork
  2017-12-04 13:46 ` [PATCH igt] " Joonas Lahtinen
  4 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2017-11-29 17:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: lib: Check and report if a subtest triggers a new kernel taint
URL   : https://patchwork.freedesktop.org/series/34616/
State : success

== Summary ==

Blacklisted hosts:
shard-hsw        total:2610 pass:1501 dwarn:3   dfail:1   fail:9   skip:1095 time:8916s
shard-snb        total:2591 pass:1256 dwarn:10  dfail:5   fail:9   skip:1307 time:7637s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_564/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint
  2017-11-29 12:30 [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint Chris Wilson
                   ` (3 preceding siblings ...)
  2017-11-29 17:46 ` ✓ Fi.CI.IGT: " Patchwork
@ 2017-12-04 13:46 ` Joonas Lahtinen
  4 siblings, 0 replies; 12+ messages in thread
From: Joonas Lahtinen @ 2017-12-04 13:46 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter

On Wed, 2017-11-29 at 12:30 +0000, Chris Wilson wrote:
> Checking for a tainted kernel is a convenient way to see if the test
> generated a critical error such as a oops, or machine check.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>

Reviewed-by: Joonas Lahtien <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint
@ 2017-12-05 12:24 Chris Wilson
  2017-12-05 12:32 ` Petri Latvala
  0 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2017-12-05 12:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

Checking for a tainted kernel is a convenient way to see if the test
generated a critical error such as a oops, or machine check.

v2: Docs?

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 lib/Makefile.sources   |   2 +
 lib/igt_core.c         |  19 +++++++-
 lib/igt_kernel_taint.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/igt_kernel_taint.h |  34 ++++++++++++++
 4 files changed, 174 insertions(+), 1 deletion(-)
 create mode 100644 lib/igt_kernel_taint.c
 create mode 100644 lib/igt_kernel_taint.h

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 6e968d9f7..152153908 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -22,6 +22,8 @@ lib_source_list =	 	\
 	igt_gt.h		\
 	igt_gvt.c		\
 	igt_gvt.h		\
+	igt_kernel_taint.c	\
+	igt_kernel_taint.h	\
 	igt_primes.c		\
 	igt_primes.h		\
 	igt_rand.c		\
diff --git a/lib/igt_core.c b/lib/igt_core.c
index 03fa6e4e8..486c5989d 100644
--- a/lib/igt_core.c
+++ b/lib/igt_core.c
@@ -63,6 +63,7 @@
 #include "intel_chipset.h"
 #include "intel_io.h"
 #include "igt_debugfs.h"
+#include "igt_kernel_taint.h"
 #include "version.h"
 #include "config.h"
 
@@ -261,6 +262,7 @@ static bool list_subtests = false;
 static char *run_single_subtest = NULL;
 static bool run_single_subtest_found = false;
 static const char *in_subtest = NULL;
+static unsigned long saved_kernel_taint;
 static struct timespec subtest_time;
 static clockid_t igt_clock = (clockid_t)-1;
 static bool in_fixture = false;
@@ -937,6 +939,8 @@ bool __igt_run_subtest(const char *subtest_name)
 		return false;
 	}
 
+	saved_kernel_taint = igt_read_kernel_taint();
+
 	kmsg(KERN_INFO "[IGT] %s: starting subtest %s\n", command_str, subtest_name);
 	igt_debug("Starting subtest: %s\n", subtest_name);
 
@@ -1083,8 +1087,21 @@ void __igt_skip_check(const char *file, const int line,
 void igt_success(void)
 {
 	succeeded_one = true;
-	if (in_subtest)
+	if (in_subtest) {
+		unsigned long new_kernel_taints =
+			igt_read_kernel_taint() & ~saved_kernel_taint;
+		unsigned int tainted = igt_kernel_tainted(new_kernel_taints);
+
+		if (tainted) {
+			igt_kernel_taint_print(new_kernel_taints);
+			if (tainted & TAINT_ERROR)
+				exit_subtest("FAIL");
+			else
+				exit_subtest("WARN");
+		}
+
 		exit_subtest("SUCCESS");
+	}
 }
 
 /**
diff --git a/lib/igt_kernel_taint.c b/lib/igt_kernel_taint.c
new file mode 100644
index 000000000..7f105ead7
--- /dev/null
+++ b/lib/igt_kernel_taint.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "igt.h"
+#include "igt_kernel_taint.h"
+#include "igt_sysfs.h"
+
+#define BIT(x) (1ul << (x))
+
+static const struct kernel_taint {
+	const char *msg;
+	unsigned int flags;
+} taints[] = {
+	{ "Non-GPL module loaded" },
+	{ "Forced module load" },
+	{ "Unsafe SMP processor" },
+	{ "Forced module unload" },
+	{ "Machine Check Exception", TAINT_WARN },
+	{ "Bad page detected", TAINT_ERROR },
+	{ "Tainted by user request (e.g. modparam_unsafe)" },
+	{ "System is on fire", TAINT_ERROR },
+	{ "ACPI DSDT has been overridden by user" },
+	{ "OOPS", TAINT_ERROR },
+	{ "Staging driver loaded; are you mad?" },
+	{ "Severe firmware bug workaround active", TAINT_WARN },
+	{ "Out-of-tree module loaded" },
+	{ "Unsigned module loaded" },
+	{ "Soft-lockup detected", TAINT_WARN },
+	{ "Kernel has been live patched" },
+};
+
+/**
+ * igt_read_kernel_taint: Reads the kernel's taint status
+ *
+ * The kernel maintains a bitmask of taints (permanent errors) it has
+ * seen since boot. Each taint means that the kernel is subsequently
+ * unreliable, but some taints are more important than others.
+ */
+unsigned long igt_read_kernel_taint(void)
+{
+	unsigned long taint = 0;
+	int dir;
+
+	dir = open("/proc/sys/kernel", O_RDONLY);
+	if (dir != -1) {
+		igt_sysfs_scanf(dir, "tainted", "%lu", &taint);
+		close(dir);
+	}
+
+	return taint;
+}
+
+/**
+ * igt_kernel_taint_print: Print the meaning of a taint bitmask
+ * @taint: The taint bitmask to decode
+ *
+ * Decodes a bitmask into error messages and displays them using
+ * igt_info() for normal errors, and igt_warn() for critical errors.
+ */
+void igt_kernel_taint_print(unsigned long taint)
+{
+	igt_info("Kernel taint: %08lx\n", taint);
+	for (long bit = 0; bit < ARRAY_SIZE(taints); bit++) {
+		if (!(taint & BIT(bit)))
+			continue;
+
+		if (taints[bit].flags & (TAINT_WARN | TAINT_ERROR))
+			igt_warn("\t%08lx - %s\n", BIT(bit), taints[bit].msg);
+		else
+			igt_info("\t%08lx - %s\n", BIT(bit), taints[bit].msg);
+	}
+}
+
+/**
+ * igt_kernel_tainted: Checks the tainted bitmask for crtical errors
+ * @taint: The taint bitmask to decode
+ *
+ * Some kernel taints are more important than others. Some tell us about
+ * user operations that are risky, others tell us when the system is on fire.
+ * igt_kernel_tainted() decodes the taint bitmask into whether we can
+ * safely ignore the taint or should flag an error.
+ *
+ * Returns: a bitmask of TAINT_WARN | TAINT_ERROR.
+ */
+unsigned int igt_kernel_tainted(unsigned long taint)
+{
+	unsigned int result = 0;
+
+	for (long bit = 0; bit < ARRAY_SIZE(taints); bit++) {
+		if (!(taint & BIT(bit)))
+			continue;
+
+		result |= taints[bit].flags & (TAINT_WARN | TAINT_ERROR);
+	}
+
+	return result;
+}
diff --git a/lib/igt_kernel_taint.h b/lib/igt_kernel_taint.h
new file mode 100644
index 000000000..2240fe791
--- /dev/null
+++ b/lib/igt_kernel_taint.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef IGT_KERNEL_TAINT_H
+#define IGT_KERNEL_TAINT_H
+
+#define TAINT_WARN 0x1
+#define TAINT_ERROR 0x2
+
+unsigned long igt_read_kernel_taint(void);
+void igt_kernel_taint_print(unsigned long taint);
+unsigned int igt_kernel_tainted(unsigned long taint);
+
+#endif /* IGT_KERNEL_TAINT_H */
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint
  2017-12-05 12:24 Chris Wilson
@ 2017-12-05 12:32 ` Petri Latvala
  2017-12-05 12:38   ` Chris Wilson
  0 siblings, 1 reply; 12+ messages in thread
From: Petri Latvala @ 2017-12-05 12:32 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx

On Tue, Dec 05, 2017 at 12:24:53PM +0000, Chris Wilson wrote:
> Checking for a tainted kernel is a convenient way to see if the test
> generated a critical error such as a oops, or machine check.
> 
> v2: Docs?
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  lib/Makefile.sources   |   2 +
>  lib/igt_core.c         |  19 +++++++-
>  lib/igt_kernel_taint.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/igt_kernel_taint.h |  34 ++++++++++++++
>  4 files changed, 174 insertions(+), 1 deletion(-)
>  create mode 100644 lib/igt_kernel_taint.c
>  create mode 100644 lib/igt_kernel_taint.h
> 
> diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> index 6e968d9f7..152153908 100644
> --- a/lib/Makefile.sources
> +++ b/lib/Makefile.sources
> @@ -22,6 +22,8 @@ lib_source_list =	 	\
>  	igt_gt.h		\
>  	igt_gvt.c		\
>  	igt_gvt.h		\
> +	igt_kernel_taint.c	\
> +	igt_kernel_taint.h	\
>  	igt_primes.c		\
>  	igt_primes.h		\
>  	igt_rand.c		\
> diff --git a/lib/igt_core.c b/lib/igt_core.c
> index 03fa6e4e8..486c5989d 100644
> --- a/lib/igt_core.c
> +++ b/lib/igt_core.c
> @@ -63,6 +63,7 @@
>  #include "intel_chipset.h"
>  #include "intel_io.h"
>  #include "igt_debugfs.h"
> +#include "igt_kernel_taint.h"
>  #include "version.h"
>  #include "config.h"
>  
> @@ -261,6 +262,7 @@ static bool list_subtests = false;
>  static char *run_single_subtest = NULL;
>  static bool run_single_subtest_found = false;
>  static const char *in_subtest = NULL;
> +static unsigned long saved_kernel_taint;
>  static struct timespec subtest_time;
>  static clockid_t igt_clock = (clockid_t)-1;
>  static bool in_fixture = false;
> @@ -937,6 +939,8 @@ bool __igt_run_subtest(const char *subtest_name)
>  		return false;
>  	}
>  
> +	saved_kernel_taint = igt_read_kernel_taint();
> +
>  	kmsg(KERN_INFO "[IGT] %s: starting subtest %s\n", command_str, subtest_name);
>  	igt_debug("Starting subtest: %s\n", subtest_name);
>  
> @@ -1083,8 +1087,21 @@ void __igt_skip_check(const char *file, const int line,
>  void igt_success(void)
>  {
>  	succeeded_one = true;
> -	if (in_subtest)
> +	if (in_subtest) {
> +		unsigned long new_kernel_taints =
> +			igt_read_kernel_taint() & ~saved_kernel_taint;
> +		unsigned int tainted = igt_kernel_tainted(new_kernel_taints);
> +
> +		if (tainted) {
> +			igt_kernel_taint_print(new_kernel_taints);
> +			if (tainted & TAINT_ERROR)
> +				exit_subtest("FAIL");
> +			else
> +				exit_subtest("WARN");
> +		}
> +
>  		exit_subtest("SUCCESS");
> +	}
>  }


If you change the result to FAIL or WARN here, succeeded_one should not be changed.

What of tests that don't have subtests?


-- 
Petri Latvala



>  
>  /**
> diff --git a/lib/igt_kernel_taint.c b/lib/igt_kernel_taint.c
> new file mode 100644
> index 000000000..7f105ead7
> --- /dev/null
> +++ b/lib/igt_kernel_taint.c
> @@ -0,0 +1,120 @@
> +/*
> + * Copyright 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <unistd.h>
> +#include <fcntl.h>
> +
> +#include "igt.h"
> +#include "igt_kernel_taint.h"
> +#include "igt_sysfs.h"
> +
> +#define BIT(x) (1ul << (x))
> +
> +static const struct kernel_taint {
> +	const char *msg;
> +	unsigned int flags;
> +} taints[] = {
> +	{ "Non-GPL module loaded" },
> +	{ "Forced module load" },
> +	{ "Unsafe SMP processor" },
> +	{ "Forced module unload" },
> +	{ "Machine Check Exception", TAINT_WARN },
> +	{ "Bad page detected", TAINT_ERROR },
> +	{ "Tainted by user request (e.g. modparam_unsafe)" },
> +	{ "System is on fire", TAINT_ERROR },
> +	{ "ACPI DSDT has been overridden by user" },
> +	{ "OOPS", TAINT_ERROR },
> +	{ "Staging driver loaded; are you mad?" },
> +	{ "Severe firmware bug workaround active", TAINT_WARN },
> +	{ "Out-of-tree module loaded" },
> +	{ "Unsigned module loaded" },
> +	{ "Soft-lockup detected", TAINT_WARN },
> +	{ "Kernel has been live patched" },
> +};
> +
> +/**
> + * igt_read_kernel_taint: Reads the kernel's taint status
> + *
> + * The kernel maintains a bitmask of taints (permanent errors) it has
> + * seen since boot. Each taint means that the kernel is subsequently
> + * unreliable, but some taints are more important than others.
> + */
> +unsigned long igt_read_kernel_taint(void)
> +{
> +	unsigned long taint = 0;
> +	int dir;
> +
> +	dir = open("/proc/sys/kernel", O_RDONLY);
> +	if (dir != -1) {
> +		igt_sysfs_scanf(dir, "tainted", "%lu", &taint);
> +		close(dir);
> +	}
> +
> +	return taint;
> +}
> +
> +/**
> + * igt_kernel_taint_print: Print the meaning of a taint bitmask
> + * @taint: The taint bitmask to decode
> + *
> + * Decodes a bitmask into error messages and displays them using
> + * igt_info() for normal errors, and igt_warn() for critical errors.
> + */
> +void igt_kernel_taint_print(unsigned long taint)
> +{
> +	igt_info("Kernel taint: %08lx\n", taint);
> +	for (long bit = 0; bit < ARRAY_SIZE(taints); bit++) {
> +		if (!(taint & BIT(bit)))
> +			continue;
> +
> +		if (taints[bit].flags & (TAINT_WARN | TAINT_ERROR))
> +			igt_warn("\t%08lx - %s\n", BIT(bit), taints[bit].msg);
> +		else
> +			igt_info("\t%08lx - %s\n", BIT(bit), taints[bit].msg);
> +	}
> +}
> +
> +/**
> + * igt_kernel_tainted: Checks the tainted bitmask for crtical errors
> + * @taint: The taint bitmask to decode
> + *
> + * Some kernel taints are more important than others. Some tell us about
> + * user operations that are risky, others tell us when the system is on fire.
> + * igt_kernel_tainted() decodes the taint bitmask into whether we can
> + * safely ignore the taint or should flag an error.
> + *
> + * Returns: a bitmask of TAINT_WARN | TAINT_ERROR.
> + */
> +unsigned int igt_kernel_tainted(unsigned long taint)
> +{
> +	unsigned int result = 0;
> +
> +	for (long bit = 0; bit < ARRAY_SIZE(taints); bit++) {
> +		if (!(taint & BIT(bit)))
> +			continue;
> +
> +		result |= taints[bit].flags & (TAINT_WARN | TAINT_ERROR);
> +	}
> +
> +	return result;
> +}
> diff --git a/lib/igt_kernel_taint.h b/lib/igt_kernel_taint.h
> new file mode 100644
> index 000000000..2240fe791
> --- /dev/null
> +++ b/lib/igt_kernel_taint.h
> @@ -0,0 +1,34 @@
> +/*
> + * Copyright 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#ifndef IGT_KERNEL_TAINT_H
> +#define IGT_KERNEL_TAINT_H
> +
> +#define TAINT_WARN 0x1
> +#define TAINT_ERROR 0x2
> +
> +unsigned long igt_read_kernel_taint(void);
> +void igt_kernel_taint_print(unsigned long taint);
> +unsigned int igt_kernel_tainted(unsigned long taint);
> +
> +#endif /* IGT_KERNEL_TAINT_H */
> -- 
> 2.15.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint
  2017-12-05 12:32 ` Petri Latvala
@ 2017-12-05 12:38   ` Chris Wilson
  2017-12-05 13:01     ` Petri Latvala
  0 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2017-12-05 12:38 UTC (permalink / raw)
  To: Petri Latvala; +Cc: Daniel Vetter, intel-gfx

Quoting Petri Latvala (2017-12-05 12:32:36)
> On Tue, Dec 05, 2017 at 12:24:53PM +0000, Chris Wilson wrote:
> > Checking for a tainted kernel is a convenient way to see if the test
> > generated a critical error such as a oops, or machine check.
> > 
> > v2: Docs?
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> >  lib/Makefile.sources   |   2 +
> >  lib/igt_core.c         |  19 +++++++-
> >  lib/igt_kernel_taint.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/igt_kernel_taint.h |  34 ++++++++++++++
> >  4 files changed, 174 insertions(+), 1 deletion(-)
> >  create mode 100644 lib/igt_kernel_taint.c
> >  create mode 100644 lib/igt_kernel_taint.h
> > 
> > diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> > index 6e968d9f7..152153908 100644
> > --- a/lib/Makefile.sources
> > +++ b/lib/Makefile.sources
> > @@ -22,6 +22,8 @@ lib_source_list =           \
> >       igt_gt.h                \
> >       igt_gvt.c               \
> >       igt_gvt.h               \
> > +     igt_kernel_taint.c      \
> > +     igt_kernel_taint.h      \
> >       igt_primes.c            \
> >       igt_primes.h            \
> >       igt_rand.c              \
> > diff --git a/lib/igt_core.c b/lib/igt_core.c
> > index 03fa6e4e8..486c5989d 100644
> > --- a/lib/igt_core.c
> > +++ b/lib/igt_core.c
> > @@ -63,6 +63,7 @@
> >  #include "intel_chipset.h"
> >  #include "intel_io.h"
> >  #include "igt_debugfs.h"
> > +#include "igt_kernel_taint.h"
> >  #include "version.h"
> >  #include "config.h"
> >  
> > @@ -261,6 +262,7 @@ static bool list_subtests = false;
> >  static char *run_single_subtest = NULL;
> >  static bool run_single_subtest_found = false;
> >  static const char *in_subtest = NULL;
> > +static unsigned long saved_kernel_taint;
> >  static struct timespec subtest_time;
> >  static clockid_t igt_clock = (clockid_t)-1;
> >  static bool in_fixture = false;
> > @@ -937,6 +939,8 @@ bool __igt_run_subtest(const char *subtest_name)
> >               return false;
> >       }
> >  
> > +     saved_kernel_taint = igt_read_kernel_taint();
> > +
> >       kmsg(KERN_INFO "[IGT] %s: starting subtest %s\n", command_str, subtest_name);
> >       igt_debug("Starting subtest: %s\n", subtest_name);
> >  
> > @@ -1083,8 +1087,21 @@ void __igt_skip_check(const char *file, const int line,
> >  void igt_success(void)
> >  {
> >       succeeded_one = true;
> > -     if (in_subtest)
> > +     if (in_subtest) {
> > +             unsigned long new_kernel_taints =
> > +                     igt_read_kernel_taint() & ~saved_kernel_taint;
> > +             unsigned int tainted = igt_kernel_tainted(new_kernel_taints);
> > +
> > +             if (tainted) {
> > +                     igt_kernel_taint_print(new_kernel_taints);
> > +                     if (tainted & TAINT_ERROR)
> > +                             exit_subtest("FAIL");
> > +                     else
> > +                             exit_subtest("WARN");
> > +             }
> > +
> >               exit_subtest("SUCCESS");
> > +     }
> >  }
> 
> 
> If you change the result to FAIL or WARN here, succeeded_one should not be changed.
> 
> What of tests that don't have subtests?

I don't know where they are tracked. If there is a location we can place
such a before/after check. Or even if we do want to change test status
at all, and just make it a warn if the kernel becomes tainted.

The ambition here isn't just to flag oops reliably, but to respond when
we know the HW is broken and requires rebooting.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint
  2017-12-05 12:38   ` Chris Wilson
@ 2017-12-05 13:01     ` Petri Latvala
  0 siblings, 0 replies; 12+ messages in thread
From: Petri Latvala @ 2017-12-05 13:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx

On Tue, Dec 05, 2017 at 12:38:19PM +0000, Chris Wilson wrote:
> Quoting Petri Latvala (2017-12-05 12:32:36)
> > On Tue, Dec 05, 2017 at 12:24:53PM +0000, Chris Wilson wrote:
> > > Checking for a tainted kernel is a convenient way to see if the test
> > > generated a critical error such as a oops, or machine check.
> > > 
> > > v2: Docs?
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
> > > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > > ---
> > >  lib/Makefile.sources   |   2 +
> > >  lib/igt_core.c         |  19 +++++++-
> > >  lib/igt_kernel_taint.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++
> > >  lib/igt_kernel_taint.h |  34 ++++++++++++++
> > >  4 files changed, 174 insertions(+), 1 deletion(-)
> > >  create mode 100644 lib/igt_kernel_taint.c
> > >  create mode 100644 lib/igt_kernel_taint.h
> > > 
> > > diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> > > index 6e968d9f7..152153908 100644
> > > --- a/lib/Makefile.sources
> > > +++ b/lib/Makefile.sources
> > > @@ -22,6 +22,8 @@ lib_source_list =           \
> > >       igt_gt.h                \
> > >       igt_gvt.c               \
> > >       igt_gvt.h               \
> > > +     igt_kernel_taint.c      \
> > > +     igt_kernel_taint.h      \
> > >       igt_primes.c            \
> > >       igt_primes.h            \
> > >       igt_rand.c              \
> > > diff --git a/lib/igt_core.c b/lib/igt_core.c
> > > index 03fa6e4e8..486c5989d 100644
> > > --- a/lib/igt_core.c
> > > +++ b/lib/igt_core.c
> > > @@ -63,6 +63,7 @@
> > >  #include "intel_chipset.h"
> > >  #include "intel_io.h"
> > >  #include "igt_debugfs.h"
> > > +#include "igt_kernel_taint.h"
> > >  #include "version.h"
> > >  #include "config.h"
> > >  
> > > @@ -261,6 +262,7 @@ static bool list_subtests = false;
> > >  static char *run_single_subtest = NULL;
> > >  static bool run_single_subtest_found = false;
> > >  static const char *in_subtest = NULL;
> > > +static unsigned long saved_kernel_taint;
> > >  static struct timespec subtest_time;
> > >  static clockid_t igt_clock = (clockid_t)-1;
> > >  static bool in_fixture = false;
> > > @@ -937,6 +939,8 @@ bool __igt_run_subtest(const char *subtest_name)
> > >               return false;
> > >       }
> > >  
> > > +     saved_kernel_taint = igt_read_kernel_taint();
> > > +
> > >       kmsg(KERN_INFO "[IGT] %s: starting subtest %s\n", command_str, subtest_name);
> > >       igt_debug("Starting subtest: %s\n", subtest_name);
> > >  
> > > @@ -1083,8 +1087,21 @@ void __igt_skip_check(const char *file, const int line,
> > >  void igt_success(void)
> > >  {
> > >       succeeded_one = true;
> > > -     if (in_subtest)
> > > +     if (in_subtest) {
> > > +             unsigned long new_kernel_taints =
> > > +                     igt_read_kernel_taint() & ~saved_kernel_taint;
> > > +             unsigned int tainted = igt_kernel_tainted(new_kernel_taints);
> > > +
> > > +             if (tainted) {
> > > +                     igt_kernel_taint_print(new_kernel_taints);
> > > +                     if (tainted & TAINT_ERROR)
> > > +                             exit_subtest("FAIL");
> > > +                     else
> > > +                             exit_subtest("WARN");
> > > +             }
> > > +
> > >               exit_subtest("SUCCESS");
> > > +     }
> > >  }
> > 
> > 
> > If you change the result to FAIL or WARN here, succeeded_one should not be changed.
> > 
> > What of tests that don't have subtests?
> 
> I don't know where they are tracked. If there is a location we can place
> such a before/after check. Or even if we do want to change test status
> at all, and just make it a warn if the kernel becomes tainted.
> 
> The ambition here isn't just to flag oops reliably, but to respond when
> we know the HW is broken and requires rebooting.

Still, the behaviour should be consistent whether it's a simple_test
or a subtest, right?

igt_simple_init_parse_opts would be a good place for storing the old
taint status, and checking generated taints / remapping the result
could be done in igt_exit, when test_with_subtests == false.


-- 
Petri Latvala
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-12-05 13:02 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-11-29 12:30 [PATCH igt] lib: Check and report if a subtest triggers a new kernel taint Chris Wilson
2017-11-29 12:40 ` Chris Wilson
2017-11-29 13:14   ` Szwichtenberg, Radoslaw
2017-11-29 13:23     ` Chris Wilson
2017-11-29 13:43 ` Chris Wilson
2017-11-29 14:34 ` ✓ Fi.CI.BAT: success for " Patchwork
2017-11-29 17:46 ` ✓ Fi.CI.IGT: " Patchwork
2017-12-04 13:46 ` [PATCH igt] " Joonas Lahtinen
  -- strict thread matches above, loose matches on Subject: below --
2017-12-05 12:24 Chris Wilson
2017-12-05 12:32 ` Petri Latvala
2017-12-05 12:38   ` Chris Wilson
2017-12-05 13:01     ` Petri Latvala

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.