public inbox for igt-dev@lists.freedesktop.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH i-g-t v2] runner: Make taint abort messages more verbose
@ 2018-12-03 15:00 Arkadiusz Hiler
  2018-12-03 15:09 ` Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Arkadiusz Hiler @ 2018-12-03 15:00 UTC (permalink / raw)
  To: igt-dev; +Cc: Petri Latvala

Since not everyone is familiar with kernel taints, and it is easy to get
confused and attribute an abort to an innocent TAINT_USER caused by an
unsafe module option, which is usually the first thing people find
greping dmesg for "taint", we should provide more guidance.

This patch extends the abort log by printing the taint names, as found
in the kernel, along with a short explanation, so people know what to
look for in the dmesg.

v2: rebase, reword

Cc: Petri Latvala <petri.latvala@intel.com>
Signed-off-by: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
---
 runner/executor.c | 38 ++++++++++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/runner/executor.c b/runner/executor.c
index 54c530b7..c0639a66 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -137,13 +137,23 @@ static char *handle_lockdep(void)
 	return NULL;
 }
 
+/* see Linux's include/linux/kernel.h */
+static const struct {
+	unsigned long bit;
+	const char *explanation;
+} abort_taints[] = {
+  {(1 << 5), "TAINT_BAD_PAGE: Bad page reference or an unexpected page flags."},
+  {(1 << 7), "TAINT_DIE: Kernel has died - BUG/OOPS."},
+  {(1 << 9), "TAINT_WARN: WARN_ON has happened."},
+  {0, 0}};
+
 static unsigned long tainted(unsigned long *taints)
 {
-	const unsigned long bad_taints =
-		0x20  | /* TAINT_PAGE */
-		0x80  | /* TAINT_DIE */
-		0x200; /* TAINT_OOPS */
 	FILE *f;
+	unsigned long bad_taints = 0;
+
+	for (typeof(*abort_taints) *taint = abort_taints; taint->bit; taint++)
+		bad_taints |= taint->bit;
 
 	*taints = 0;
 
@@ -158,14 +168,26 @@ static unsigned long tainted(unsigned long *taints)
 
 static char *handle_taint(void)
 {
-	unsigned long taints, bad_taints;
+	unsigned long taints;
 	char *reason;
 
-	bad_taints = tainted(&taints);
-	if (!bad_taints)
+	if (!tainted(&taints))
 		return NULL;
 
-	asprintf(&reason, "Kernel tainted (%#lx -- %lx)", taints, bad_taints);
+	asprintf(&reason, "Kernel badly tainted (%#lx) (check dmesg for details):\n",
+		 taints);
+
+	for (typeof(*abort_taints) *taint = abort_taints; taint->bit; taint++) {
+		if (taint->bit & taints) {
+			char *old_reason = reason;
+			asprintf(&reason, "%s\t(%#lx) %s\n",
+					old_reason,
+					taint->bit,
+					taint->explanation);
+			free(old_reason);
+		}
+	}
+
 	return reason;
 }
 
-- 
2.19.2

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v2] runner: Make taint abort messages more verbose
  2018-12-03 15:00 [igt-dev] [PATCH i-g-t v2] runner: Make taint abort messages more verbose Arkadiusz Hiler
@ 2018-12-03 15:09 ` Chris Wilson
  2018-12-03 15:45   ` Arkadiusz Hiler
  2018-12-03 15:47 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
  2018-12-03 19:48 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
  2 siblings, 1 reply; 5+ messages in thread
From: Chris Wilson @ 2018-12-03 15:09 UTC (permalink / raw)
  To: Arkadiusz Hiler, igt-dev; +Cc: Petri Latvala

Quoting Arkadiusz Hiler (2018-12-03 15:00:12)
> Since not everyone is familiar with kernel taints, and it is easy to get
> confused and attribute an abort to an innocent TAINT_USER caused by an
> unsafe module option, which is usually the first thing people find
> greping dmesg for "taint", we should provide more guidance.
> 
> This patch extends the abort log by printing the taint names, as found
> in the kernel, along with a short explanation, so people know what to
> look for in the dmesg.
> 
> v2: rebase, reword
> 
> Cc: Petri Latvala <petri.latvala@intel.com>
> Signed-off-by: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> ---
>  runner/executor.c | 38 ++++++++++++++++++++++++++++++--------
>  1 file changed, 30 insertions(+), 8 deletions(-)
> 
> diff --git a/runner/executor.c b/runner/executor.c
> index 54c530b7..c0639a66 100644
> --- a/runner/executor.c
> +++ b/runner/executor.c
> @@ -137,13 +137,23 @@ static char *handle_lockdep(void)
>         return NULL;
>  }
>  
> +/* see Linux's include/linux/kernel.h */
> +static const struct {
> +       unsigned long bit;
> +       const char *explanation;
> +} abort_taints[] = {
> +  {(1 << 5), "TAINT_BAD_PAGE: Bad page reference or an unexpected page flags."},
> +  {(1 << 7), "TAINT_DIE: Kernel has died - BUG/OOPS."},
> +  {(1 << 9), "TAINT_WARN: WARN_ON has happened."},
> +  {0, 0}};
> +
>  static unsigned long tainted(unsigned long *taints)
>  {
> -       const unsigned long bad_taints =
> -               0x20  | /* TAINT_PAGE */
> -               0x80  | /* TAINT_DIE */
> -               0x200; /* TAINT_OOPS */
>         FILE *f;
> +       unsigned long bad_taints = 0;
> +
> +       for (typeof(*abort_taints) *taint = abort_taints; taint->bit; taint++)
> +               bad_taints |= taint->bit;
>  
>         *taints = 0;
>  
> @@ -158,14 +168,26 @@ static unsigned long tainted(unsigned long *taints)
>  
>  static char *handle_taint(void)
>  {
> -       unsigned long taints, bad_taints;
> +       unsigned long taints;
>         char *reason;
>  
> -       bad_taints = tainted(&taints);
> -       if (!bad_taints)
> +       if (!tainted(&taints))
>                 return NULL;
>  
> -       asprintf(&reason, "Kernel tainted (%#lx -- %lx)", taints, bad_taints);
> +       asprintf(&reason, "Kernel badly tainted (%#lx) (check dmesg for details):\n",
> +                taints);
> +
> +       for (typeof(*abort_taints) *taint = abort_taints; taint->bit; taint++) {
> +               if (taint->bit & taints) {
> +                       char *old_reason = reason;
> +                       asprintf(&reason, "%s\t(%#lx) %s\n",
> +                                       old_reason,
> +                                       taint->bit,
> +                                       taint->explanation);
> +                       free(old_reason);

Still this gives no more information, than having to manually check
dmesg for the explanation.

	"TAINT_BAD_PAGE: Bad page reference or an unexpected page flags."},
	"TAINT_DIE: Kernel has died - BUG/OOPS."},
	"TAINT_WARN: WARN_ON has happened."},

are quite frankly misleading, and not informative.

And since the runner is handling the dmesg, do the work to present the
relevant information.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v2] runner: Make taint abort messages more verbose
  2018-12-03 15:09 ` Chris Wilson
@ 2018-12-03 15:45   ` Arkadiusz Hiler
  0 siblings, 0 replies; 5+ messages in thread
From: Arkadiusz Hiler @ 2018-12-03 15:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev, Petri Latvala

On Mon, Dec 03, 2018 at 03:09:29PM +0000, Chris Wilson wrote:
> Quoting Arkadiusz Hiler (2018-12-03 15:00:12)
> > Since not everyone is familiar with kernel taints, and it is easy to get
> > confused and attribute an abort to an innocent TAINT_USER caused by an
> > unsafe module option, which is usually the first thing people find
> > greping dmesg for "taint", we should provide more guidance.
> > 
> > This patch extends the abort log by printing the taint names, as found
> > in the kernel, along with a short explanation, so people know what to
> > look for in the dmesg.
> > 
> > v2: rebase, reword
> > 
> > Cc: Petri Latvala <petri.latvala@intel.com>
> > Signed-off-by: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> > ---
> >  runner/executor.c | 38 ++++++++++++++++++++++++++++++--------
> >  1 file changed, 30 insertions(+), 8 deletions(-)
> > 
> > diff --git a/runner/executor.c b/runner/executor.c
> > index 54c530b7..c0639a66 100644
> > --- a/runner/executor.c
> > +++ b/runner/executor.c
> > @@ -137,13 +137,23 @@ static char *handle_lockdep(void)
> >         return NULL;
> >  }
> >  
> > +/* see Linux's include/linux/kernel.h */
> > +static const struct {
> > +       unsigned long bit;
> > +       const char *explanation;
> > +} abort_taints[] = {
> > +  {(1 << 5), "TAINT_BAD_PAGE: Bad page reference or an unexpected page flags."},
> > +  {(1 << 7), "TAINT_DIE: Kernel has died - BUG/OOPS."},
> > +  {(1 << 9), "TAINT_WARN: WARN_ON has happened."},
> > +  {0, 0}};
> > +
> >  static unsigned long tainted(unsigned long *taints)
> >  {
> > -       const unsigned long bad_taints =
> > -               0x20  | /* TAINT_PAGE */
> > -               0x80  | /* TAINT_DIE */
> > -               0x200; /* TAINT_OOPS */
> >         FILE *f;
> > +       unsigned long bad_taints = 0;
> > +
> > +       for (typeof(*abort_taints) *taint = abort_taints; taint->bit; taint++)
> > +               bad_taints |= taint->bit;
> >  
> >         *taints = 0;
> >  
> > @@ -158,14 +168,26 @@ static unsigned long tainted(unsigned long *taints)
> >  
> >  static char *handle_taint(void)
> >  {
> > -       unsigned long taints, bad_taints;
> > +       unsigned long taints;
> >         char *reason;
> >  
> > -       bad_taints = tainted(&taints);
> > -       if (!bad_taints)
> > +       if (!tainted(&taints))
> >                 return NULL;
> >  
> > -       asprintf(&reason, "Kernel tainted (%#lx -- %lx)", taints, bad_taints);
> > +       asprintf(&reason, "Kernel badly tainted (%#lx) (check dmesg for details):\n",
> > +                taints);
> > +
> > +       for (typeof(*abort_taints) *taint = abort_taints; taint->bit; taint++) {
> > +               if (taint->bit & taints) {
> > +                       char *old_reason = reason;
> > +                       asprintf(&reason, "%s\t(%#lx) %s\n",
> > +                                       old_reason,
> > +                                       taint->bit,
> > +                                       taint->explanation);
> > +                       free(old_reason);
> 
> Still this gives no more information, than having to manually check
> dmesg for the explanation.

I still stand by what has been said last week:
https://lists.freedesktop.org/archives/igt-dev/2018-November/007128.html

I have got two more questions on why are we aborting on unsafe module
options since then.

> 	"TAINT_BAD_PAGE: Bad page reference or an unexpected page flags."},
> 	"TAINT_DIE: Kernel has died - BUG/OOPS."},
> 	"TAINT_WARN: WARN_ON has happened."},
> 
> are quite frankly misleading, and not informative.

Any suggestions on how to make them less misleading?

> And since the runner is handling the dmesg, do the work to present the
> relevant information.
> -Chris

WIP by Petri, but that is going to take a while.

-- 
Cheers,
Arek
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for runner: Make taint abort messages more verbose
  2018-12-03 15:00 [igt-dev] [PATCH i-g-t v2] runner: Make taint abort messages more verbose Arkadiusz Hiler
  2018-12-03 15:09 ` Chris Wilson
@ 2018-12-03 15:47 ` Patchwork
  2018-12-03 19:48 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
  2 siblings, 0 replies; 5+ messages in thread
From: Patchwork @ 2018-12-03 15:47 UTC (permalink / raw)
  To: Arkadiusz Hiler; +Cc: igt-dev

== Series Details ==

Series: runner: Make taint abort messages more verbose
URL   : https://patchwork.freedesktop.org/series/53409/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5244 -> IGTPW_2114
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/53409/revisions/1/mbox/

Known issues
------------

  Here are the changes found in IGTPW_2114 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s4-devices:
    - fi-ivb-3520m:       PASS -> FAIL [fdo#108880]

  * igt@i915_selftest@live_contexts:
    - fi-kbl-7560u:       NOTRUN -> INCOMPLETE [fdo#108767]

  * igt@prime_vgem@basic-fence-flip:
    - fi-ilk-650:         PASS -> FAIL [fdo#104008]

  
#### Possible fixes ####

  * igt@gem_basic@create-close:
    - fi-kbl-7560u:       INCOMPLETE -> PASS

  * igt@gem_ctx_create@basic-files:
    - fi-bsw-kefka:       FAIL [fdo#108656] -> PASS

  
  [fdo#104008]: https://bugs.freedesktop.org/show_bug.cgi?id=104008
  [fdo#108656]: https://bugs.freedesktop.org/show_bug.cgi?id=108656
  [fdo#108767]: https://bugs.freedesktop.org/show_bug.cgi?id=108767
  [fdo#108880]: https://bugs.freedesktop.org/show_bug.cgi?id=108880


Participating hosts (48 -> 42)
------------------------------

  Missing    (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-icl-y 


Build changes
-------------

    * IGT: IGT_4736 -> IGTPW_2114

  CI_DRM_5244: 12d14c46966c11670520eed3cd2f1f87c6af7948 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_2114: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2114/
  IGT_4736: 285ebfb3b7adc56586031afa5150c4e5ad40c229 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2114/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for runner: Make taint abort messages more verbose
  2018-12-03 15:00 [igt-dev] [PATCH i-g-t v2] runner: Make taint abort messages more verbose Arkadiusz Hiler
  2018-12-03 15:09 ` Chris Wilson
  2018-12-03 15:47 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
@ 2018-12-03 19:48 ` Patchwork
  2 siblings, 0 replies; 5+ messages in thread
From: Patchwork @ 2018-12-03 19:48 UTC (permalink / raw)
  To: Arkadiusz Hiler; +Cc: igt-dev

== Series Details ==

Series: runner: Make taint abort messages more verbose
URL   : https://patchwork.freedesktop.org/series/53409/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5244_full -> IGTPW_2114_full
====================================================

Summary
-------

  **WARNING**

  Minor unknown changes coming with IGTPW_2114_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_2114_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/53409/revisions/1/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_2114_full:

### IGT changes ###

#### Warnings ####

  * igt@gem_tiled_swapping@non-threaded:
    - shard-kbl:          SKIP -> PASS
    - shard-apl:          SKIP -> PASS

  
Known issues
------------

  Here are the changes found in IGTPW_2114_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_reg_read@timestamp-monotonic:
    - shard-snb:          PASS -> INCOMPLETE [fdo#105411] / [fdo#107469]

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-a:
    - shard-snb:          PASS -> DMESG-WARN [fdo#107956]
    - shard-glk:          PASS -> DMESG-WARN [fdo#107956]

  * igt@kms_cursor_crc@cursor-128x128-suspend:
    - shard-apl:          PASS -> FAIL [fdo#103191] / [fdo#103232]
    - shard-kbl:          PASS -> FAIL [fdo#103191] / [fdo#103232]

  * igt@kms_cursor_crc@cursor-256x85-sliding:
    - shard-apl:          PASS -> FAIL [fdo#103232] +3

  * igt@kms_cursor_crc@cursor-64x64-onscreen:
    - shard-kbl:          PASS -> FAIL [fdo#103232]

  * igt@kms_cursor_crc@cursor-64x64-suspend:
    - shard-glk:          PASS -> FAIL [fdo#103232] +3

  * igt@kms_flip@2x-flip-vs-expired-vblank-interruptible:
    - shard-glk:          PASS -> FAIL [fdo#105363]

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-hsw:          PASS -> DMESG-WARN [fdo#102614] +1

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-mmap-wc:
    - shard-apl:          PASS -> FAIL [fdo#103167] +1

  * igt@kms_frontbuffer_tracking@fbc-1p-rte:
    - shard-glk:          PASS -> FAIL [fdo#103167] / [fdo#105682]

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-mmap-cpu:
    - shard-glk:          PASS -> FAIL [fdo#103167] +10

  * igt@kms_plane_alpha_blend@pipe-b-alpha-transparant-fb:
    - shard-kbl:          NOTRUN -> FAIL [fdo#108145]

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-y:
    - shard-apl:          PASS -> FAIL [fdo#103166] +1

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-y:
    - shard-glk:          PASS -> FAIL [fdo#103166] +5

  * igt@kms_plane_multiple@atomic-pipe-c-tiling-x:
    - shard-kbl:          PASS -> FAIL [fdo#103166]

  
#### Possible fixes ####

  * igt@gem_ppgtt@blt-vs-render-ctxn:
    - shard-kbl:          INCOMPLETE [fdo#103665] / [fdo#106023] / [fdo#106887] -> PASS

  * igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-b:
    - shard-kbl:          DMESG-WARN [fdo#107956] -> PASS

  * igt@kms_cursor_crc@cursor-128x128-sliding:
    - shard-kbl:          FAIL [fdo#103232] -> PASS +1
    - shard-apl:          FAIL [fdo#103232] -> PASS

  * igt@kms_cursor_crc@cursor-128x42-onscreen:
    - shard-glk:          FAIL [fdo#103232] -> PASS +2

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-fullscreen:
    - shard-apl:          FAIL [fdo#103167] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-onoff:
    - shard-glk:          FAIL [fdo#103167] -> PASS +3

  * igt@kms_plane_alpha_blend@pipe-b-alpha-opaque-fb:
    - shard-glk:          FAIL [fdo#108145] -> PASS +2

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-x:
    - shard-apl:          FAIL [fdo#103166] -> PASS +1

  * igt@kms_plane_multiple@atomic-pipe-c-tiling-yf:
    - shard-glk:          FAIL [fdo#103166] -> PASS

  
  [fdo#102614]: https://bugs.freedesktop.org/show_bug.cgi?id=102614
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105411]: https://bugs.freedesktop.org/show_bug.cgi?id=105411
  [fdo#105682]: https://bugs.freedesktop.org/show_bug.cgi?id=105682
  [fdo#106023]: https://bugs.freedesktop.org/show_bug.cgi?id=106023
  [fdo#106887]: https://bugs.freedesktop.org/show_bug.cgi?id=106887
  [fdo#107469]: https://bugs.freedesktop.org/show_bug.cgi?id=107469
  [fdo#107956]: https://bugs.freedesktop.org/show_bug.cgi?id=107956
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145


Participating hosts (7 -> 5)
------------------------------

  Missing    (2): shard-skl shard-iclb 


Build changes
-------------

    * IGT: IGT_4736 -> IGTPW_2114
    * Piglit: piglit_4509 -> None

  CI_DRM_5244: 12d14c46966c11670520eed3cd2f1f87c6af7948 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_2114: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2114/
  IGT_4736: 285ebfb3b7adc56586031afa5150c4e5ad40c229 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2114/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-12-03 19:48 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-12-03 15:00 [igt-dev] [PATCH i-g-t v2] runner: Make taint abort messages more verbose Arkadiusz Hiler
2018-12-03 15:09 ` Chris Wilson
2018-12-03 15:45   ` Arkadiusz Hiler
2018-12-03 15:47 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
2018-12-03 19:48 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox