intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged()
@ 2018-08-15  9:25 Chris Wilson
  2018-08-15  9:40 ` Chris Wilson
                   ` (7 more replies)
  0 siblings, 8 replies; 9+ messages in thread
From: Chris Wilson @ 2018-08-15  9:25 UTC (permalink / raw)
  To: intel-gfx

If we already wedged, i915_gem_set_wedged() becomes a complicated no-op.

v2: Make sure the double set-wedged is synchronous, a parallel call
should not return before the driver is indeed wedged.

References: https://bugs.freedesktop.org/show_bug.cgi?id=107343
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c       | 33 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gpu_error.h |  3 ++-
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0453eb42a1a3..4778d324529e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3298,12 +3298,27 @@ static void nop_complete_submit_request(struct i915_request *request)
 	spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
 }
 
+static void wait_for_wedged(struct i915_gpu_error *error)
+{
+	DEFINE_WAIT_BIT(wq_entry, &error->flags, I915_WEDGED);
+
+	__wait_on_bit(&error->reset_queue,
+		      &wq_entry, bit_wait, TASK_UNINTERRUPTIBLE);
+}
+
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	GEM_TRACE("start\n");
+	if (test_bit(I915_WEDGED, &error->flags))
+		return;
+
+	if (test_and_set_bit(I915_WEDGE_IN_PROGRESS, &error->flags)) {
+		wait_for_wedged(error);
+		return;
+	}
 
 	if (GEM_SHOW_DEBUG()) {
 		struct drm_printer p = drm_debug_printer(__func__);
@@ -3312,8 +3327,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 	}
 
-	if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags))
-		goto out;
+	GEM_TRACE("start\n");
 
 	/*
 	 * First, stop submission to hw, but do not yet complete requests by
@@ -3373,18 +3387,25 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		i915_gem_reset_finish_engine(engine);
 	}
 
-out:
+	smp_mb__before_atomic();
+	set_bit(I915_WEDGED, &error->flags);
+	clear_bit(I915_WEDGE_IN_PROGRESS, &error->flags);
+
 	GEM_TRACE("end\n");
 
-	wake_up_all(&i915->gpu_error.reset_queue);
+	wake_up_all(&error->reset_queue);
 }
 
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct i915_timeline *tl;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
-	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+
+	if (test_bit(I915_WEDGE_IN_PROGRESS, &error->flags))
+		wait_for_wedged(error);
+	if (!test_bit(I915_WEDGED, &error->flags))
 		return true;
 
 	GEM_TRACE("start\n");
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index f893a4e8b783..1a78a8f330f2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -267,8 +267,9 @@ struct i915_gpu_error {
 #define I915_RESET_BACKOFF	0
 #define I915_RESET_HANDOFF	1
 #define I915_RESET_MODESET	2
+#define I915_RESET_ENGINE	3
 #define I915_WEDGED		(BITS_PER_LONG - 1)
-#define I915_RESET_ENGINE	(I915_WEDGED - I915_NUM_ENGINES)
+#define I915_WEDGE_IN_PROGRESS	(I915_WEDGED - 1)
 
 	/** Number of times an engine has been reset */
 	u32 reset_engine_count[I915_NUM_ENGINES];
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged()
  2018-08-15  9:25 [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged() Chris Wilson
@ 2018-08-15  9:40 ` Chris Wilson
  2018-08-15  9:42 ` Chris Wilson
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2018-08-15  9:40 UTC (permalink / raw)
  To: intel-gfx

If we already wedged, i915_gem_set_wedged() becomes a complicated no-op.

v2: Make sure the double set-wedged is synchronous, a parallel call
should not return before the driver is indeed wedged.
v3: Use a real mutex (and fingers crossed that lockdep is ok!)

References: https://bugs.freedesktop.org/show_bug.cgi?id=107343
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c               | 36 +++++++++++++------
 drivers/gpu/drm/i915/i915_gpu_error.h         |  4 ++-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 3 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0453eb42a1a3..d9e00705536d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3300,10 +3300,15 @@ static void nop_complete_submit_request(struct i915_request *request)
 
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	GEM_TRACE("start\n");
+	mutex_lock(&error->wedge_mutex);
+	if (test_bit(I915_WEDGED, &error->flags)) {
+		mutex_unlock(&error->wedge_mutex);
+		return;
+	}
 
 	if (GEM_SHOW_DEBUG()) {
 		struct drm_printer p = drm_debug_printer(__func__);
@@ -3312,8 +3317,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 	}
 
-	if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags))
-		goto out;
+	GEM_TRACE("start\n");
 
 	/*
 	 * First, stop submission to hw, but do not yet complete requests by
@@ -3373,19 +3377,27 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		i915_gem_reset_finish_engine(engine);
 	}
 
-out:
+	smp_mb__before_atomic();
+	set_bit(I915_WEDGED, &error->flags);
+
 	GEM_TRACE("end\n");
+	mutex_unlock(&error->wedge_mutex);
 
-	wake_up_all(&i915->gpu_error.reset_queue);
+	wake_up_all(&error->reset_queue);
 }
 
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct i915_timeline *tl;
+	bool ret = false;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
-	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
-		return true;
+
+	if (!test_bit(I915_WEDGED, &error->flags))
+		return false;
+
+	mutex_lock(&error->wedge_mutex);
 
 	GEM_TRACE("start\n");
 
@@ -3420,7 +3432,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 		 */
 		if (dma_fence_default_wait(&rq->fence, true,
 					   MAX_SCHEDULE_TIMEOUT) < 0)
-			return false;
+			goto unlock;
 	}
 	i915_retire_requests(i915);
 	GEM_BUG_ON(i915->gt.active_requests);
@@ -3439,10 +3451,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 
 	GEM_TRACE("end\n");
 
-	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
-	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+	ret = true;
+unlock:
+	mutex_unlock(&i915->gpu_error.wedge_mutex);
 
-	return true;
+	return ret;
 }
 
 static void
@@ -5764,6 +5777,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 			  i915_gem_idle_work_handler);
 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
+	mutex_init(&dev_priv->gpu_error.wedge_mutex);
 
 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index f893a4e8b783..49950b1231cf 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -267,8 +267,8 @@ struct i915_gpu_error {
 #define I915_RESET_BACKOFF	0
 #define I915_RESET_HANDOFF	1
 #define I915_RESET_MODESET	2
+#define I915_RESET_ENGINE	3
 #define I915_WEDGED		(BITS_PER_LONG - 1)
-#define I915_RESET_ENGINE	(I915_WEDGED - I915_NUM_ENGINES)
 
 	/** Number of times an engine has been reset */
 	u32 reset_engine_count[I915_NUM_ENGINES];
@@ -279,6 +279,8 @@ struct i915_gpu_error {
 	/** Reason for the current *global* reset */
 	const char *reason;
 
+	struct mutex wedge_mutex; /* serialises wedging/unwedging */
+
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
 	 * to release the struct_mutex for the reset to procede.
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 43ed8b28aeaa..6beb1f47e988 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -186,6 +186,7 @@ struct drm_i915_private *mock_gem_device(void)
 
 	init_waitqueue_head(&i915->gpu_error.wait_queue);
 	init_waitqueue_head(&i915->gpu_error.reset_queue);
+	mutex_init(&i915->gpu_error.wedge_mutex);
 
 	i915->wq = alloc_ordered_workqueue("mock", 0);
 	if (!i915->wq)
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged()
  2018-08-15  9:25 [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged() Chris Wilson
  2018-08-15  9:40 ` Chris Wilson
@ 2018-08-15  9:42 ` Chris Wilson
  2018-08-15 10:13 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev6) Patchwork
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2018-08-15  9:42 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2018-08-15 10:25:37)
> If we already wedged, i915_gem_set_wedged() becomes a complicated no-op.
> 
> v2: Make sure the double set-wedged is synchronous, a parallel call
> should not return before the driver is indeed wedged.
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=107343
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>

Fwiw, I actually landed a different version of this patch earlier.

The only advantage (esp. v3) is that we serialise the wedging more
carefully.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev6)
  2018-08-15  9:25 [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged() Chris Wilson
  2018-08-15  9:40 ` Chris Wilson
  2018-08-15  9:42 ` Chris Wilson
@ 2018-08-15 10:13 ` Patchwork
  2018-08-15 10:32 ` ✗ Fi.CI.BAT: failure " Patchwork
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-08-15 10:13 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev6)
URL   : https://patchwork.freedesktop.org/series/47067/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
d5ec539e7d20 drm/i915: Skip repeated calls to i915_gem_set_wedged()
-:52: WARNING:MEMORY_BARRIER: memory barrier without comment
#52: FILE: drivers/gpu/drm/i915/i915_gem.c:3380:
+	smp_mb__before_atomic();

total: 0 errors, 1 warnings, 0 checks, 109 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* ✗ Fi.CI.BAT: failure for drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev6)
  2018-08-15  9:25 [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged() Chris Wilson
                   ` (2 preceding siblings ...)
  2018-08-15 10:13 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev6) Patchwork
@ 2018-08-15 10:32 ` Patchwork
  2018-08-15 12:45 ` [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged() Chris Wilson
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-08-15 10:32 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev6)
URL   : https://patchwork.freedesktop.org/series/47067/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4670 -> Patchwork_9947 =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_9947 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9947, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/47067/revisions/6/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9947:

  === IGT changes ===

    ==== Possible regressions ====

    igt@drv_selftest@live_hangcheck:
      {fi-bdw-samus}:     PASS -> DMESG-FAIL
      fi-hsw-peppy:       PASS -> DMESG-FAIL
      fi-cnl-psr:         PASS -> DMESG-FAIL
      fi-kbl-7500u:       PASS -> DMESG-FAIL
      fi-hsw-4770r:       PASS -> DMESG-FAIL
      fi-kbl-7560u:       PASS -> DMESG-FAIL
      fi-bdw-5557u:       PASS -> DMESG-FAIL
      fi-skl-6700hq:      PASS -> DMESG-FAIL
      fi-skl-gvtdvm:      PASS -> DMESG-FAIL
      fi-skl-6700k2:      PASS -> DMESG-FAIL
      fi-elk-e7500:       PASS -> DMESG-FAIL
      fi-byt-j1900:       PASS -> DMESG-FAIL
      fi-blb-e6850:       PASS -> DMESG-FAIL
      fi-cfl-guc:         PASS -> DMESG-FAIL
      fi-skl-guc:         PASS -> DMESG-FAIL
      fi-skl-6600u:       PASS -> DMESG-FAIL
      fi-pnv-d510:        NOTRUN -> DMESG-FAIL
      {fi-bsw-kefka}:     PASS -> DMESG-FAIL
      fi-cfl-8700k:       PASS -> DMESG-FAIL
      fi-kbl-r:           PASS -> DMESG-FAIL
      fi-byt-n2820:       PASS -> DMESG-FAIL
      {fi-byt-clapper}:   PASS -> DMESG-FAIL
      {fi-cfl-8109u}:     PASS -> DMESG-FAIL
      fi-kbl-guc:         PASS -> DMESG-FAIL
      fi-cfl-s3:          PASS -> DMESG-FAIL
      fi-gdg-551:         PASS -> DMESG-FAIL
      fi-bwr-2160:        PASS -> DMESG-FAIL
      fi-snb-2600:        PASS -> DMESG-FAIL
      fi-skl-6770hq:      PASS -> DMESG-FAIL
      fi-whl-u:           PASS -> DMESG-FAIL
      fi-ivb-3520m:       PASS -> DMESG-FAIL
      fi-hsw-4770:        PASS -> DMESG-FAIL
      fi-bxt-j4205:       PASS -> DMESG-FAIL
      fi-skl-6260u:       PASS -> DMESG-FAIL
      {fi-skl-iommu}:     PASS -> DMESG-FAIL
      fi-glk-j4005:       PASS -> DMESG-FAIL
      fi-ivb-3770:        PASS -> DMESG-FAIL
      fi-ilk-650:         PASS -> DMESG-FAIL
      fi-bsw-n3050:       PASS -> DMESG-FAIL
      fi-bdw-gvtdvm:      PASS -> DMESG-FAIL
      {fi-icl-u}:         PASS -> DMESG-FAIL
      fi-kbl-x1275:       PASS -> DMESG-FAIL
      fi-kbl-7567u:       PASS -> DMESG-FAIL
      fi-glk-dsi:         PASS -> DMESG-FAIL

    
== Known issues ==

  Here are the changes found in Patchwork_9947 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    {igt@amdgpu/amd_basic@userptr}:
      {fi-kbl-8809g}:     PASS -> INCOMPLETE (fdo#107402)

    igt@drv_selftest@live_coherency:
      fi-gdg-551:         PASS -> DMESG-FAIL (fdo#107164)

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
      {fi-byt-clapper}:   PASS -> FAIL (fdo#107362, fdo#103191)

    
    ==== Possible fixes ====

    igt@kms_pipe_crc_basic@hang-read-crc-pipe-a:
      fi-skl-6700k2:      FAIL (fdo#103191) -> PASS

    igt@kms_pipe_crc_basic@nonblocking-crc-pipe-b-frame-sequence:
      {fi-byt-clapper}:   FAIL (fdo#107362, fdo#103191) -> PASS

    
    ==== Warnings ====

    {igt@kms_psr@primary_page_flip}:
      fi-cnl-psr:         DMESG-FAIL (fdo#107372) -> DMESG-WARN (fdo#107372)

    
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#107164 https://bugs.freedesktop.org/show_bug.cgi?id=107164
  fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362
  fdo#107372 https://bugs.freedesktop.org/show_bug.cgi?id=107372
  fdo#107402 https://bugs.freedesktop.org/show_bug.cgi?id=107402


== Participating hosts (53 -> 48) ==

  Additional (1): fi-pnv-d510 
  Missing    (6): fi-ilk-m540 fi-bxt-dsi fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 


== Build changes ==

    * Linux: CI_DRM_4670 -> Patchwork_9947

  CI_DRM_4670: 36137e724bdaee1ab5cee7142d06898d45b8aeb3 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4598: 9c0f04355107a8693650b16756b6343a78501138 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9947: d5ec539e7d207822aba0c0a6671e0fbfbe31882d @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

d5ec539e7d20 drm/i915: Skip repeated calls to i915_gem_set_wedged()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9947/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged()
  2018-08-15  9:25 [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged() Chris Wilson
                   ` (3 preceding siblings ...)
  2018-08-15 10:32 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2018-08-15 12:45 ` Chris Wilson
  2018-08-15 13:06 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev7) Patchwork
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2018-08-15 12:45 UTC (permalink / raw)
  To: intel-gfx

If we already wedged, i915_gem_set_wedged() becomes a complicated no-op.

v2: Make sure the double set-wedged is synchronous, a parallel call
should not return before the driver is indeed wedged.
v3: Use a real mutex (and fingers crossed that lockdep is ok!)
v4: Fix incorrect conversion to return false if unwedged.

References: https://bugs.freedesktop.org/show_bug.cgi?id=107343
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c               | 34 +++++++++++++------
 drivers/gpu/drm/i915/i915_gpu_error.h         |  4 ++-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0453eb42a1a3..98a3e2601ab4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3300,10 +3300,15 @@ static void nop_complete_submit_request(struct i915_request *request)
 
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	GEM_TRACE("start\n");
+	mutex_lock(&error->wedge_mutex);
+	if (test_bit(I915_WEDGED, &error->flags)) {
+		mutex_unlock(&error->wedge_mutex);
+		return;
+	}
 
 	if (GEM_SHOW_DEBUG()) {
 		struct drm_printer p = drm_debug_printer(__func__);
@@ -3312,8 +3317,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 	}
 
-	if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags))
-		goto out;
+	GEM_TRACE("start\n");
 
 	/*
 	 * First, stop submission to hw, but do not yet complete requests by
@@ -3373,20 +3377,28 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		i915_gem_reset_finish_engine(engine);
 	}
 
-out:
+	smp_mb__before_atomic();
+	set_bit(I915_WEDGED, &error->flags);
+
 	GEM_TRACE("end\n");
+	mutex_unlock(&error->wedge_mutex);
 
-	wake_up_all(&i915->gpu_error.reset_queue);
+	wake_up_all(&error->reset_queue);
 }
 
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct i915_timeline *tl;
+	bool ret = false;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
-	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+
+	if (!test_bit(I915_WEDGED, &error->flags))
 		return true;
 
+	mutex_lock(&error->wedge_mutex);
+
 	GEM_TRACE("start\n");
 
 	/*
@@ -3420,7 +3432,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 		 */
 		if (dma_fence_default_wait(&rq->fence, true,
 					   MAX_SCHEDULE_TIMEOUT) < 0)
-			return false;
+			goto unlock;
 	}
 	i915_retire_requests(i915);
 	GEM_BUG_ON(i915->gt.active_requests);
@@ -3439,10 +3451,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 
 	GEM_TRACE("end\n");
 
-	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
-	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+	ret = true;
+unlock:
+	mutex_unlock(&i915->gpu_error.wedge_mutex);
 
-	return true;
+	return ret;
 }
 
 static void
@@ -5764,6 +5777,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 			  i915_gem_idle_work_handler);
 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
+	mutex_init(&dev_priv->gpu_error.wedge_mutex);
 
 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index f893a4e8b783..49950b1231cf 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -267,8 +267,8 @@ struct i915_gpu_error {
 #define I915_RESET_BACKOFF	0
 #define I915_RESET_HANDOFF	1
 #define I915_RESET_MODESET	2
+#define I915_RESET_ENGINE	3
 #define I915_WEDGED		(BITS_PER_LONG - 1)
-#define I915_RESET_ENGINE	(I915_WEDGED - I915_NUM_ENGINES)
 
 	/** Number of times an engine has been reset */
 	u32 reset_engine_count[I915_NUM_ENGINES];
@@ -279,6 +279,8 @@ struct i915_gpu_error {
 	/** Reason for the current *global* reset */
 	const char *reason;
 
+	struct mutex wedge_mutex; /* serialises wedging/unwedging */
+
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
 	 * to release the struct_mutex for the reset to procede.
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 43ed8b28aeaa..6beb1f47e988 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -186,6 +186,7 @@ struct drm_i915_private *mock_gem_device(void)
 
 	init_waitqueue_head(&i915->gpu_error.wait_queue);
 	init_waitqueue_head(&i915->gpu_error.reset_queue);
+	mutex_init(&i915->gpu_error.wedge_mutex);
 
 	i915->wq = alloc_ordered_workqueue("mock", 0);
 	if (!i915->wq)
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev7)
  2018-08-15  9:25 [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged() Chris Wilson
                   ` (4 preceding siblings ...)
  2018-08-15 12:45 ` [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged() Chris Wilson
@ 2018-08-15 13:06 ` Patchwork
  2018-08-15 13:23 ` ✓ Fi.CI.BAT: success " Patchwork
  2018-08-15 15:53 ` ✗ Fi.CI.IGT: failure " Patchwork
  7 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-08-15 13:06 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev7)
URL   : https://patchwork.freedesktop.org/series/47067/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
a2eac6ecd61d drm/i915: Skip repeated calls to i915_gem_set_wedged()
-:53: WARNING:MEMORY_BARRIER: memory barrier without comment
#53: FILE: drivers/gpu/drm/i915/i915_gem.c:3380:
+	smp_mb__before_atomic();

total: 0 errors, 1 warnings, 0 checks, 109 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev7)
  2018-08-15  9:25 [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged() Chris Wilson
                   ` (5 preceding siblings ...)
  2018-08-15 13:06 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev7) Patchwork
@ 2018-08-15 13:23 ` Patchwork
  2018-08-15 15:53 ` ✗ Fi.CI.IGT: failure " Patchwork
  7 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-08-15 13:23 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev7)
URL   : https://patchwork.freedesktop.org/series/47067/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4671 -> Patchwork_9952 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/47067/revisions/7/mbox/

== Known issues ==

  Here are the changes found in Patchwork_9952 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_selftest@live_hangcheck:
      fi-cfl-s3:          PASS -> DMESG-FAIL (fdo#106560)

    igt@gem_exec_suspend@basic-s4-devices:
      fi-kbl-7500u:       PASS -> DMESG-WARN (fdo#105128, fdo#107139)

    igt@kms_chamelium@dp-edid-read:
      fi-kbl-7500u:       PASS -> FAIL (fdo#103841)

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
      fi-snb-2520m:       PASS -> INCOMPLETE (fdo#103713)

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
      fi-bxt-dsi:         PASS -> INCOMPLETE (fdo#103927)

    
    ==== Possible fixes ====

    igt@kms_frontbuffer_tracking@basic:
      fi-hsw-peppy:       DMESG-FAIL (fdo#102614) -> PASS

    
    ==== Warnings ====

    {igt@kms_psr@primary_page_flip}:
      fi-cnl-psr:         DMESG-WARN (fdo#107372) -> DMESG-FAIL (fdo#107372)

    
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
  fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
  fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841
  fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927
  fdo#105128 https://bugs.freedesktop.org/show_bug.cgi?id=105128
  fdo#106560 https://bugs.freedesktop.org/show_bug.cgi?id=106560
  fdo#107139 https://bugs.freedesktop.org/show_bug.cgi?id=107139
  fdo#107372 https://bugs.freedesktop.org/show_bug.cgi?id=107372


== Participating hosts (53 -> 48) ==

  Missing    (5): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4671 -> Patchwork_9952

  CI_DRM_4671: 77a98fa3e9b6eb29d513b1666ecddfdcfc424e86 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4598: 9c0f04355107a8693650b16756b6343a78501138 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9952: a2eac6ecd61d49472c529e17117a72d44e3bfd0e @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

a2eac6ecd61d drm/i915: Skip repeated calls to i915_gem_set_wedged()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9952/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* ✗ Fi.CI.IGT: failure for drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev7)
  2018-08-15  9:25 [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged() Chris Wilson
                   ` (6 preceding siblings ...)
  2018-08-15 13:23 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-08-15 15:53 ` Patchwork
  7 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-08-15 15:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev7)
URL   : https://patchwork.freedesktop.org/series/47067/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4671_full -> Patchwork_9952_full =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_9952_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9952_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9952_full:

  === IGT changes ===

    ==== Possible regressions ====

    igt@drv_selftest@live_execlists:
      shard-hsw:          PASS -> DMESG-WARN +15

    igt@drv_selftest@live_objects:
      shard-glk:          PASS -> DMESG-WARN +14

    igt@gem_eio@execbuf:
      shard-glk:          PASS -> DMESG-FAIL +21

    igt@gem_eio@in-flight-contexts-immediate:
      shard-hsw:          PASS -> DMESG-FAIL +19

    igt@gem_eio@in-flight-immediate:
      shard-apl:          PASS -> DMESG-FAIL +20

    igt@gem_eio@in-flight-internal-1us:
      shard-hsw:          SKIP -> DMESG-FAIL

    igt@gem_eio@in-flight-suspend:
      shard-kbl:          PASS -> DMESG-FAIL +22

    igt@gem_eio@throttle:
      shard-snb:          PASS -> DMESG-FAIL +18

    igt@kms_draw_crc@draw-method-rgb565-render-xtiled:
      shard-apl:          SKIP -> FAIL +1
      shard-glk:          SKIP -> FAIL +1
      shard-hsw:          SKIP -> FAIL

    igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-mmap-gtt:
      shard-snb:          PASS -> FAIL +6

    igt@kms_frontbuffer_tracking@fbc-2p-primscrn-cur-indfb-draw-pwrite:
      shard-hsw:          PASS -> FAIL +50

    igt@kms_frontbuffer_tracking@fbc-rgb565-draw-render:
      shard-snb:          SKIP -> FAIL +1

    igt@perf@sysctl-defaults:
      shard-apl:          PASS -> DMESG-WARN
      shard-kbl:          PASS -> DMESG-WARN

    igt@pm_rpm@cursor-dpms:
      shard-glk:          PASS -> FAIL +6

    igt@pm_rpm@gem-mmap-cpu:
      shard-apl:          PASS -> FAIL +10

    igt@pm_rpm@pm-caching:
      shard-kbl:          PASS -> FAIL

    
    ==== Warnings ====

    igt@gem_exec_reloc@basic-write-read:
      shard-hsw:          PASS -> SKIP +242

    igt@gem_exec_store@basic-vebox:
      shard-kbl:          PASS -> SKIP +169

    igt@gem_mocs_settings@mocs-reset-render:
      shard-apl:          PASS -> SKIP +219

    igt@gem_pwrite_pread@snooped-pwrite-blt-cpu_mmap-correctness:
      shard-snb:          PASS -> SKIP +106

    igt@gem_userptr_blits@map-fixed-invalidate-overlap:
      shard-glk:          PASS -> SKIP +277

    
== Known issues ==

  Here are the changes found in Patchwork_9952_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_cursor_legacy@cursor-vs-flip-varying-size:
      shard-hsw:          PASS -> FAIL (fdo#103355)

    igt@kms_draw_crc@draw-method-rgb565-render-xtiled:
      shard-kbl:          SKIP -> FAIL (fdo#106064)

    igt@kms_draw_crc@draw-method-xrgb2101010-render-untiled:
      shard-kbl:          PASS -> FAIL (fdo#106064)

    igt@kms_flip@2x-flip-vs-expired-vblank:
      shard-glk:          PASS -> FAIL (fdo#105363)

    igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-shrfb-draw-mmap-gtt:
      shard-kbl:          PASS -> FAIL (fdo#106067, fdo#107260) +4

    igt@kms_frontbuffer_tracking@fbc-2p-rte:
      shard-hsw:          PASS -> FAIL (fdo#105682) +1

    igt@kms_frontbuffer_tracking@fbc-rgb101010-draw-render:
      shard-apl:          PASS -> FAIL (fdo#107260) +22

    igt@kms_frontbuffer_tracking@fbc-rgb565-draw-render:
      shard-glk:          SKIP -> FAIL (fdo#107260)
      shard-kbl:          SKIP -> FAIL (fdo#106067, fdo#107260)
      shard-apl:          SKIP -> FAIL (fdo#107260)

    igt@kms_plane_lowres@pipe-a-tiling-y:
      shard-kbl:          PASS -> FAIL (fdo#106066) +6

    igt@kms_rotation_crc@sprite-rotation-90:
      shard-glk:          PASS -> FAIL (fdo#107260) +62

    igt@kms_setmode@basic:
      shard-apl:          PASS -> FAIL (fdo#99912)

    
    ==== Possible fixes ====

    igt@gem_exec_schedule@pi-ringfull-blt:
      shard-glk:          FAIL (fdo#103158) -> SKIP +1
      shard-apl:          FAIL (fdo#103158) -> SKIP +1

    igt@gem_exec_schedule@pi-ringfull-render:
      shard-kbl:          FAIL (fdo#103158) -> SKIP +1

    igt@kms_flip@2x-flip-vs-expired-vblank-interruptible:
      shard-glk:          FAIL (fdo#105363) -> PASS

    igt@kms_setmode@basic:
      shard-kbl:          FAIL (fdo#99912) -> PASS

    
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  fdo#103158 https://bugs.freedesktop.org/show_bug.cgi?id=103158
  fdo#103355 https://bugs.freedesktop.org/show_bug.cgi?id=103355
  fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363
  fdo#105682 https://bugs.freedesktop.org/show_bug.cgi?id=105682
  fdo#106064 https://bugs.freedesktop.org/show_bug.cgi?id=106064
  fdo#106066 https://bugs.freedesktop.org/show_bug.cgi?id=106066
  fdo#106067 https://bugs.freedesktop.org/show_bug.cgi?id=106067
  fdo#107260 https://bugs.freedesktop.org/show_bug.cgi?id=107260
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4671 -> Patchwork_9952

  CI_DRM_4671: 77a98fa3e9b6eb29d513b1666ecddfdcfc424e86 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4598: 9c0f04355107a8693650b16756b6343a78501138 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9952: a2eac6ecd61d49472c529e17117a72d44e3bfd0e @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9952/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-08-15 15:53 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-15  9:25 [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged() Chris Wilson
2018-08-15  9:40 ` Chris Wilson
2018-08-15  9:42 ` Chris Wilson
2018-08-15 10:13 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev6) Patchwork
2018-08-15 10:32 ` ✗ Fi.CI.BAT: failure " Patchwork
2018-08-15 12:45 ` [PATCH] drm/i915: Skip repeated calls to i915_gem_set_wedged() Chris Wilson
2018-08-15 13:06 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev7) Patchwork
2018-08-15 13:23 ` ✓ Fi.CI.BAT: success " Patchwork
2018-08-15 15:53 ` ✗ Fi.CI.IGT: failure " Patchwork

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).