Intel-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH] drm/i915/gt: Retry RING_HEAD reset until it sticks
@ 2022-12-23 12:18 Andi Shyti
  2022-12-23 13:15 ` [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/gt: Retry RING_HEAD reset until it sticks (rev2) Patchwork
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Andi Shyti @ 2022-12-23 12:18 UTC (permalink / raw)
  To: intel-gfx, dri-devel; +Cc: Andrzej Hajda, Chris Wilson

From: Chris Wilson <chris@chris-wilson.co.uk>

On Haswell, in particular, we see an issue where resets fail because
the engine resumes from an incorrect RING_HEAD. Since the RING_HEAD
doesn't point to the remaining requests to re-run, but may instead point
into the uninitialised portion of the ring, the GPU may then be fed
invalid instructions from a privileged context, often pushing the GPU
into an unrecoverable hang.

If at first the write doesn't succeed, try, try again.

References: https://gitlab.freedesktop.org/drm/intel/-/issues/5432
References: https://gitlab.freedesktop.org/drm/intel/-/issues/3303
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Andrzej Hajda <andrzej.hajda@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 44 +++++++++++++------
 drivers/gpu/drm/i915/i915_utils.h             |  8 ++++
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 827adb0cfaea6..cdf283f5b1427 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -192,6 +192,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
 static int xcs_resume(struct intel_engine_cs *engine)
 {
 	struct intel_ring *ring = engine->legacy.ring;
+	ktime_t kt;
 
 	ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
 		     ring->head, ring->tail);
@@ -230,9 +231,20 @@ static int xcs_resume(struct intel_engine_cs *engine)
 	set_pp_dir(engine);
 
 	/* First wake the ring up to an empty/idle ring */
-	ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
+	until_timeout_ns(kt, 2 * NSEC_PER_MSEC) {
+		ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
+		if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head)
+			break;
+	}
+
 	ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
-	ENGINE_POSTING_READ(engine, RING_TAIL);
+	if (ENGINE_READ_FW(engine, RING_HEAD) != ENGINE_READ_FW(engine, RING_TAIL)) {
+		ENGINE_TRACE(engine, "failed to reset empty ring: [%x, %x]: %x\n",
+			     ENGINE_READ_FW(engine, RING_HEAD),
+			     ENGINE_READ_FW(engine, RING_TAIL),
+			     ring->head);
+		goto err;
+	}
 
 	ENGINE_WRITE_FW(engine, RING_CTL,
 			RING_CTL_SIZE(ring->size) | RING_VALID);
@@ -241,12 +253,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
 	if (__intel_wait_for_register_fw(engine->uncore,
 					 RING_CTL(engine->mmio_base),
 					 RING_VALID, RING_VALID,
-					 5000, 0, NULL))
+					 5000, 0, NULL)) {
+		ENGINE_TRACE(engine, "failed to restart\n");
 		goto err;
+	}
 
-	if (GRAPHICS_VER(engine->i915) > 2)
+	if (GRAPHICS_VER(engine->i915) > 2) {
 		ENGINE_WRITE_FW(engine,
 				RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
+		ENGINE_POSTING_READ(engine, RING_MI_MODE);
+	}
 
 	/* Now awake, let it get started */
 	if (ring->tail != ring->head) {
@@ -259,16 +275,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
 	return 0;
 
 err:
-	drm_err(&engine->i915->drm,
-		"%s initialization failed; "
-		"ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
-		engine->name,
-		ENGINE_READ(engine, RING_CTL),
-		ENGINE_READ(engine, RING_CTL) & RING_VALID,
-		ENGINE_READ(engine, RING_HEAD), ring->head,
-		ENGINE_READ(engine, RING_TAIL), ring->tail,
-		ENGINE_READ(engine, RING_START),
-		i915_ggtt_offset(ring->vma));
+	ENGINE_TRACE(engine,
+		     "initialization failed; "
+		     "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
+		     ENGINE_READ(engine, RING_CTL),
+		     ENGINE_READ(engine, RING_CTL) & RING_VALID,
+		     ENGINE_READ(engine, RING_HEAD), ring->head,
+		     ENGINE_READ(engine, RING_TAIL), ring->tail,
+		     ENGINE_READ(engine, RING_START),
+		     i915_ggtt_offset(ring->vma));
+	GEM_TRACE_DUMP();
 	return -EIO;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index b64192d9c7daa..f24a25c0685e1 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -254,6 +254,14 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 	}
 }
 
+/**
+ * until_timeout_ns - Keep retrying (busy spin) until the duration has passed
+ */
+#define until_timeout_ns(end, timeout_ns) \
+	for ((end) = ktime_get() + (timeout_ns); \
+	     ktime_before(ktime_get(), (end)); \
+	     cpu_relax())
+
 /**
  * __wait_for - magic wait macro
  *
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/gt: Retry RING_HEAD reset until it sticks (rev2)
  2022-12-23 12:18 [Intel-gfx] [PATCH] drm/i915/gt: Retry RING_HEAD reset until it sticks Andi Shyti
@ 2022-12-23 13:15 ` Patchwork
  2022-12-23 13:21 ` [Intel-gfx] [PATCH] drm/i915/gt: Retry RING_HEAD reset until it sticks Tvrtko Ursulin
  2022-12-23 14:30 ` [Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915/gt: Retry RING_HEAD reset until it sticks (rev2) Patchwork
  2 siblings, 0 replies; 5+ messages in thread
From: Patchwork @ 2022-12-23 13:15 UTC (permalink / raw)
  To: Mauro Carvalho Chehab; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 5305 bytes --]

== Series Details ==

Series: drm/i915/gt: Retry RING_HEAD reset until it sticks (rev2)
URL   : https://patchwork.freedesktop.org/series/106377/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_12524 -> Patchwork_106377v2
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/index.html

Participating hosts (45 -> 46)
------------------------------

  Additional (1): bat-atsm-1 

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_106377v2:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_selftest@live@workarounds:
    - {bat-rpls-2}:       [PASS][1] -> [DMESG-FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/bat-rpls-2/igt@i915_selftest@live@workarounds.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/bat-rpls-2/igt@i915_selftest@live@workarounds.html

  
Known issues
------------

  Here are the changes found in Patchwork_106377v2 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live@gt_heartbeat:
    - fi-kbl-soraka:      [PASS][3] -> [DMESG-FAIL][4] ([i915#5334])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/fi-kbl-soraka/igt@i915_selftest@live@gt_heartbeat.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/fi-kbl-soraka/igt@i915_selftest@live@gt_heartbeat.html

  
#### Possible fixes ####

  * igt@gem_exec_gttfill@basic:
    - fi-pnv-d510:        [FAIL][5] ([i915#7229]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/fi-pnv-d510/igt@gem_exec_gttfill@basic.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/fi-pnv-d510/igt@gem_exec_gttfill@basic.html

  * igt@i915_selftest@live@migrate:
    - bat-adlp-4:         [DMESG-FAIL][7] ([i915#7699]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/bat-adlp-4/igt@i915_selftest@live@migrate.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/bat-adlp-4/igt@i915_selftest@live@migrate.html

  * igt@i915_selftest@live@slpc:
    - {bat-adln-1}:       [DMESG-FAIL][9] ([i915#6997]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/bat-adln-1/igt@i915_selftest@live@slpc.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/bat-adln-1/igt@i915_selftest@live@slpc.html

  * igt@i915_selftest@live@workarounds:
    - {bat-rpls-1}:       [DMESG-WARN][11] -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/bat-rpls-1/igt@i915_selftest@live@workarounds.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/bat-rpls-1/igt@i915_selftest@live@workarounds.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109295]: https://bugs.freedesktop.org/show_bug.cgi?id=109295
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#1836]: https://gitlab.freedesktop.org/drm/intel/issues/1836
  [i915#2582]: https://gitlab.freedesktop.org/drm/intel/issues/2582
  [i915#4077]: https://gitlab.freedesktop.org/drm/intel/issues/4077
  [i915#4079]: https://gitlab.freedesktop.org/drm/intel/issues/4079
  [i915#4083]: https://gitlab.freedesktop.org/drm/intel/issues/4083
  [i915#4312]: https://gitlab.freedesktop.org/drm/intel/issues/4312
  [i915#4983]: https://gitlab.freedesktop.org/drm/intel/issues/4983
  [i915#5334]: https://gitlab.freedesktop.org/drm/intel/issues/5334
  [i915#6077]: https://gitlab.freedesktop.org/drm/intel/issues/6077
  [i915#6078]: https://gitlab.freedesktop.org/drm/intel/issues/6078
  [i915#6093]: https://gitlab.freedesktop.org/drm/intel/issues/6093
  [i915#6094]: https://gitlab.freedesktop.org/drm/intel/issues/6094
  [i915#6166]: https://gitlab.freedesktop.org/drm/intel/issues/6166
  [i915#6257]: https://gitlab.freedesktop.org/drm/intel/issues/6257
  [i915#6311]: https://gitlab.freedesktop.org/drm/intel/issues/6311
  [i915#6621]: https://gitlab.freedesktop.org/drm/intel/issues/6621
  [i915#6645]: https://gitlab.freedesktop.org/drm/intel/issues/6645
  [i915#6997]: https://gitlab.freedesktop.org/drm/intel/issues/6997
  [i915#7229]: https://gitlab.freedesktop.org/drm/intel/issues/7229
  [i915#7357]: https://gitlab.freedesktop.org/drm/intel/issues/7357
  [i915#7699]: https://gitlab.freedesktop.org/drm/intel/issues/7699


Build changes
-------------

  * Linux: CI_DRM_12524 -> Patchwork_106377v2

  CI-20190529: 20190529
  CI_DRM_12524: a29956c69a562e85ef8657e39382bc207a339941 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_7102: bacfdc84a9c02556c5441deb21e3a3f18a07347d @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_106377v2: a29956c69a562e85ef8657e39382bc207a339941 @ git://anongit.freedesktop.org/gfx-ci/linux


### Linux commits

408162e35b81 drm/i915/gt: Retry RING_HEAD reset until it sticks

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/index.html

[-- Attachment #2: Type: text/html, Size: 4768 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/gt: Retry RING_HEAD reset until it sticks
  2022-12-23 12:18 [Intel-gfx] [PATCH] drm/i915/gt: Retry RING_HEAD reset until it sticks Andi Shyti
  2022-12-23 13:15 ` [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/gt: Retry RING_HEAD reset until it sticks (rev2) Patchwork
@ 2022-12-23 13:21 ` Tvrtko Ursulin
  2022-12-27 16:33   ` Jani Nikula
  2022-12-23 14:30 ` [Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915/gt: Retry RING_HEAD reset until it sticks (rev2) Patchwork
  2 siblings, 1 reply; 5+ messages in thread
From: Tvrtko Ursulin @ 2022-12-23 13:21 UTC (permalink / raw)
  To: Andi Shyti, intel-gfx, dri-devel; +Cc: Andrzej Hajda


On 23/12/2022 12:18, Andi Shyti wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
> 
> On Haswell, in particular, we see an issue where resets fail because
> the engine resumes from an incorrect RING_HEAD. Since the RING_HEAD
> doesn't point to the remaining requests to re-run, but may instead point
> into the uninitialised portion of the ring, the GPU may then be fed
> invalid instructions from a privileged context, often pushing the GPU
> into an unrecoverable hang.
> 
> If at first the write doesn't succeed, try, try again.
> 
> References: https://gitlab.freedesktop.org/drm/intel/-/issues/5432
> References: https://gitlab.freedesktop.org/drm/intel/-/issues/3303
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
> ---
>   .../gpu/drm/i915/gt/intel_ring_submission.c   | 44 +++++++++++++------
>   drivers/gpu/drm/i915/i915_utils.h             |  8 ++++
>   2 files changed, 38 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> index 827adb0cfaea6..cdf283f5b1427 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> @@ -192,6 +192,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
>   static int xcs_resume(struct intel_engine_cs *engine)
>   {
>   	struct intel_ring *ring = engine->legacy.ring;
> +	ktime_t kt;
>   
>   	ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
>   		     ring->head, ring->tail);
> @@ -230,9 +231,20 @@ static int xcs_resume(struct intel_engine_cs *engine)
>   	set_pp_dir(engine);
>   
>   	/* First wake the ring up to an empty/idle ring */
> -	ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
> +	until_timeout_ns(kt, 2 * NSEC_PER_MSEC) {
> +		ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
> +		if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head)
> +			break;
> +	}

2ms?! Shudder..

#define done \
({ \
	ENGINE_WRITE_FW(engine, RING_HEAD, ring->head); \
	ENGINE_READ_FW(engine, RING_HEAD) == ring->head; \
})
_wait_for_atomic(done, 2 * USEC_PER_MSEC, needs_to_be_atomic_or_not?);
#undef done

Should work and avoid the need to add yet another helper, please 
double-check. Not as pretty, but accumulating generic sounding helpers 
in i915_utils.h is a bit frowned upon.

Regards,

Tvrtko

> +
>   	ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
> -	ENGINE_POSTING_READ(engine, RING_TAIL);
> +	if (ENGINE_READ_FW(engine, RING_HEAD) != ENGINE_READ_FW(engine, RING_TAIL)) {
> +		ENGINE_TRACE(engine, "failed to reset empty ring: [%x, %x]: %x\n",
> +			     ENGINE_READ_FW(engine, RING_HEAD),
> +			     ENGINE_READ_FW(engine, RING_TAIL),
> +			     ring->head);
> +		goto err;
> +	}
>   
>   	ENGINE_WRITE_FW(engine, RING_CTL,
>   			RING_CTL_SIZE(ring->size) | RING_VALID);
> @@ -241,12 +253,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
>   	if (__intel_wait_for_register_fw(engine->uncore,
>   					 RING_CTL(engine->mmio_base),
>   					 RING_VALID, RING_VALID,
> -					 5000, 0, NULL))
> +					 5000, 0, NULL)) {
> +		ENGINE_TRACE(engine, "failed to restart\n");
>   		goto err;
> +	}
>   
> -	if (GRAPHICS_VER(engine->i915) > 2)
> +	if (GRAPHICS_VER(engine->i915) > 2) {
>   		ENGINE_WRITE_FW(engine,
>   				RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
> +		ENGINE_POSTING_READ(engine, RING_MI_MODE);
> +	}
>   
>   	/* Now awake, let it get started */
>   	if (ring->tail != ring->head) {
> @@ -259,16 +275,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
>   	return 0;
>   
>   err:
> -	drm_err(&engine->i915->drm,
> -		"%s initialization failed; "
> -		"ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
> -		engine->name,
> -		ENGINE_READ(engine, RING_CTL),
> -		ENGINE_READ(engine, RING_CTL) & RING_VALID,
> -		ENGINE_READ(engine, RING_HEAD), ring->head,
> -		ENGINE_READ(engine, RING_TAIL), ring->tail,
> -		ENGINE_READ(engine, RING_START),
> -		i915_ggtt_offset(ring->vma));
> +	ENGINE_TRACE(engine,
> +		     "initialization failed; "
> +		     "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
> +		     ENGINE_READ(engine, RING_CTL),
> +		     ENGINE_READ(engine, RING_CTL) & RING_VALID,
> +		     ENGINE_READ(engine, RING_HEAD), ring->head,
> +		     ENGINE_READ(engine, RING_TAIL), ring->tail,
> +		     ENGINE_READ(engine, RING_START),
> +		     i915_ggtt_offset(ring->vma));
> +	GEM_TRACE_DUMP();
>   	return -EIO;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index b64192d9c7daa..f24a25c0685e1 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -254,6 +254,14 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
>   	}
>   }
>   
> +/**
> + * until_timeout_ns - Keep retrying (busy spin) until the duration has passed
> + */
> +#define until_timeout_ns(end, timeout_ns) \
> +	for ((end) = ktime_get() + (timeout_ns); \
> +	     ktime_before(ktime_get(), (end)); \
> +	     cpu_relax())
> +
>   /**
>    * __wait_for - magic wait macro
>    *

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915/gt: Retry RING_HEAD reset until it sticks (rev2)
  2022-12-23 12:18 [Intel-gfx] [PATCH] drm/i915/gt: Retry RING_HEAD reset until it sticks Andi Shyti
  2022-12-23 13:15 ` [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/gt: Retry RING_HEAD reset until it sticks (rev2) Patchwork
  2022-12-23 13:21 ` [Intel-gfx] [PATCH] drm/i915/gt: Retry RING_HEAD reset until it sticks Tvrtko Ursulin
@ 2022-12-23 14:30 ` Patchwork
  2 siblings, 0 replies; 5+ messages in thread
From: Patchwork @ 2022-12-23 14:30 UTC (permalink / raw)
  To: Mauro Carvalho Chehab; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 17316 bytes --]

== Series Details ==

Series: drm/i915/gt: Retry RING_HEAD reset until it sticks (rev2)
URL   : https://patchwork.freedesktop.org/series/106377/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_12524_full -> Patchwork_106377v2_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/index.html

Participating hosts (13 -> 10)
------------------------------

  Missing    (3): pig-skl-6260u pig-kbl-iris pig-glk-j5005 

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_106377v2_full:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@gem_exec_whisper@basic-contexts-priority-all:
    - {shard-rkl}:        [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-1/igt@gem_exec_whisper@basic-contexts-priority-all.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-5/igt@gem_exec_whisper@basic-contexts-priority-all.html

  
Known issues
------------

  Here are the changes found in Patchwork_106377v2_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_fair@basic-throttle@rcs0:
    - shard-glk:          [PASS][3] -> [FAIL][4] ([i915#2842]) +2 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-glk5/igt@gem_exec_fair@basic-throttle@rcs0.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-glk5/igt@gem_exec_fair@basic-throttle@rcs0.html

  * igt@kms_flip@plain-flip-fb-recreate@c-hdmi-a1:
    - shard-glk:          [PASS][5] -> [FAIL][6] ([i915#2122])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-glk4/igt@kms_flip@plain-flip-fb-recreate@c-hdmi-a1.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-glk1/igt@kms_flip@plain-flip-fb-recreate@c-hdmi-a1.html

  
#### Possible fixes ####

  * igt@api_intel_bb@object-reloc-keep-cache:
    - {shard-rkl}:        [SKIP][7] ([i915#3281]) -> [PASS][8] +4 similar issues
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-1/igt@api_intel_bb@object-reloc-keep-cache.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-5/igt@api_intel_bb@object-reloc-keep-cache.html

  * igt@gem_ctx_persistence@engines-hang@bcs0:
    - {shard-rkl}:        [SKIP][9] ([i915#6252]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-5/igt@gem_ctx_persistence@engines-hang@bcs0.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-1/igt@gem_ctx_persistence@engines-hang@bcs0.html

  * igt@gem_exec_fair@basic-none@vcs0:
    - {shard-rkl}:        [FAIL][11] ([i915#2842]) -> [PASS][12] +2 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-6/igt@gem_exec_fair@basic-none@vcs0.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-5/igt@gem_exec_fair@basic-none@vcs0.html

  * igt@gem_pread@snoop:
    - {shard-rkl}:        [SKIP][13] ([i915#3282]) -> [PASS][14] +2 similar issues
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-6/igt@gem_pread@snoop.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-5/igt@gem_pread@snoop.html

  * igt@gen9_exec_parse@bb-start-param:
    - {shard-rkl}:        [SKIP][15] ([i915#2527]) -> [PASS][16] +1 similar issue
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-6/igt@gen9_exec_parse@bb-start-param.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-5/igt@gen9_exec_parse@bb-start-param.html

  * igt@i915_hangman@gt-engine-error@bcs0:
    - {shard-rkl}:        [SKIP][17] ([i915#6258]) -> [PASS][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-5/igt@i915_hangman@gt-engine-error@bcs0.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-1/igt@i915_hangman@gt-engine-error@bcs0.html

  * igt@i915_pm_rpm@modeset-non-lpsp:
    - {shard-dg1}:        [SKIP][19] ([i915#1397]) -> [PASS][20] +3 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-dg1-14/igt@i915_pm_rpm@modeset-non-lpsp.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-dg1-17/igt@i915_pm_rpm@modeset-non-lpsp.html

  * igt@i915_pm_rpm@pm-tiling:
    - {shard-rkl}:        [SKIP][21] ([fdo#109308]) -> [PASS][22]
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-4/igt@i915_pm_rpm@pm-tiling.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-6/igt@i915_pm_rpm@pm-tiling.html

  * igt@kms_atomic@atomic_plane_damage:
    - {shard-rkl}:        [SKIP][23] ([i915#4098]) -> [PASS][24] +1 similar issue
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-5/igt@kms_atomic@atomic_plane_damage.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-6/igt@kms_atomic@atomic_plane_damage.html

  * igt@kms_ccs@pipe-a-bad-pixel-format-y_tiled_gen12_rc_ccs:
    - {shard-rkl}:        [SKIP][25] ([i915#1845] / [i915#4098]) -> [PASS][26] +26 similar issues
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-4/igt@kms_ccs@pipe-a-bad-pixel-format-y_tiled_gen12_rc_ccs.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-6/igt@kms_ccs@pipe-a-bad-pixel-format-y_tiled_gen12_rc_ccs.html

  * igt@kms_cursor_legacy@flip-vs-cursor@atomic-transitions-varying-size:
    - shard-glk:          [FAIL][27] ([i915#2346]) -> [PASS][28]
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-glk5/igt@kms_cursor_legacy@flip-vs-cursor@atomic-transitions-varying-size.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-glk5/igt@kms_cursor_legacy@flip-vs-cursor@atomic-transitions-varying-size.html

  * igt@kms_flip@plain-flip-ts-check@c-hdmi-a1:
    - shard-glk:          [FAIL][29] ([i915#2122]) -> [PASS][30]
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-glk3/igt@kms_flip@plain-flip-ts-check@c-hdmi-a1.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-glk6/igt@kms_flip@plain-flip-ts-check@c-hdmi-a1.html

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-pri-indfb-draw-render:
    - {shard-rkl}:        [SKIP][31] ([i915#1849] / [i915#4098]) -> [PASS][32] +15 similar issues
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-5/igt@kms_frontbuffer_tracking@psr-1p-primscrn-pri-indfb-draw-render.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-6/igt@kms_frontbuffer_tracking@psr-1p-primscrn-pri-indfb-draw-render.html

  * igt@kms_plane_scaling@plane-scaler-with-pixel-format-unity-scaling@pipe-b-hdmi-a-1:
    - {shard-tglu-10}:    [INCOMPLETE][33] -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-tglu-10/igt@kms_plane_scaling@plane-scaler-with-pixel-format-unity-scaling@pipe-b-hdmi-a-1.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-tglu-10/igt@kms_plane_scaling@plane-scaler-with-pixel-format-unity-scaling@pipe-b-hdmi-a-1.html

  * igt@kms_properties@crtc-properties-legacy:
    - {shard-rkl}:        [SKIP][35] ([i915#1849]) -> [PASS][36] +1 similar issue
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-5/igt@kms_properties@crtc-properties-legacy.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-6/igt@kms_properties@crtc-properties-legacy.html

  * igt@kms_psr@sprite_plane_move:
    - {shard-rkl}:        [SKIP][37] ([i915#1072]) -> [PASS][38] +2 similar issues
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12524/shard-rkl-4/igt@kms_psr@sprite_plane_move.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/shard-rkl-6/igt@kms_psr@sprite_plane_move.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109274]: https://bugs.freedesktop.org/show_bug.cgi?id=109274
  [fdo#109279]: https://bugs.freedesktop.org/show_bug.cgi?id=109279
  [fdo#109280]: https://bugs.freedesktop.org/show_bug.cgi?id=109280
  [fdo#109283]: https://bugs.freedesktop.org/show_bug.cgi?id=109283
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109289]: https://bugs.freedesktop.org/show_bug.cgi?id=109289
  [fdo#109291]: https://bugs.freedesktop.org/show_bug.cgi?id=109291
  [fdo#109295]: https://bugs.freedesktop.org/show_bug.cgi?id=109295
  [fdo#109308]: https://bugs.freedesktop.org/show_bug.cgi?id=109308
  [fdo#109312]: https://bugs.freedesktop.org/show_bug.cgi?id=109312
  [fdo#109314]: https://bugs.freedesktop.org/show_bug.cgi?id=109314
  [fdo#109315]: https://bugs.freedesktop.org/show_bug.cgi?id=109315
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#110189]: https://bugs.freedesktop.org/show_bug.cgi?id=110189
  [fdo#110542]: https://bugs.freedesktop.org/show_bug.cgi?id=110542
  [fdo#110723]: https://bugs.freedesktop.org/show_bug.cgi?id=110723
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [fdo#111614]: https://bugs.freedesktop.org/show_bug.cgi?id=111614
  [fdo#111615]: https://bugs.freedesktop.org/show_bug.cgi?id=111615
  [fdo#111644]: https://bugs.freedesktop.org/show_bug.cgi?id=111644
  [fdo#111825]: https://bugs.freedesktop.org/show_bug.cgi?id=111825
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [fdo#112283]: https://bugs.freedesktop.org/show_bug.cgi?id=112283
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#132]: https://gitlab.freedesktop.org/drm/intel/issues/132
  [i915#1397]: https://gitlab.freedesktop.org/drm/intel/issues/1397
  [i915#1769]: https://gitlab.freedesktop.org/drm/intel/issues/1769
  [i915#1825]: https://gitlab.freedesktop.org/drm/intel/issues/1825
  [i915#1839]: https://gitlab.freedesktop.org/drm/intel/issues/1839
  [i915#1845]: https://gitlab.freedesktop.org/drm/intel/issues/1845
  [i915#1849]: https://gitlab.freedesktop.org/drm/intel/issues/1849
  [i915#1937]: https://gitlab.freedesktop.org/drm/intel/issues/1937
  [i915#2122]: https://gitlab.freedesktop.org/drm/intel/issues/2122
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#2346]: https://gitlab.freedesktop.org/drm/intel/issues/2346
  [i915#2434]: https://gitlab.freedesktop.org/drm/intel/issues/2434
  [i915#2437]: https://gitlab.freedesktop.org/drm/intel/issues/2437
  [i915#2527]: https://gitlab.freedesktop.org/drm/intel/issues/2527
  [i915#2575]: https://gitlab.freedesktop.org/drm/intel/issues/2575
  [i915#2587]: https://gitlab.freedesktop.org/drm/intel/issues/2587
  [i915#2658]: https://gitlab.freedesktop.org/drm/intel/issues/2658
  [i915#2672]: https://gitlab.freedesktop.org/drm/intel/issues/2672
  [i915#2681]: https://gitlab.freedesktop.org/drm/intel/issues/2681
  [i915#2705]: https://gitlab.freedesktop.org/drm/intel/issues/2705
  [i915#280]: https://gitlab.freedesktop.org/drm/intel/issues/280
  [i915#2842]: https://gitlab.freedesktop.org/drm/intel/issues/2842
  [i915#2856]: https://gitlab.freedesktop.org/drm/intel/issues/2856
  [i915#2920]: https://gitlab.freedesktop.org/drm/intel/issues/2920
  [i915#2994]: https://gitlab.freedesktop.org/drm/intel/issues/2994
  [i915#3116]: https://gitlab.freedesktop.org/drm/intel/issues/3116
  [i915#3281]: https://gitlab.freedesktop.org/drm/intel/issues/3281
  [i915#3282]: https://gitlab.freedesktop.org/drm/intel/issues/3282
  [i915#3297]: https://gitlab.freedesktop.org/drm/intel/issues/3297
  [i915#3299]: https://gitlab.freedesktop.org/drm/intel/issues/3299
  [i915#3359]: https://gitlab.freedesktop.org/drm/intel/issues/3359
  [i915#3469]: https://gitlab.freedesktop.org/drm/intel/issues/3469
  [i915#3546]: https://gitlab.freedesktop.org/drm/intel/issues/3546
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3558]: https://gitlab.freedesktop.org/drm/intel/issues/3558
  [i915#3591]: https://gitlab.freedesktop.org/drm/intel/issues/3591
  [i915#3637]: https://gitlab.freedesktop.org/drm/intel/issues/3637
  [i915#3638]: https://gitlab.freedesktop.org/drm/intel/issues/3638
  [i915#3689]: https://gitlab.freedesktop.org/drm/intel/issues/3689
  [i915#3708]: https://gitlab.freedesktop.org/drm/intel/issues/3708
  [i915#3734]: https://gitlab.freedesktop.org/drm/intel/issues/3734
  [i915#3742]: https://gitlab.freedesktop.org/drm/intel/issues/3742
  [i915#3840]: https://gitlab.freedesktop.org/drm/intel/issues/3840
  [i915#3886]: https://gitlab.freedesktop.org/drm/intel/issues/3886
  [i915#3966]: https://gitlab.freedesktop.org/drm/intel/issues/3966
  [i915#404]: https://gitlab.freedesktop.org/drm/intel/issues/404
  [i915#4070]: https://gitlab.freedesktop.org/drm/intel/issues/4070
  [i915#4078]: https://gitlab.freedesktop.org/drm/intel/issues/4078
  [i915#4098]: https://gitlab.freedesktop.org/drm/intel/issues/4098
  [i915#4103]: https://gitlab.freedesktop.org/drm/intel/issues/4103
  [i915#426]: https://gitlab.freedesktop.org/drm/intel/issues/426
  [i915#4270]: https://gitlab.freedesktop.org/drm/intel/issues/4270
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4767]: https://gitlab.freedesktop.org/drm/intel/issues/4767
  [i915#4991]: https://gitlab.freedesktop.org/drm/intel/issues/4991
  [i915#5176]: https://gitlab.freedesktop.org/drm/intel/issues/5176
  [i915#5235]: https://gitlab.freedesktop.org/drm/intel/issues/5235
  [i915#5286]: https://gitlab.freedesktop.org/drm/intel/issues/5286
  [i915#5289]: https://gitlab.freedesktop.org/drm/intel/issues/5289
  [i915#5325]: https://gitlab.freedesktop.org/drm/intel/issues/5325
  [i915#5327]: https://gitlab.freedesktop.org/drm/intel/issues/5327
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [i915#5439]: https://gitlab.freedesktop.org/drm/intel/issues/5439
  [i915#5461]: https://gitlab.freedesktop.org/drm/intel/issues/5461
  [i915#5723]: https://gitlab.freedesktop.org/drm/intel/issues/5723
  [i915#5784]: https://gitlab.freedesktop.org/drm/intel/issues/5784
  [i915#6095]: https://gitlab.freedesktop.org/drm/intel/issues/6095
  [i915#6117]: https://gitlab.freedesktop.org/drm/intel/issues/6117
  [i915#6245]: https://gitlab.freedesktop.org/drm/intel/issues/6245
  [i915#6247]: https://gitlab.freedesktop.org/drm/intel/issues/6247
  [i915#6248]: https://gitlab.freedesktop.org/drm/intel/issues/6248
  [i915#6252]: https://gitlab.freedesktop.org/drm/intel/issues/6252
  [i915#6258]: https://gitlab.freedesktop.org/drm/intel/issues/6258
  [i915#6268]: https://gitlab.freedesktop.org/drm/intel/issues/6268
  [i915#6335]: https://gitlab.freedesktop.org/drm/intel/issues/6335
  [i915#6344]: https://gitlab.freedesktop.org/drm/intel/issues/6344
  [i915#6355]: https://gitlab.freedesktop.org/drm/intel/issues/6355
  [i915#6412]: https://gitlab.freedesktop.org/drm/intel/issues/6412
  [i915#6433]: https://gitlab.freedesktop.org/drm/intel/issues/6433
  [i915#6497]: https://gitlab.freedesktop.org/drm/intel/issues/6497
  [i915#6524]: https://gitlab.freedesktop.org/drm/intel/issues/6524
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
  [i915#6590]: https://gitlab.freedesktop.org/drm/intel/issues/6590
  [i915#6768]: https://gitlab.freedesktop.org/drm/intel/issues/6768
  [i915#6946]: https://gitlab.freedesktop.org/drm/intel/issues/6946
  [i915#6953]: https://gitlab.freedesktop.org/drm/intel/issues/6953
  [i915#7037]: https://gitlab.freedesktop.org/drm/intel/issues/7037
  [i915#7116]: https://gitlab.freedesktop.org/drm/intel/issues/7116
  [i915#7118]: https://gitlab.freedesktop.org/drm/intel/issues/7118
  [i915#7128]: https://gitlab.freedesktop.org/drm/intel/issues/7128
  [i915#7276]: https://gitlab.freedesktop.org/drm/intel/issues/7276
  [i915#7561]: https://gitlab.freedesktop.org/drm/intel/issues/7561
  [i915#7582]: https://gitlab.freedesktop.org/drm/intel/issues/7582
  [i915#7651]: https://gitlab.freedesktop.org/drm/intel/issues/7651
  [i915#7681]: https://gitlab.freedesktop.org/drm/intel/issues/7681
  [i915#7697]: https://gitlab.freedesktop.org/drm/intel/issues/7697
  [i915#7701]: https://gitlab.freedesktop.org/drm/intel/issues/7701
  [i915#7707]: https://gitlab.freedesktop.org/drm/intel/issues/7707


Build changes
-------------

  * Linux: CI_DRM_12524 -> Patchwork_106377v2
  * Piglit: piglit_4509 -> None

  CI-20190529: 20190529
  CI_DRM_12524: a29956c69a562e85ef8657e39382bc207a339941 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_7102: bacfdc84a9c02556c5441deb21e3a3f18a07347d @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_106377v2: a29956c69a562e85ef8657e39382bc207a339941 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_106377v2/index.html

[-- Attachment #2: Type: text/html, Size: 11411 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/gt: Retry RING_HEAD reset until it sticks
  2022-12-23 13:21 ` [Intel-gfx] [PATCH] drm/i915/gt: Retry RING_HEAD reset until it sticks Tvrtko Ursulin
@ 2022-12-27 16:33   ` Jani Nikula
  0 siblings, 0 replies; 5+ messages in thread
From: Jani Nikula @ 2022-12-27 16:33 UTC (permalink / raw)
  To: Tvrtko Ursulin, Andi Shyti, intel-gfx, dri-devel; +Cc: Andrzej Hajda

On Fri, 23 Dec 2022, Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> wrote:
> On 23/12/2022 12:18, Andi Shyti wrote:
>> From: Chris Wilson <chris@chris-wilson.co.uk>
>> 
>> On Haswell, in particular, we see an issue where resets fail because
>> the engine resumes from an incorrect RING_HEAD. Since the RING_HEAD
>> doesn't point to the remaining requests to re-run, but may instead point
>> into the uninitialised portion of the ring, the GPU may be then fed
>> invalid instructions from a privileged context, often pushing the GPU
>> into an unrecoverable hang.
>> 
>> If at first the write doesn't succeed, try, try again.
>> 
>> References: https://gitlab.freedesktop.org/drm/intel/-/issues/5432
>> References: https://gitlab.freedesktop.org/drm/intel/-/issues/3303
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
>> ---
>>   .../gpu/drm/i915/gt/intel_ring_submission.c   | 44 +++++++++++++------
>>   drivers/gpu/drm/i915/i915_utils.h             |  8 ++++
>>   2 files changed, 38 insertions(+), 14 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
>> index 827adb0cfaea6..cdf283f5b1427 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
>> @@ -192,6 +192,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
>>   static int xcs_resume(struct intel_engine_cs *engine)
>>   {
>>   	struct intel_ring *ring = engine->legacy.ring;
>> +	ktime_t kt;
>>   
>>   	ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
>>   		     ring->head, ring->tail);
>> @@ -230,9 +231,20 @@ static int xcs_resume(struct intel_engine_cs *engine)
>>   	set_pp_dir(engine);
>>   
>>   	/* First wake the ring up to an empty/idle ring */
>> -	ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
>> +	until_timeout_ns(kt, 2 * NSEC_PER_MSEC) {
>> +		ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
>> +		if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head)
>> +			break;
>> +	}
>
> 2ms?! Shudder..
>
> #define done \
> ({ \
> 	ENGINE_WRITE_FW(engine, RING_HEAD, ring->head); \
> 	ENGINE_READ_FW(engine, RING_HEAD) == ring->head; \
> })
> _wait_for_atomic(done, 2 * USEC_PER_MSEC, needs_to_be_atomic_or_not?);
> #undef done
>
> Should work and avoid the need to add yet another helper, please 
> double-check. Not as pretty, but accumulating generic sounding helpers 
> in i915_utils.h is a bit frowned upon.

Yeah, please no more helpers like this. They're not helping.

BR,
Jani.


>
> Regards,
>
> Tvrtko
>
>> +
>>   	ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
>> -	ENGINE_POSTING_READ(engine, RING_TAIL);
>> +	if (ENGINE_READ_FW(engine, RING_HEAD) != ENGINE_READ_FW(engine, RING_TAIL)) {
>> +		ENGINE_TRACE(engine, "failed to reset empty ring: [%x, %x]: %x\n",
>> +			     ENGINE_READ_FW(engine, RING_HEAD),
>> +			     ENGINE_READ_FW(engine, RING_TAIL),
>> +			     ring->head);
>> +		goto err;
>> +	}
>>   
>>   	ENGINE_WRITE_FW(engine, RING_CTL,
>>   			RING_CTL_SIZE(ring->size) | RING_VALID);
>> @@ -241,12 +253,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
>>   	if (__intel_wait_for_register_fw(engine->uncore,
>>   					 RING_CTL(engine->mmio_base),
>>   					 RING_VALID, RING_VALID,
>> -					 5000, 0, NULL))
>> +					 5000, 0, NULL)) {
>> +		ENGINE_TRACE(engine, "failed to restart\n");
>>   		goto err;
>> +	}
>>   
>> -	if (GRAPHICS_VER(engine->i915) > 2)
>> +	if (GRAPHICS_VER(engine->i915) > 2) {
>>   		ENGINE_WRITE_FW(engine,
>>   				RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
>> +		ENGINE_POSTING_READ(engine, RING_MI_MODE);
>> +	}
>>   
>>   	/* Now awake, let it get started */
>>   	if (ring->tail != ring->head) {
>> @@ -259,16 +275,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
>>   	return 0;
>>   
>>   err:
>> -	drm_err(&engine->i915->drm,
>> -		"%s initialization failed; "
>> -		"ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
>> -		engine->name,
>> -		ENGINE_READ(engine, RING_CTL),
>> -		ENGINE_READ(engine, RING_CTL) & RING_VALID,
>> -		ENGINE_READ(engine, RING_HEAD), ring->head,
>> -		ENGINE_READ(engine, RING_TAIL), ring->tail,
>> -		ENGINE_READ(engine, RING_START),
>> -		i915_ggtt_offset(ring->vma));
>> +	ENGINE_TRACE(engine,
>> +		     "initialization failed; "
>> +		     "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
>> +		     ENGINE_READ(engine, RING_CTL),
>> +		     ENGINE_READ(engine, RING_CTL) & RING_VALID,
>> +		     ENGINE_READ(engine, RING_HEAD), ring->head,
>> +		     ENGINE_READ(engine, RING_TAIL), ring->tail,
>> +		     ENGINE_READ(engine, RING_START),
>> +		     i915_ggtt_offset(ring->vma));
>> +	GEM_TRACE_DUMP();
>>   	return -EIO;
>>   }
>>   
>> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
>> index b64192d9c7daa..f24a25c0685e1 100644
>> --- a/drivers/gpu/drm/i915/i915_utils.h
>> +++ b/drivers/gpu/drm/i915/i915_utils.h
>> @@ -254,6 +254,14 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
>>   	}
>>   }
>>   
>> +/**
>> + * until_timeout_ns - Keep retrying (busy spin) until the duration has passed
>> + */
>> +#define until_timeout_ns(end, timeout_ns) \
>> +	for ((end) = ktime_get() + (timeout_ns); \
>> +	     ktime_before(ktime_get(), (end)); \
>> +	     cpu_relax())
>> +
>>   /**
>>    * __wait_for - magic wait macro
>>    *

-- 
Jani Nikula, Intel Open Source Graphics Center

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-12-27 16:33 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-23 12:18 [Intel-gfx] [PATCH] drm/i915/gt: Retry RING_HEAD reset until it sticks Andi Shyti
2022-12-23 13:15 ` [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/gt: Retry RING_HEAD reset until it sticks (rev2) Patchwork
2022-12-23 13:21 ` [Intel-gfx] [PATCH] drm/i915/gt: Retry RING_HEAD reset until it sticks Tvrtko Ursulin
2022-12-27 16:33   ` Jani Nikula
2022-12-23 14:30 ` [Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915/gt: Retry RING_HEAD reset until it sticks (rev2) Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox