All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Auld <matthew.auld@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Subject: [Intel-xe] [PATCH v2] drm/xe: nuke GuC on unload
Date: Thu, 24 Aug 2023 17:04:45 +0100	[thread overview]
Message-ID: <20230824160444.494648-2-matthew.auld@intel.com> (raw)

On PVC, unloading followed by reloading the module often results in a
completely dead machine (this seems to be plaguing CI). Resetting the GuC,
like we do at load, seems to cure it, at least in local testing.

v2:
  - Move pc_fini into guc_fini. We want to do the GuC reset just after
    calling pc_fini, otherwise we encounter communication failures. It
    also seems like a good idea to do the reset before we start releasing
    the various other GuC resources. In the case of pc_fini there is an
    explicit stop, but for other resources such as logs, ADS and CTB
    there is no explicit stop.

References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/542
References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/597
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c    | 17 +++++++++++++++++
 drivers/gpu/drm/xe/xe_guc_pc.c |  8 +-------
 drivers/gpu/drm/xe/xe_guc_pc.h |  1 +
 drivers/gpu/drm/xe/xe_uc.c     |  5 +++++
 drivers/gpu/drm/xe/xe_uc.h     |  1 +
 5 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index e102637c0695..3cea4323113f 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -5,6 +5,8 @@
 
 #include "xe_guc.h"
 
+#include <drm/drm_managed.h>
+
 #include "generated/xe_wa_oob.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_guc_regs.h"
@@ -20,6 +22,7 @@
 #include "xe_guc_submit.h"
 #include "xe_mmio.h"
 #include "xe_platform_types.h"
+#include "xe_uc.h"
 #include "xe_uc_fw.h"
 #include "xe_wa.h"
 #include "xe_wopcm.h"
@@ -217,6 +220,16 @@ static void guc_write_params(struct xe_guc *guc)
 		xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]);
 }
 
+static void guc_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc *guc = arg;
+
+	xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+	xe_guc_pc_fini(&guc->pc);
+	xe_uc_fini_hw(&guc_to_gt(guc)->uc);
+	xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+}
+
 int xe_guc_init(struct xe_guc *guc)
 {
 	struct xe_device *xe = guc_to_xe(guc);
@@ -244,6 +257,10 @@ int xe_guc_init(struct xe_guc *guc)
 	if (ret)
 		goto out;
 
+	ret = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, guc_fini, guc);
+	if (ret)
+		return ret;
+
 	guc_init_params(guc);
 
 	if (xe_gt_is_media_type(gt))
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index c03bb58e7049..87de1ce40e07 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -884,10 +884,8 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc)
 	return ret;
 }
 
-static void pc_fini(struct drm_device *drm, void *arg)
+void xe_guc_pc_fini(struct xe_guc_pc *pc)
 {
-	struct xe_guc_pc *pc = arg;
-
 	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
 	XE_WARN_ON(xe_guc_pc_stop(pc));
 	sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs);
@@ -925,9 +923,5 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
 	if (err)
 		return err;
 
-	err = drmm_add_action_or_reset(&xe->drm, pc_fini, pc);
-	if (err)
-		return err;
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 81833a53b3c9..43ea582545b5 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -9,6 +9,7 @@
 #include "xe_guc_pc_types.h"
 
 int xe_guc_pc_init(struct xe_guc_pc *pc);
+void xe_guc_pc_fini(struct xe_guc_pc *pc);
 int xe_guc_pc_start(struct xe_guc_pc *pc);
 int xe_guc_pc_stop(struct xe_guc_pc *pc);
 int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index addd6f2681b9..9c8ce504f4da 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -167,6 +167,11 @@ int xe_uc_init_hw(struct xe_uc *uc)
 	return 0;
 }
 
+int xe_uc_fini_hw(struct xe_uc *uc)
+{
+	return xe_uc_sanitize_reset(uc);
+}
+
 int xe_uc_reset_prepare(struct xe_uc *uc)
 {
 	/* GuC submission not enabled, nothing to do */
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
index 42219b361df5..4109ae7028af 100644
--- a/drivers/gpu/drm/xe/xe_uc.h
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -12,6 +12,7 @@ int xe_uc_init(struct xe_uc *uc);
 int xe_uc_init_hwconfig(struct xe_uc *uc);
 int xe_uc_init_post_hwconfig(struct xe_uc *uc);
 int xe_uc_init_hw(struct xe_uc *uc);
+int xe_uc_fini_hw(struct xe_uc *uc);
 void xe_uc_gucrc_disable(struct xe_uc *uc);
 int xe_uc_reset_prepare(struct xe_uc *uc);
 void xe_uc_stop_prepare(struct xe_uc *uc);
-- 
2.41.0


             reply	other threads:[~2023-08-24 16:09 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-24 16:04 Matthew Auld [this message]
2023-08-24 16:50 ` [Intel-xe] ✓ CI.Patch_applied: success for drm/xe: nuke GuC on unload Patchwork
2023-08-24 16:50 ` [Intel-xe] ✗ CI.checkpatch: warning " Patchwork
2023-08-24 16:51 ` [Intel-xe] ✓ CI.KUnit: success " Patchwork
2023-08-24 16:55 ` [Intel-xe] ✓ CI.Build: " Patchwork
2023-08-24 16:55 ` [Intel-xe] ✓ CI.Hooks: " Patchwork
2023-08-24 16:56 ` [Intel-xe] ✗ CI.checksparse: warning " Patchwork
2023-08-24 17:24 ` [Intel-xe] ✗ CI.BAT: failure " Patchwork
2023-08-24 17:46 ` [Intel-xe] [PATCH v2] " Matthew Brost
2023-08-25  8:44 ` [Intel-xe] ✗ CI.Patch_applied: failure for drm/xe: nuke GuC on unload (rev2) Patchwork
2023-08-29 11:31 ` [Intel-xe] ✗ CI.Patch_applied: failure for drm/xe: nuke GuC on unload (rev3) Patchwork
2023-08-31 11:24 ` [Intel-xe] ✓ CI.Patch_applied: success for drm/xe: nuke GuC on unload (rev4) Patchwork
2023-08-31 11:24 ` [Intel-xe] ✗ CI.checkpatch: warning " Patchwork
2023-08-31 11:26 ` [Intel-xe] ✓ CI.KUnit: success " Patchwork
2023-08-31 11:33 ` [Intel-xe] ✓ CI.Build: " Patchwork
2023-08-31 11:33 ` [Intel-xe] ✗ CI.Hooks: failure " Patchwork
2023-08-31 11:33 ` [Intel-xe] ✗ CI.checksparse: warning " Patchwork
2023-08-31 12:05 ` [Intel-xe] ✓ CI.BAT: success " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230824160444.494648-2-matthew.auld@intel.com \
    --to=matthew.auld@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=rodrigo.vivi@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.