Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/2] Apply Wa_22019338487
@ 2024-06-17  8:10 Vinay Belgaumkar
  2024-06-17  8:10 ` [PATCH v4 1/2] drm/xe/lnl: " Vinay Belgaumkar
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Vinay Belgaumkar @ 2024-06-17  8:10 UTC (permalink / raw)
  To: intel-xe; +Cc: Vinay Belgaumkar, Rodrigo Vivi

Apply WA for LNL and also ensure frequency request is raised in the
resume path as well. The WA applies to the resume path too.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>

Vinay Belgaumkar (2):
  drm/xe/lnl: Apply Wa_22019338487
  drm/xe/guc: Request max GT freq during resume

 drivers/gpu/drm/xe/Makefile          |  2 +
 drivers/gpu/drm/xe/xe_device.c       |  3 +
 drivers/gpu/drm/xe/xe_ggtt.c         | 22 ++++++++
 drivers/gpu/drm/xe/xe_ggtt_types.h   |  2 +
 drivers/gpu/drm/xe/xe_gsc.c          |  5 ++
 drivers/gpu/drm/xe/xe_gt.c           | 24 ++++++++
 drivers/gpu/drm/xe/xe_gt.h           |  1 +
 drivers/gpu/drm/xe/xe_guc.c          |  4 +-
 drivers/gpu/drm/xe/xe_guc_pc.c       | 84 +++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_guc_pc.h       |  3 +
 drivers/gpu/drm/xe/xe_guc_pc_types.h |  4 ++
 drivers/gpu/drm/xe/xe_wa_oob.rules   |  1 +
 12 files changed, 152 insertions(+), 3 deletions(-)

-- 
2.38.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v4 1/2] drm/xe/lnl: Apply Wa_22019338487
  2024-06-17  8:10 [PATCH v4 0/2] Apply Wa_22019338487 Vinay Belgaumkar
@ 2024-06-17  8:10 ` Vinay Belgaumkar
  2024-06-17 18:18   ` Rodrigo Vivi
  2024-06-17 19:48   ` Michal Wajdeczko
  2024-06-17  8:10 ` [PATCH v4 2/2] drm/xe/guc: Request max GT freq during resume Vinay Belgaumkar
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 12+ messages in thread
From: Vinay Belgaumkar @ 2024-06-17  8:10 UTC (permalink / raw)
  To: intel-xe; +Cc: Vinay Belgaumkar, Rodrigo Vivi

This WA requires us to limit media GT frequency requests to a certain
cap value during driver load. Freq limits are restored after load
completes, so perf will not be affected during normal operations.

During normal driver operation, this WA requires dummy writes to media
offset 0x380D8C after every ~63 GGTT writes. This will ensure completion
of the LMEM writes originating from Gunit.

During driver unload(before FLR), the WA requires that we set requested
frequency to the cap value again.

v3: Do not use WA number in function name. Call WA wrapper from xe_device.
Rename some variables, check for locks in the correct function (Rodrigo).
Ensure reset path is also covered for this WA.

v4: Fix BAT failure

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
---
 drivers/gpu/drm/xe/Makefile          |  2 +
 drivers/gpu/drm/xe/xe_device.c       |  3 ++
 drivers/gpu/drm/xe/xe_ggtt.c         | 22 +++++++++
 drivers/gpu/drm/xe/xe_ggtt_types.h   |  2 +
 drivers/gpu/drm/xe/xe_gsc.c          |  5 ++
 drivers/gpu/drm/xe/xe_gt.c           | 24 ++++++++++
 drivers/gpu/drm/xe/xe_gt.h           |  1 +
 drivers/gpu/drm/xe/xe_guc_pc.c       | 71 +++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_guc_pc.h       |  2 +
 drivers/gpu/drm/xe/xe_guc_pc_types.h |  4 ++
 drivers/gpu/drm/xe/xe_wa_oob.rules   |  1 +
 11 files changed, 136 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 1905a80e61e3..8c72fe38edfd 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -24,9 +24,11 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
 	$(call cmd,wa_oob)
 
 uses_generated_oob := \
+	$(obj)/xe_ggtt.o \
 	$(obj)/xe_gsc.o \
 	$(obj)/xe_guc.o \
 	$(obj)/xe_guc_ads.o \
+	$(obj)/xe_guc_pc.o \
 	$(obj)/xe_migrate.o \
 	$(obj)/xe_ring_ops.o \
 	$(obj)/xe_vm.o \
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 94dbfe5cf19c..fee90f66246f 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -668,6 +668,9 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_hwmon_register(xe);
 
+	for_each_gt(gt, xe, id)
+		xe_gt_sanitize_freq(gt);
+
 	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
 
 err_fini_display:
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 8ff91fd1b7c8..e3fddd59fd9f 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -11,6 +11,7 @@
 #include <drm/drm_drv.h>
 #include <drm/drm_managed.h>
 #include <drm/intel/i915_drm.h>
+#include <generated/xe_wa_oob.h>
 
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_gtt_defs.h"
@@ -23,8 +24,10 @@
 #include "xe_gt_sriov_vf.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_map.h"
+#include "xe_mmio.h"
 #include "xe_pm.h"
 #include "xe_sriov.h"
+#include "xe_wa.h"
 #include "xe_wopcm.h"
 
 static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
@@ -69,12 +72,31 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
 	return ggms ? SZ_1M << ggms : 0;
 }
 
+static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
+{
+	/*
+	 * 22019338487: GMD_ID is a RO register, a dummy write forces gunit
+	 * to wait for completion of prior GTT writes before letting this through.
+	 */
+	lockdep_assert_held(&ggtt->lock);
+
+	++ggtt->access_count;
+
+	if (ggtt->tile->media_gt && XE_WA(ggtt->tile->media_gt, 22019338487)) {
+		if ((ggtt->access_count % 63) == 0) {
+			xe_mmio_write32(ggtt->tile->media_gt, GMD_ID, 0x0);
+			ggtt->access_count = 0;
+		}
+	}
+}
+
 void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
 {
 	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
 	xe_tile_assert(ggtt->tile, addr < ggtt->size);
 
 	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
+	ggtt_update_access_counter(ggtt);
 }
 
 static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
index d8c584d9a8c3..73fe6837b9cd 100644
--- a/drivers/gpu/drm/xe/xe_ggtt_types.h
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -34,6 +34,8 @@ struct xe_ggtt {
 	const struct xe_ggtt_pt_ops *pt_ops;
 
 	struct drm_mm mm;
+
+	unsigned int access_count;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index 80a61934decc..f8239a13fa2b 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -22,6 +22,7 @@
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
 #include "xe_gt_printk.h"
+#include "xe_guc_pc.h"
 #include "xe_huc.h"
 #include "xe_map.h"
 #include "xe_mmio.h"
@@ -284,6 +285,10 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
 		return ret;
 
 	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
+
+	/* GSC load is done, restore expected GT frequencies */
+	xe_gt_sanitize_freq(gt);
+
 	xe_gt_dbg(gt, "GSC FW async load completed\n");
 
 	/* HuC auth failure is not fatal */
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 57d84751e160..301594d1e7a4 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -9,6 +9,7 @@
 
 #include <drm/drm_managed.h>
 #include <drm/xe_drm.h>
+#include <generated/xe_wa_oob.h>
 
 #include "instructions/xe_gfxpipe_commands.h"
 #include "instructions/xe_mi_commands.h"
@@ -54,6 +55,7 @@
 #include "xe_sriov.h"
 #include "xe_tuning.h"
 #include "xe_uc.h"
+#include "xe_uc_fw.h"
 #include "xe_vm.h"
 #include "xe_wa.h"
 #include "xe_wopcm.h"
@@ -678,6 +680,9 @@ static int do_gt_restart(struct xe_gt *gt)
 	/* Get CCS mode in sync between sw/hw */
 	xe_gt_apply_ccs_mode(gt);
 
+	/* Restore GT freq to expected values */
+	xe_gt_sanitize_freq(gt);
+
 	return 0;
 }
 
@@ -801,6 +806,25 @@ int xe_gt_suspend(struct xe_gt *gt)
 	return err;
 }
 
+/**
+ * xe_gt_sanitize_freq() - Restore saved freuencies if necessary.
+ * @gt: the GT object
+ *
+ * Called after driver init/GSC load completes to restore GT frequencies if we
+ * limited them for any WAs.
+ */
+int xe_gt_sanitize_freq(struct xe_gt *gt)
+{
+	int ret = 0;
+
+	if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
+	    xe_uc_fw_is_loaded(&gt->uc.gsc.fw)) &&
+	    XE_WA(gt, 22019338487))
+		ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);
+
+	return ret;
+}
+
 int xe_gt_resume(struct xe_gt *gt)
 {
 	int err;
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 9073ac68a777..1123fdfc4ebc 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -56,6 +56,7 @@ int xe_gt_suspend(struct xe_gt *gt);
 int xe_gt_resume(struct xe_gt *gt);
 void xe_gt_reset_async(struct xe_gt *gt);
 void xe_gt_sanitize(struct xe_gt *gt);
+int xe_gt_sanitize_freq(struct xe_gt *gt);
 void xe_gt_remove(struct xe_gt *gt);
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 666a37106bc5..d9194328b495 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -8,6 +8,7 @@
 #include <linux/delay.h>
 
 #include <drm/drm_managed.h>
+#include <generated/xe_wa_oob.h>
 
 #include "abi/guc_actions_slpc_abi.h"
 #include "regs/xe_gt_regs.h"
@@ -24,6 +25,7 @@
 #include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_pcode.h"
+#include "xe_wa.h"
 
 #define MCHBAR_MIRROR_BASE_SNB	0x140000
 
@@ -41,6 +43,8 @@
 #define GT_FREQUENCY_MULTIPLIER	50
 #define GT_FREQUENCY_SCALER	3
 
+#define LNL_MERT_FREQ_CAP	800
+
 /**
  * DOC: GuC Power Conservation (PC)
  *
@@ -673,6 +677,16 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
 		tgl_init_fused_rp_values(pc);
 }
 
+static u32 pc_max_freq_cap(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+
+	if (XE_WA(gt, 22019338487))
+		return min(LNL_MERT_FREQ_CAP, pc->rp0_freq);
+	else
+		return pc->rp0_freq;
+}
+
 /**
  * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
  * frequency to allow faster GuC load times
@@ -684,7 +698,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 	pc_init_fused_rp_values(pc);
-	pc_set_cur_freq(pc, pc->rp0_freq);
+	pc_set_cur_freq(pc, pc_max_freq_cap(pc));
 }
 
 static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
@@ -740,6 +754,53 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
 	return ret;
 }
 
+static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
+{
+	int ret = 0;
+
+	if (XE_WA(pc_to_gt(pc), 22019338487)) {
+		/*
+		 * Get updated min/max and stash them.
+		 */
+		ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
+		if (!ret)
+			ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
+		if (ret)
+			return ret;
+
+		/*
+		 * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
+		 */
+		mutex_lock(&pc->freq_lock);
+		ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
+		if (!ret)
+			ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
+		mutex_unlock(&pc->freq_lock);
+	}
+
+	return ret;
+}
+
+/**
+ * xe_guc_pc_restore_stashed_freq - Set min/max back to stashed values
+ * @pc: The GuC PC
+ *
+ * Returns: 0 on success,
+ *          error code on failure
+ */
+int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc)
+{
+	int ret = 0;
+
+	mutex_lock(&pc->freq_lock);
+	ret = pc_set_max_freq(pc, pc->stashed_max_freq);
+	if (!ret)
+		ret = pc_set_min_freq(pc, pc->stashed_min_freq);
+	mutex_unlock(&pc->freq_lock);
+
+	return ret;
+}
+
 /**
  * xe_guc_pc_gucrc_disable - Disable GuC RC
  * @pc: Xe_GuC_PC instance
@@ -854,6 +915,10 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 	if (ret)
 		goto out;
 
+	ret = pc_set_mert_freq_cap(pc);
+	if (ret)
+		goto out;
+
 	if (xe->info.platform == XE_PVC) {
 		xe_guc_pc_gucrc_disable(pc);
 		ret = 0;
@@ -902,6 +967,10 @@ static void xe_guc_pc_fini_hw(void *arg)
 	XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
 	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
 	XE_WARN_ON(xe_guc_pc_stop(pc));
+
+	/* Bind requested freq to mert_freq_cap before unload */
+	pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq));
+
 	xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 532cac985a6d..7ba875c3613b 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -8,6 +8,7 @@
 
 #include <linux/types.h>
 
+struct xe_device;
 struct xe_guc_pc;
 
 int xe_guc_pc_init(struct xe_guc_pc *pc);
@@ -29,5 +30,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
 u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
 u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
 void xe_guc_pc_init_early(struct xe_guc_pc *pc);
+int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
 
 #endif /* _XE_GUC_PC_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
index 2afd0dbc3542..13810be015db 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -25,6 +25,10 @@ struct xe_guc_pc {
 	u32 user_requested_min;
 	/** @user_requested_max: Stash the maximum requested freq by user */
 	u32 user_requested_max;
+	/** @stashed_min_freq: Stash the current minimum freq */
+	u32 stashed_min_freq;
+	/** @stashed_max_freq: Stash the current maximum freq */
+	u32 stashed_max_freq;
 	/** @freq_lock: Let's protect the frequencies */
 	struct mutex freq_lock;
 	/** @freq_ready: Only handle freq changes, if they are really ready */
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 12fe88796a49..a6b897030fde 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -27,3 +27,4 @@
 16022287689	GRAPHICS_VERSION(2001)
 		GRAPHICS_VERSION(2004)
 13011645652	GRAPHICS_VERSION(2004)
+22019338487	MEDIA_VERSION(2000)
-- 
2.38.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v4 2/2] drm/xe/guc: Request max GT freq during resume
  2024-06-17  8:10 [PATCH v4 0/2] Apply Wa_22019338487 Vinay Belgaumkar
  2024-06-17  8:10 ` [PATCH v4 1/2] drm/xe/lnl: " Vinay Belgaumkar
@ 2024-06-17  8:10 ` Vinay Belgaumkar
  2024-06-17 18:20   ` Rodrigo Vivi
  2024-06-17  8:18 ` ✓ CI.Patch_applied: success for Apply Wa_22019338487 (rev2) Patchwork
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Vinay Belgaumkar @ 2024-06-17  8:10 UTC (permalink / raw)
  To: intel-xe; +Cc: Vinay Belgaumkar, Rodrigo Vivi

We already request max freq in the load path, moving it
to __xe_guc_upload will ensure this speeds up GuC load in
the resume path as well.

v2: Rename xe_guc_pc_init_early since we now call it per
GuC load (Michal W)

v3: Keep pc_init_early() and init RPx values there (Rodrigo)

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c    |  4 +++-
 drivers/gpu/drm/xe/xe_guc_pc.c | 15 +++++++++++++--
 drivers/gpu/drm/xe/xe_guc_pc.h |  1 +
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 0e1a5674ef13..178a0fe59107 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -699,6 +699,9 @@ static int __xe_guc_upload(struct xe_guc *guc)
 {
 	int ret;
 
+	/* Raise GT freq to speed up HuC/GuC load */
+	xe_guc_pc_raise_unslice(&guc->pc);
+
 	guc_write_params(guc);
 	guc_prepare_xfer(guc);
 
@@ -784,7 +787,6 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc)
 
 	xe_guc_ads_populate_minimal(&guc->ads);
 
-	/* Raise GT freq to speed up HuC/GuC load */
 	xe_guc_pc_init_early(&guc->pc);
 
 	ret = __xe_guc_upload(guc);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index d9194328b495..1775183662a4 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -688,17 +688,28 @@ static u32 pc_max_freq_cap(struct xe_guc_pc *pc)
 }
 
 /**
- * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
+ * xe_guc_pc_raise_unslice - Initialize RPx values and request a higher GT
  * frequency to allow faster GuC load times
  * @pc: Xe_GuC_PC instance
  */
+void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+	pc_set_cur_freq(pc, pc_max_freq_cap(pc));
+}
+
+/**
+ * xe_guc_pc_init_early - Initialize RPx values
+ * @pc: Xe_GuC_PC instance
+ */
 void xe_guc_pc_init_early(struct xe_guc_pc *pc)
 {
 	struct xe_gt *gt = pc_to_gt(pc);
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 	pc_init_fused_rp_values(pc);
-	pc_set_cur_freq(pc, pc_max_freq_cap(pc));
 }
 
 static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 7ba875c3613b..3b4103f59b19 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -31,5 +31,6 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
 u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
 void xe_guc_pc_init_early(struct xe_guc_pc *pc);
 int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
+void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
 
 #endif /* _XE_GUC_PC_H_ */
-- 
2.38.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* ✓ CI.Patch_applied: success for Apply Wa_22019338487 (rev2)
  2024-06-17  8:10 [PATCH v4 0/2] Apply Wa_22019338487 Vinay Belgaumkar
  2024-06-17  8:10 ` [PATCH v4 1/2] drm/xe/lnl: " Vinay Belgaumkar
  2024-06-17  8:10 ` [PATCH v4 2/2] drm/xe/guc: Request max GT freq during resume Vinay Belgaumkar
@ 2024-06-17  8:18 ` Patchwork
  2024-06-17  8:18 ` ✗ CI.checkpatch: warning " Patchwork
  2024-06-17  8:19 ` ✗ CI.KUnit: failure " Patchwork
  4 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2024-06-17  8:18 UTC (permalink / raw)
  To: Vinay Belgaumkar; +Cc: intel-xe

== Series Details ==

Series: Apply Wa_22019338487 (rev2)
URL   : https://patchwork.freedesktop.org/series/134921/
State : success

== Summary ==

=== Applying kernel patches on branch 'drm-tip' with base: ===
Base commit: 9063661ba438 drm-tip: 2024y-06m-17d-07h-42m-51s UTC integration manifest
=== git am output follows ===
Applying: drm/xe/lnl: Apply Wa_22019338487
Applying: drm/xe/guc: Request max GT freq during resume



^ permalink raw reply	[flat|nested] 12+ messages in thread

* ✗ CI.checkpatch: warning for Apply Wa_22019338487 (rev2)
  2024-06-17  8:10 [PATCH v4 0/2] Apply Wa_22019338487 Vinay Belgaumkar
                   ` (2 preceding siblings ...)
  2024-06-17  8:18 ` ✓ CI.Patch_applied: success for Apply Wa_22019338487 (rev2) Patchwork
@ 2024-06-17  8:18 ` Patchwork
  2024-06-17  8:19 ` ✗ CI.KUnit: failure " Patchwork
  4 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2024-06-17  8:18 UTC (permalink / raw)
  To: Vinay Belgaumkar; +Cc: intel-xe

== Series Details ==

Series: Apply Wa_22019338487 (rev2)
URL   : https://patchwork.freedesktop.org/series/134921/
State : warning

== Summary ==

+ KERNEL=/kernel
+ git clone https://gitlab.freedesktop.org/drm/maintainer-tools mt
Cloning into 'mt'...
warning: redirecting to https://gitlab.freedesktop.org/drm/maintainer-tools.git/
+ git -C mt rev-list -n1 origin/master
51ce9f6cd981d42d7467409d7dbc559a450abc1e
+ cd /kernel
+ git config --global --add safe.directory /kernel
+ git log -n1
commit 65e50cbea2109c2e70e4bf0249cfc2858fd5f5e5
Author: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Date:   Mon Jun 17 01:10:51 2024 -0700

    drm/xe/guc: Request max GT freq during resume
    
    We already request max freq in the load path, moving it
    to __xe_guc_upload will ensure this speeds up GuC load in
    the resume path as well.
    
    v2: Rename xe_guc_pc_init_early since we now call it per
    GuC load (Michal W)
    
    v3: Keep pc_init_early() and init RPx values there (Rodrigo)
    
    Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
    Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
+ /mt/dim checkpatch 9063661ba438a2b1f01b73b6035be9a7156c4345 drm-intel
1da891341513 drm/xe/lnl: Apply Wa_22019338487
-:193: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#193: FILE: drivers/gpu/drm/xe/xe_gt.c:821:
+	if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
+	    xe_uc_fw_is_loaded(&gt->uc.gsc.fw)) &&

total: 0 errors, 0 warnings, 1 checks, 294 lines checked
65e50cbea210 drm/xe/guc: Request max GT freq during resume



^ permalink raw reply	[flat|nested] 12+ messages in thread

* ✗ CI.KUnit: failure for Apply Wa_22019338487 (rev2)
  2024-06-17  8:10 [PATCH v4 0/2] Apply Wa_22019338487 Vinay Belgaumkar
                   ` (3 preceding siblings ...)
  2024-06-17  8:18 ` ✗ CI.checkpatch: warning " Patchwork
@ 2024-06-17  8:19 ` Patchwork
  4 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2024-06-17  8:19 UTC (permalink / raw)
  To: Vinay Belgaumkar; +Cc: intel-xe

== Series Details ==

Series: Apply Wa_22019338487 (rev2)
URL   : https://patchwork.freedesktop.org/series/134921/
State : failure

== Summary ==

+ trap cleanup EXIT
+ /kernel/tools/testing/kunit/kunit.py run --kunitconfig /kernel/drivers/gpu/drm/xe/.kunitconfig
ERROR:root:../drivers/gpu/drm/xe/xe_gt.c:12:10: fatal error: generated/xe_wa_oob.h: No such file or directory
   12 | #include <generated/xe_wa_oob.h>
      |          ^~~~~~~~~~~~~~~~~~~~~~~
compilation terminated.
make[7]: *** [../scripts/Makefile.build:244: drivers/gpu/drm/xe/xe_gt.o] Error 1
make[7]: *** Waiting for unfinished jobs....
make[6]: *** [../scripts/Makefile.build:485: drivers/gpu/drm/xe] Error 2
make[6]: *** Waiting for unfinished jobs....
make[5]: *** [../scripts/Makefile.build:485: drivers/gpu/drm] Error 2
make[4]: *** [../scripts/Makefile.build:485: drivers/gpu] Error 2
make[4]: *** Waiting for unfinished jobs....
make[3]: *** [../scripts/Makefile.build:485: drivers] Error 2
make[3]: *** Waiting for unfinished jobs....
../lib/iomap.c:156:5: warning: no previous prototype for ‘ioread64_lo_hi’ [-Wmissing-prototypes]
  156 | u64 ioread64_lo_hi(const void __iomem *addr)
      |     ^~~~~~~~~~~~~~
../lib/iomap.c:163:5: warning: no previous prototype for ‘ioread64_hi_lo’ [-Wmissing-prototypes]
  163 | u64 ioread64_hi_lo(const void __iomem *addr)
      |     ^~~~~~~~~~~~~~
../lib/iomap.c:170:5: warning: no previous prototype for ‘ioread64be_lo_hi’ [-Wmissing-prototypes]
  170 | u64 ioread64be_lo_hi(const void __iomem *addr)
      |     ^~~~~~~~~~~~~~~~
../lib/iomap.c:178:5: warning: no previous prototype for ‘ioread64be_hi_lo’ [-Wmissing-prototypes]
  178 | u64 ioread64be_hi_lo(const void __iomem *addr)
      |     ^~~~~~~~~~~~~~~~
../lib/iomap.c:264:6: warning: no previous prototype for ‘iowrite64_lo_hi’ [-Wmissing-prototypes]
  264 | void iowrite64_lo_hi(u64 val, void __iomem *addr)
      |      ^~~~~~~~~~~~~~~
../lib/iomap.c:272:6: warning: no previous prototype for ‘iowrite64_hi_lo’ [-Wmissing-prototypes]
  272 | void iowrite64_hi_lo(u64 val, void __iomem *addr)
      |      ^~~~~~~~~~~~~~~
../lib/iomap.c:280:6: warning: no previous prototype for ‘iowrite64be_lo_hi’ [-Wmissing-prototypes]
  280 | void iowrite64be_lo_hi(u64 val, void __iomem *addr)
      |      ^~~~~~~~~~~~~~~~~
../lib/iomap.c:288:6: warning: no previous prototype for ‘iowrite64be_hi_lo’ [-Wmissing-prototypes]
  288 | void iowrite64be_hi_lo(u64 val, void __iomem *addr)
      |      ^~~~~~~~~~~~~~~~~
make[2]: *** [/kernel/Makefile:1934: .] Error 2
make[1]: *** [/kernel/Makefile:240: __sub-make] Error 2
make: *** [Makefile:240: __sub-make] Error 2

[08:18:52] Configuring KUnit Kernel ...
Generating .config ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
[08:18:56] Building KUnit Kernel ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
Building with:
$ make ARCH=um O=.kunit --jobs=48
+ cleanup
++ stat -c %u:%g /kernel
+ chown -R 1003:1003 /kernel



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 1/2] drm/xe/lnl: Apply Wa_22019338487
  2024-06-17  8:10 ` [PATCH v4 1/2] drm/xe/lnl: " Vinay Belgaumkar
@ 2024-06-17 18:18   ` Rodrigo Vivi
  2024-06-18 23:45     ` Belgaumkar, Vinay
  2024-06-17 19:48   ` Michal Wajdeczko
  1 sibling, 1 reply; 12+ messages in thread
From: Rodrigo Vivi @ 2024-06-17 18:18 UTC (permalink / raw)
  To: Vinay Belgaumkar; +Cc: intel-xe

On Mon, Jun 17, 2024 at 01:10:50AM -0700, Vinay Belgaumkar wrote:
> This WA requires us to limit media GT frequency requests to a certain
> cap value during driver load. Freq limits are restored after load
> completes, so perf will not be affected during normal operations.
> 
> During normal driver operation, this WA requires dummy writes to media
> offset 0x380D8C after every ~63 GGTT writes. This will ensure completion
> of the LMEM writes originating from Gunit.
> 
> During driver unload(before FLR), the WA requires that we set requested
> frequency to the cap value again.
> 
> v3: Do not use WA number in function name. Call WA wrapper from xe_device.
> Rename some variables, check for locks in the correct function (Rodrigo).
> Ensure reset path is also covered for this WA.
> 
> v4: Fix BAT failure
> 
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> ---
>  drivers/gpu/drm/xe/Makefile          |  2 +
>  drivers/gpu/drm/xe/xe_device.c       |  3 ++
>  drivers/gpu/drm/xe/xe_ggtt.c         | 22 +++++++++
>  drivers/gpu/drm/xe/xe_ggtt_types.h   |  2 +
>  drivers/gpu/drm/xe/xe_gsc.c          |  5 ++
>  drivers/gpu/drm/xe/xe_gt.c           | 24 ++++++++++
>  drivers/gpu/drm/xe/xe_gt.h           |  1 +
>  drivers/gpu/drm/xe/xe_guc_pc.c       | 71 +++++++++++++++++++++++++++-
>  drivers/gpu/drm/xe/xe_guc_pc.h       |  2 +
>  drivers/gpu/drm/xe/xe_guc_pc_types.h |  4 ++
>  drivers/gpu/drm/xe/xe_wa_oob.rules   |  1 +
>  11 files changed, 136 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> index 1905a80e61e3..8c72fe38edfd 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -24,9 +24,11 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
>  	$(call cmd,wa_oob)
>  
>  uses_generated_oob := \
> +	$(obj)/xe_ggtt.o \
>  	$(obj)/xe_gsc.o \
>  	$(obj)/xe_guc.o \
>  	$(obj)/xe_guc_ads.o \
> +	$(obj)/xe_guc_pc.o \
>  	$(obj)/xe_migrate.o \
>  	$(obj)/xe_ring_ops.o \
>  	$(obj)/xe_vm.o \
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 94dbfe5cf19c..fee90f66246f 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -668,6 +668,9 @@ int xe_device_probe(struct xe_device *xe)
>  
>  	xe_hwmon_register(xe);
>  
> +	for_each_gt(gt, xe, id)
> +		xe_gt_sanitize_freq(gt);
> +
>  	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
>  
>  err_fini_display:
> diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
> index 8ff91fd1b7c8..e3fddd59fd9f 100644
> --- a/drivers/gpu/drm/xe/xe_ggtt.c
> +++ b/drivers/gpu/drm/xe/xe_ggtt.c
> @@ -11,6 +11,7 @@
>  #include <drm/drm_drv.h>
>  #include <drm/drm_managed.h>
>  #include <drm/intel/i915_drm.h>
> +#include <generated/xe_wa_oob.h>
>  
>  #include "regs/xe_gt_regs.h"
>  #include "regs/xe_gtt_defs.h"
> @@ -23,8 +24,10 @@
>  #include "xe_gt_sriov_vf.h"
>  #include "xe_gt_tlb_invalidation.h"
>  #include "xe_map.h"
> +#include "xe_mmio.h"
>  #include "xe_pm.h"
>  #include "xe_sriov.h"
> +#include "xe_wa.h"
>  #include "xe_wopcm.h"
>  
>  static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
> @@ -69,12 +72,31 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
>  	return ggms ? SZ_1M << ggms : 0;
>  }
>  
> +static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
> +{
> +	/*
> +	 * 22019338487: GMD_ID is a RO register, a dummy write forces gunit
> +	 * to wait for completion of prior GTT writes before letting this through.
> +	 */
> +	lockdep_assert_held(&ggtt->lock);
> +
> +	++ggtt->access_count;

probably better the ++ as suffix since you are not in the middle of any check
where you need to increase before checking.

> +
> +	if (ggtt->tile->media_gt && XE_WA(ggtt->tile->media_gt, 22019338487)) {

This doesn't look right... it looks like we are counting all the tile0 as well
and updating the tile1 always.

I believe we should have something on the beginning like
if (!ggtt->tile->media_gt || ggtt->tile->media_gt != ggtt->tile->gt)
   return;

then do all the operations directly on ggtt->tile->gt instead of tile->media_gt

> +		if ((ggtt->access_count % 63) == 0) {
> +			xe_mmio_write32(ggtt->tile->media_gt, GMD_ID, 0x0);
> +			ggtt->access_count = 0;
> +		}
> +	}
> +}
> +
>  void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
>  {
>  	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
>  	xe_tile_assert(ggtt->tile, addr < ggtt->size);
>  
>  	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
> +	ggtt_update_access_counter(ggtt);
>  }
>  
>  static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
> diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
> index d8c584d9a8c3..73fe6837b9cd 100644
> --- a/drivers/gpu/drm/xe/xe_ggtt_types.h
> +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
> @@ -34,6 +34,8 @@ struct xe_ggtt {
>  	const struct xe_ggtt_pt_ops *pt_ops;
>  
>  	struct drm_mm mm;
> +
> +	unsigned int access_count;
>  };
>  
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
> index 80a61934decc..f8239a13fa2b 100644
> --- a/drivers/gpu/drm/xe/xe_gsc.c
> +++ b/drivers/gpu/drm/xe/xe_gsc.c
> @@ -22,6 +22,7 @@
>  #include "xe_gt.h"
>  #include "xe_gt_mcr.h"
>  #include "xe_gt_printk.h"
> +#include "xe_guc_pc.h"
>  #include "xe_huc.h"
>  #include "xe_map.h"
>  #include "xe_mmio.h"
> @@ -284,6 +285,10 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
>  		return ret;
>  
>  	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
> +
> +	/* GSC load is done, restore expected GT frequencies */
> +	xe_gt_sanitize_freq(gt);
> +
>  	xe_gt_dbg(gt, "GSC FW async load completed\n");
>  
>  	/* HuC auth failure is not fatal */
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 57d84751e160..301594d1e7a4 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -9,6 +9,7 @@
>  
>  #include <drm/drm_managed.h>
>  #include <drm/xe_drm.h>
> +#include <generated/xe_wa_oob.h>
>  
>  #include "instructions/xe_gfxpipe_commands.h"
>  #include "instructions/xe_mi_commands.h"
> @@ -54,6 +55,7 @@
>  #include "xe_sriov.h"
>  #include "xe_tuning.h"
>  #include "xe_uc.h"
> +#include "xe_uc_fw.h"
>  #include "xe_vm.h"
>  #include "xe_wa.h"
>  #include "xe_wopcm.h"
> @@ -678,6 +680,9 @@ static int do_gt_restart(struct xe_gt *gt)
>  	/* Get CCS mode in sync between sw/hw */
>  	xe_gt_apply_ccs_mode(gt);
>  
> +	/* Restore GT freq to expected values */
> +	xe_gt_sanitize_freq(gt);
> +
>  	return 0;
>  }
>  
> @@ -801,6 +806,25 @@ int xe_gt_suspend(struct xe_gt *gt)
>  	return err;
>  }
>  
> +/**
> + * xe_gt_sanitize_freq() - Restore saved freuencies if necessary.
> + * @gt: the GT object
> + *
> + * Called after driver init/GSC load completes to restore GT frequencies if we
> + * limited them for any WAs.
> + */
> +int xe_gt_sanitize_freq(struct xe_gt *gt)
> +{
> +	int ret = 0;
> +
> +	if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
> +	    xe_uc_fw_is_loaded(&gt->uc.gsc.fw)) &&
> +	    XE_WA(gt, 22019338487))
> +		ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);
> +
> +	return ret;
> +}
> +
>  int xe_gt_resume(struct xe_gt *gt)
>  {
>  	int err;
> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
> index 9073ac68a777..1123fdfc4ebc 100644
> --- a/drivers/gpu/drm/xe/xe_gt.h
> +++ b/drivers/gpu/drm/xe/xe_gt.h
> @@ -56,6 +56,7 @@ int xe_gt_suspend(struct xe_gt *gt);
>  int xe_gt_resume(struct xe_gt *gt);
>  void xe_gt_reset_async(struct xe_gt *gt);
>  void xe_gt_sanitize(struct xe_gt *gt);
> +int xe_gt_sanitize_freq(struct xe_gt *gt);
>  void xe_gt_remove(struct xe_gt *gt);
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
> index 666a37106bc5..d9194328b495 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
> @@ -8,6 +8,7 @@
>  #include <linux/delay.h>
>  
>  #include <drm/drm_managed.h>
> +#include <generated/xe_wa_oob.h>
>  
>  #include "abi/guc_actions_slpc_abi.h"
>  #include "regs/xe_gt_regs.h"
> @@ -24,6 +25,7 @@
>  #include "xe_map.h"
>  #include "xe_mmio.h"
>  #include "xe_pcode.h"
> +#include "xe_wa.h"
>  
>  #define MCHBAR_MIRROR_BASE_SNB	0x140000
>  
> @@ -41,6 +43,8 @@
>  #define GT_FREQUENCY_MULTIPLIER	50
>  #define GT_FREQUENCY_SCALER	3
>  
> +#define LNL_MERT_FREQ_CAP	800
> +
>  /**
>   * DOC: GuC Power Conservation (PC)
>   *
> @@ -673,6 +677,16 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
>  		tgl_init_fused_rp_values(pc);
>  }
>  
> +static u32 pc_max_freq_cap(struct xe_guc_pc *pc)
> +{
> +	struct xe_gt *gt = pc_to_gt(pc);
> +
> +	if (XE_WA(gt, 22019338487))

I'm afraid we are missing the check to be sure that we are in the media gt
here as we...

> +		return min(LNL_MERT_FREQ_CAP, pc->rp0_freq);
> +	else
> +		return pc->rp0_freq;
> +}
> +
>  /**
>   * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
>   * frequency to allow faster GuC load times
> @@ -684,7 +698,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
>  
>  	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
>  	pc_init_fused_rp_values(pc);
> -	pc_set_cur_freq(pc, pc->rp0_freq);
> +	pc_set_cur_freq(pc, pc_max_freq_cap(pc));
>  }
>  
>  static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
> @@ -740,6 +754,53 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
>  	return ret;
>  }
>  
> +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
> +{
> +	int ret = 0;
> +
> +	if (XE_WA(pc_to_gt(pc), 22019338487)) {
> +		/*
> +		 * Get updated min/max and stash them.
> +		 */
> +		ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
> +		if (!ret)
> +			ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
> +		if (ret)
> +			return ret;
> +
> +		/*
> +		 * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
> +		 */
> +		mutex_lock(&pc->freq_lock);
> +		ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
> +		if (!ret)
> +			ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
> +		mutex_unlock(&pc->freq_lock);
> +	}
> +
> +	return ret;
> +}
> +
> +/**
> + * xe_guc_pc_restore_stashed_freq - Set min/max back to stashed values
> + * @pc: The GuC PC
> + *
> + * Returns: 0 on success,
> + *          error code on failure
> + */
> +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc)
> +{
> +	int ret = 0;
> +
> +	mutex_lock(&pc->freq_lock);
> +	ret = pc_set_max_freq(pc, pc->stashed_max_freq);
> +	if (!ret)
> +		ret = pc_set_min_freq(pc, pc->stashed_min_freq);
> +	mutex_unlock(&pc->freq_lock);
> +
> +	return ret;
> +}
> +
>  /**
>   * xe_guc_pc_gucrc_disable - Disable GuC RC
>   * @pc: Xe_GuC_PC instance
> @@ -854,6 +915,10 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
>  	if (ret)
>  		goto out;
>  
> +	ret = pc_set_mert_freq_cap(pc);
> +	if (ret)
> +		goto out;
> +
>  	if (xe->info.platform == XE_PVC) {
>  		xe_guc_pc_gucrc_disable(pc);
>  		ret = 0;
> @@ -902,6 +967,10 @@ static void xe_guc_pc_fini_hw(void *arg)
>  	XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
>  	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
>  	XE_WARN_ON(xe_guc_pc_stop(pc));
> +
> +	/* Bind requested freq to mert_freq_cap before unload */
> +	pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq));
> +
>  	xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
>  }
>  
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
> index 532cac985a6d..7ba875c3613b 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc.h
> +++ b/drivers/gpu/drm/xe/xe_guc_pc.h
> @@ -8,6 +8,7 @@
>  
>  #include <linux/types.h>
>  
> +struct xe_device;
>  struct xe_guc_pc;
>  
>  int xe_guc_pc_init(struct xe_guc_pc *pc);
> @@ -29,5 +30,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
>  u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
>  u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
>  void xe_guc_pc_init_early(struct xe_guc_pc *pc);
> +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
>  
>  #endif /* _XE_GUC_PC_H_ */
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
> index 2afd0dbc3542..13810be015db 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
> @@ -25,6 +25,10 @@ struct xe_guc_pc {
>  	u32 user_requested_min;
>  	/** @user_requested_max: Stash the maximum requested freq by user */
>  	u32 user_requested_max;
> +	/** @stashed_min_freq: Stash the current minimum freq */
> +	u32 stashed_min_freq;
> +	/** @stashed_max_freq: Stash the current maximum freq */
> +	u32 stashed_max_freq;
>  	/** @freq_lock: Let's protect the frequencies */
>  	struct mutex freq_lock;
>  	/** @freq_ready: Only handle freq changes, if they are really ready */
> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
> index 12fe88796a49..a6b897030fde 100644
> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
> @@ -27,3 +27,4 @@
>  16022287689	GRAPHICS_VERSION(2001)
>  		GRAPHICS_VERSION(2004)
>  13011645652	GRAPHICS_VERSION(2004)
> +22019338487	MEDIA_VERSION(2000)
> -- 
> 2.38.1
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 2/2] drm/xe/guc: Request max GT freq during resume
  2024-06-17  8:10 ` [PATCH v4 2/2] drm/xe/guc: Request max GT freq during resume Vinay Belgaumkar
@ 2024-06-17 18:20   ` Rodrigo Vivi
  0 siblings, 0 replies; 12+ messages in thread
From: Rodrigo Vivi @ 2024-06-17 18:20 UTC (permalink / raw)
  To: Vinay Belgaumkar; +Cc: intel-xe

On Mon, Jun 17, 2024 at 01:10:51AM -0700, Vinay Belgaumkar wrote:
> We already request max freq in the load path, moving it
> to __xe_guc_upload will ensure this speeds up GuC load in
> the resume path as well.
> 
> v2: Rename xe_guc_pc_init_early since we now call it per
> GuC load (Michal W)
> 
> v3: Keep pc_init_early() and init RPx values there (Rodrigo)

thanks

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

> 
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_guc.c    |  4 +++-
>  drivers/gpu/drm/xe/xe_guc_pc.c | 15 +++++++++++++--
>  drivers/gpu/drm/xe/xe_guc_pc.h |  1 +
>  3 files changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
> index 0e1a5674ef13..178a0fe59107 100644
> --- a/drivers/gpu/drm/xe/xe_guc.c
> +++ b/drivers/gpu/drm/xe/xe_guc.c
> @@ -699,6 +699,9 @@ static int __xe_guc_upload(struct xe_guc *guc)
>  {
>  	int ret;
>  
> +	/* Raise GT freq to speed up HuC/GuC load */
> +	xe_guc_pc_raise_unslice(&guc->pc);
> +
>  	guc_write_params(guc);
>  	guc_prepare_xfer(guc);
>  
> @@ -784,7 +787,6 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc)
>  
>  	xe_guc_ads_populate_minimal(&guc->ads);
>  
> -	/* Raise GT freq to speed up HuC/GuC load */
>  	xe_guc_pc_init_early(&guc->pc);
>  
>  	ret = __xe_guc_upload(guc);
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
> index d9194328b495..1775183662a4 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
> @@ -688,17 +688,28 @@ static u32 pc_max_freq_cap(struct xe_guc_pc *pc)
>  }
>  
>  /**
> - * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
> + * xe_guc_pc_raise_unslice - Initialize RPx values and request a higher GT
>   * frequency to allow faster GuC load times
>   * @pc: Xe_GuC_PC instance
>   */
> +void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc)
> +{
> +	struct xe_gt *gt = pc_to_gt(pc);
> +
> +	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
> +	pc_set_cur_freq(pc, pc_max_freq_cap(pc));
> +}
> +
> +/**
> + * xe_guc_pc_init_early - Initialize RPx values
> + * @pc: Xe_GuC_PC instance
> + */
>  void xe_guc_pc_init_early(struct xe_guc_pc *pc)
>  {
>  	struct xe_gt *gt = pc_to_gt(pc);
>  
>  	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
>  	pc_init_fused_rp_values(pc);
> -	pc_set_cur_freq(pc, pc_max_freq_cap(pc));
>  }
>  
>  static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
> index 7ba875c3613b..3b4103f59b19 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc.h
> +++ b/drivers/gpu/drm/xe/xe_guc_pc.h
> @@ -31,5 +31,6 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
>  u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
>  void xe_guc_pc_init_early(struct xe_guc_pc *pc);
>  int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
> +void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
>  
>  #endif /* _XE_GUC_PC_H_ */
> -- 
> 2.38.1
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 1/2] drm/xe/lnl: Apply Wa_22019338487
  2024-06-17  8:10 ` [PATCH v4 1/2] drm/xe/lnl: " Vinay Belgaumkar
  2024-06-17 18:18   ` Rodrigo Vivi
@ 2024-06-17 19:48   ` Michal Wajdeczko
  2024-06-18 23:47     ` Belgaumkar, Vinay
  1 sibling, 1 reply; 12+ messages in thread
From: Michal Wajdeczko @ 2024-06-17 19:48 UTC (permalink / raw)
  To: Vinay Belgaumkar, intel-xe; +Cc: Rodrigo Vivi



On 17.06.2024 10:10, Vinay Belgaumkar wrote:
> This WA requires us to limit media GT frequency requests to a certain
> cap value during driver load. Freq limits are restored after load
> completes, so perf will not be affected during normal operations.
> 
> During normal driver operation, this WA requires dummy writes to media
> offset 0x380D8C after every ~63 GGTT writes. This will ensure completion
> of the LMEM writes originating from Gunit.
> 
> During driver unload(before FLR), the WA requires that we set requested
> frequency to the cap value again.
> 
> v3: Do not use WA number in function name. Call WA wrapper from xe_device.
> Rename some variables, check for locks in the correct function (Rodrigo).
> Ensure reset path is also covered for this WA.
> 
> v4: Fix BAT failure
> 
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> ---
>  drivers/gpu/drm/xe/Makefile          |  2 +
>  drivers/gpu/drm/xe/xe_device.c       |  3 ++
>  drivers/gpu/drm/xe/xe_ggtt.c         | 22 +++++++++
>  drivers/gpu/drm/xe/xe_ggtt_types.h   |  2 +
>  drivers/gpu/drm/xe/xe_gsc.c          |  5 ++
>  drivers/gpu/drm/xe/xe_gt.c           | 24 ++++++++++
>  drivers/gpu/drm/xe/xe_gt.h           |  1 +
>  drivers/gpu/drm/xe/xe_guc_pc.c       | 71 +++++++++++++++++++++++++++-
>  drivers/gpu/drm/xe/xe_guc_pc.h       |  2 +
>  drivers/gpu/drm/xe/xe_guc_pc_types.h |  4 ++
>  drivers/gpu/drm/xe/xe_wa_oob.rules   |  1 +
>  11 files changed, 136 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> index 1905a80e61e3..8c72fe38edfd 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -24,9 +24,11 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
>  	$(call cmd,wa_oob)
>  
>  uses_generated_oob := \
> +	$(obj)/xe_ggtt.o \
>  	$(obj)/xe_gsc.o \
>  	$(obj)/xe_guc.o \
>  	$(obj)/xe_guc_ads.o \
> +	$(obj)/xe_guc_pc.o \
>  	$(obj)/xe_migrate.o \
>  	$(obj)/xe_ring_ops.o \
>  	$(obj)/xe_vm.o \
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 94dbfe5cf19c..fee90f66246f 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -668,6 +668,9 @@ int xe_device_probe(struct xe_device *xe)
>  
>  	xe_hwmon_register(xe);
>  
> +	for_each_gt(gt, xe, id)
> +		xe_gt_sanitize_freq(gt);
> +
>  	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
>  
>  err_fini_display:
> diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
> index 8ff91fd1b7c8..e3fddd59fd9f 100644
> --- a/drivers/gpu/drm/xe/xe_ggtt.c
> +++ b/drivers/gpu/drm/xe/xe_ggtt.c
> @@ -11,6 +11,7 @@
>  #include <drm/drm_drv.h>
>  #include <drm/drm_managed.h>
>  #include <drm/intel/i915_drm.h>
> +#include <generated/xe_wa_oob.h>
>  
>  #include "regs/xe_gt_regs.h"
>  #include "regs/xe_gtt_defs.h"
> @@ -23,8 +24,10 @@
>  #include "xe_gt_sriov_vf.h"
>  #include "xe_gt_tlb_invalidation.h"
>  #include "xe_map.h"
> +#include "xe_mmio.h"
>  #include "xe_pm.h"
>  #include "xe_sriov.h"
> +#include "xe_wa.h"
>  #include "xe_wopcm.h"
>  
>  static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
> @@ -69,12 +72,31 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
>  	return ggms ? SZ_1M << ggms : 0;
>  }
>  
> +static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
> +{
> +	/*
> +	 * 22019338487: GMD_ID is a RO register, a dummy write forces gunit
> +	 * to wait for completion of prior GTT writes before letting this through.
> +	 */
> +	lockdep_assert_held(&ggtt->lock);
> +
> +	++ggtt->access_count;
> +
> +	if (ggtt->tile->media_gt && XE_WA(ggtt->tile->media_gt, 22019338487)) {
> +		if ((ggtt->access_count % 63) == 0) {
> +			xe_mmio_write32(ggtt->tile->media_gt, GMD_ID, 0x0);
> +			ggtt->access_count = 0;
> +		}
> +	}
> +}
> +
>  void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
>  {
>  	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
>  	xe_tile_assert(ggtt->tile, addr < ggtt->size);
>  
>  	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
> +	ggtt_update_access_counter(ggtt);

this doesn't seem a perfect solution as now it augments the generic
function used by all platforms with unnecessary logic (that triggers
extra step just for specific WA)

maybe it's time to extend struct xe_ggtt_pt_ops with:

	void (*pte_write)(struct xe_ggtt *, u64, u64);

and setup that ops only where applicable:

static const struct xe_ggtt_pt_ops ops_wa = {
	.pte_encode_bo = ...,
	.pte_write = set_and_track_pte,
};

	if (XE_WA(gt, 22019338487))
		ggtt->ops = ops_wa;
	else
		ggtt->ops = ...;
	

>  }
>  
>  static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
> diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
> index d8c584d9a8c3..73fe6837b9cd 100644
> --- a/drivers/gpu/drm/xe/xe_ggtt_types.h
> +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
> @@ -34,6 +34,8 @@ struct xe_ggtt {
>  	const struct xe_ggtt_pt_ops *pt_ops;
>  
>  	struct drm_mm mm;
> +
> +	unsigned int access_count;

missing kernel-doc

>  };
>  
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
> index 80a61934decc..f8239a13fa2b 100644
> --- a/drivers/gpu/drm/xe/xe_gsc.c
> +++ b/drivers/gpu/drm/xe/xe_gsc.c
> @@ -22,6 +22,7 @@
>  #include "xe_gt.h"
>  #include "xe_gt_mcr.h"
>  #include "xe_gt_printk.h"
> +#include "xe_guc_pc.h"
>  #include "xe_huc.h"
>  #include "xe_map.h"
>  #include "xe_mmio.h"
> @@ -284,6 +285,10 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
>  		return ret;
>  
>  	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
> +
> +	/* GSC load is done, restore expected GT frequencies */
> +	xe_gt_sanitize_freq(gt);
> +
>  	xe_gt_dbg(gt, "GSC FW async load completed\n");
>  
>  	/* HuC auth failure is not fatal */
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 57d84751e160..301594d1e7a4 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -9,6 +9,7 @@
>  
>  #include <drm/drm_managed.h>
>  #include <drm/xe_drm.h>
> +#include <generated/xe_wa_oob.h>
>  
>  #include "instructions/xe_gfxpipe_commands.h"
>  #include "instructions/xe_mi_commands.h"
> @@ -54,6 +55,7 @@
>  #include "xe_sriov.h"
>  #include "xe_tuning.h"
>  #include "xe_uc.h"
> +#include "xe_uc_fw.h"
>  #include "xe_vm.h"
>  #include "xe_wa.h"
>  #include "xe_wopcm.h"
> @@ -678,6 +680,9 @@ static int do_gt_restart(struct xe_gt *gt)
>  	/* Get CCS mode in sync between sw/hw */
>  	xe_gt_apply_ccs_mode(gt);
>  
> +	/* Restore GT freq to expected values */
> +	xe_gt_sanitize_freq(gt);
> +
>  	return 0;
>  }
>  
> @@ -801,6 +806,25 @@ int xe_gt_suspend(struct xe_gt *gt)
>  	return err;
>  }
>  
> +/**
> + * xe_gt_sanitize_freq() - Restore saved freuencies if necessary.

typo

> + * @gt: the GT object
> + *
> + * Called after driver init/GSC load completes to restore GT frequencies if we
> + * limited them for any WAs.
> + */
> +int xe_gt_sanitize_freq(struct xe_gt *gt)
> +{
> +	int ret = 0;
> +
> +	if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
> +	    xe_uc_fw_is_loaded(&gt->uc.gsc.fw)) &&
> +	    XE_WA(gt, 22019338487))
> +		ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);
> +
> +	return ret;
> +}
> +
>  int xe_gt_resume(struct xe_gt *gt)
>  {
>  	int err;
> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
> index 9073ac68a777..1123fdfc4ebc 100644
> --- a/drivers/gpu/drm/xe/xe_gt.h
> +++ b/drivers/gpu/drm/xe/xe_gt.h
> @@ -56,6 +56,7 @@ int xe_gt_suspend(struct xe_gt *gt);
>  int xe_gt_resume(struct xe_gt *gt);
>  void xe_gt_reset_async(struct xe_gt *gt);
>  void xe_gt_sanitize(struct xe_gt *gt);
> +int xe_gt_sanitize_freq(struct xe_gt *gt);
>  void xe_gt_remove(struct xe_gt *gt);
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
> index 666a37106bc5..d9194328b495 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
> @@ -8,6 +8,7 @@
>  #include <linux/delay.h>
>  
>  #include <drm/drm_managed.h>
> +#include <generated/xe_wa_oob.h>
>  
>  #include "abi/guc_actions_slpc_abi.h"
>  #include "regs/xe_gt_regs.h"
> @@ -24,6 +25,7 @@
>  #include "xe_map.h"
>  #include "xe_mmio.h"
>  #include "xe_pcode.h"
> +#include "xe_wa.h"
>  
>  #define MCHBAR_MIRROR_BASE_SNB	0x140000
>  
> @@ -41,6 +43,8 @@
>  #define GT_FREQUENCY_MULTIPLIER	50
>  #define GT_FREQUENCY_SCALER	3
>  
> +#define LNL_MERT_FREQ_CAP	800
> +
>  /**
>   * DOC: GuC Power Conservation (PC)
>   *
> @@ -673,6 +677,16 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
>  		tgl_init_fused_rp_values(pc);
>  }
>  
> +static u32 pc_max_freq_cap(struct xe_guc_pc *pc)
> +{
> +	struct xe_gt *gt = pc_to_gt(pc);
> +
> +	if (XE_WA(gt, 22019338487))
> +		return min(LNL_MERT_FREQ_CAP, pc->rp0_freq);
> +	else
> +		return pc->rp0_freq;
> +}
> +
>  /**
>   * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
>   * frequency to allow faster GuC load times
> @@ -684,7 +698,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
>  
>  	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
>  	pc_init_fused_rp_values(pc);
> -	pc_set_cur_freq(pc, pc->rp0_freq);
> +	pc_set_cur_freq(pc, pc_max_freq_cap(pc));
>  }
>  
>  static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
> @@ -740,6 +754,53 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
>  	return ret;
>  }
>  
> +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
> +{
> +	int ret = 0;
> +
> +	if (XE_WA(pc_to_gt(pc), 22019338487)) {
> +		/*
> +		 * Get updated min/max and stash them.
> +		 */
> +		ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
> +		if (!ret)
> +			ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
> +		if (ret)
> +			return ret;
> +
> +		/*
> +		 * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
> +		 */
> +		mutex_lock(&pc->freq_lock);
> +		ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
> +		if (!ret)
> +			ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
> +		mutex_unlock(&pc->freq_lock);
> +	}
> +
> +	return ret;
> +}
> +
> +/**
> + * xe_guc_pc_restore_stashed_freq - Set min/max back to stashed values
> + * @pc: The GuC PC
> + *
> + * Returns: 0 on success,
> + *          error code on failure
> + */
> +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc)
> +{
> +	int ret = 0;
> +
> +	mutex_lock(&pc->freq_lock);
> +	ret = pc_set_max_freq(pc, pc->stashed_max_freq);
> +	if (!ret)
> +		ret = pc_set_min_freq(pc, pc->stashed_min_freq);
> +	mutex_unlock(&pc->freq_lock);
> +
> +	return ret;
> +}
> +
>  /**
>   * xe_guc_pc_gucrc_disable - Disable GuC RC
>   * @pc: Xe_GuC_PC instance
> @@ -854,6 +915,10 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
>  	if (ret)
>  		goto out;
>  
> +	ret = pc_set_mert_freq_cap(pc);
> +	if (ret)
> +		goto out;
> +
>  	if (xe->info.platform == XE_PVC) {
>  		xe_guc_pc_gucrc_disable(pc);
>  		ret = 0;
> @@ -902,6 +967,10 @@ static void xe_guc_pc_fini_hw(void *arg)
>  	XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
>  	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
>  	XE_WARN_ON(xe_guc_pc_stop(pc));
> +
> +	/* Bind requested freq to mert_freq_cap before unload */
> +	pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq));
> +
>  	xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
>  }
>  
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
> index 532cac985a6d..7ba875c3613b 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc.h
> +++ b/drivers/gpu/drm/xe/xe_guc_pc.h
> @@ -8,6 +8,7 @@
>  
>  #include <linux/types.h>
>  
> +struct xe_device;

why this ?

>  struct xe_guc_pc;
>  
>  int xe_guc_pc_init(struct xe_guc_pc *pc);
> @@ -29,5 +30,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
>  u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
>  u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
>  void xe_guc_pc_init_early(struct xe_guc_pc *pc);
> +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
>  
>  #endif /* _XE_GUC_PC_H_ */
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
> index 2afd0dbc3542..13810be015db 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
> @@ -25,6 +25,10 @@ struct xe_guc_pc {
>  	u32 user_requested_min;
>  	/** @user_requested_max: Stash the maximum requested freq by user */
>  	u32 user_requested_max;
> +	/** @stashed_min_freq: Stash the current minimum freq */
> +	u32 stashed_min_freq;
> +	/** @stashed_max_freq: Stash the current maximum freq */
> +	u32 stashed_max_freq;
>  	/** @freq_lock: Let's protect the frequencies */
>  	struct mutex freq_lock;
>  	/** @freq_ready: Only handle freq changes, if they are really ready */
> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
> index 12fe88796a49..a6b897030fde 100644
> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
> @@ -27,3 +27,4 @@
>  16022287689	GRAPHICS_VERSION(2001)
>  		GRAPHICS_VERSION(2004)
>  13011645652	GRAPHICS_VERSION(2004)
> +22019338487	MEDIA_VERSION(2000)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 1/2] drm/xe/lnl: Apply Wa_22019338487
  2024-06-17 18:18   ` Rodrigo Vivi
@ 2024-06-18 23:45     ` Belgaumkar, Vinay
  0 siblings, 0 replies; 12+ messages in thread
From: Belgaumkar, Vinay @ 2024-06-18 23:45 UTC (permalink / raw)
  To: Rodrigo Vivi; +Cc: intel-xe


On 6/17/2024 11:18 AM, Rodrigo Vivi wrote:
> On Mon, Jun 17, 2024 at 01:10:50AM -0700, Vinay Belgaumkar wrote:
>> This WA requires us to limit media GT frequency requests to a certain
>> cap value during driver load. Freq limits are restored after load
>> completes, so perf will not be affected during normal operations.
>>
>> During normal driver operation, this WA requires dummy writes to media
>> offset 0x380D8C after every ~63 GGTT writes. This will ensure completion
>> of the LMEM writes originating from Gunit.
>>
>> During driver unload(before FLR), the WA requires that we set requested
>> frequency to the cap value again.
>>
>> v3: Do not use WA number in function name. Call WA wrapper from xe_device.
>> Rename some variables, check for locks in the correct function (Rodrigo).
>> Ensure reset path is also covered for this WA.
>>
>> v4: Fix BAT failure
>>
>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>> ---
>>   drivers/gpu/drm/xe/Makefile          |  2 +
>>   drivers/gpu/drm/xe/xe_device.c       |  3 ++
>>   drivers/gpu/drm/xe/xe_ggtt.c         | 22 +++++++++
>>   drivers/gpu/drm/xe/xe_ggtt_types.h   |  2 +
>>   drivers/gpu/drm/xe/xe_gsc.c          |  5 ++
>>   drivers/gpu/drm/xe/xe_gt.c           | 24 ++++++++++
>>   drivers/gpu/drm/xe/xe_gt.h           |  1 +
>>   drivers/gpu/drm/xe/xe_guc_pc.c       | 71 +++++++++++++++++++++++++++-
>>   drivers/gpu/drm/xe/xe_guc_pc.h       |  2 +
>>   drivers/gpu/drm/xe/xe_guc_pc_types.h |  4 ++
>>   drivers/gpu/drm/xe/xe_wa_oob.rules   |  1 +
>>   11 files changed, 136 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>> index 1905a80e61e3..8c72fe38edfd 100644
>> --- a/drivers/gpu/drm/xe/Makefile
>> +++ b/drivers/gpu/drm/xe/Makefile
>> @@ -24,9 +24,11 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
>>   	$(call cmd,wa_oob)
>>   
>>   uses_generated_oob := \
>> +	$(obj)/xe_ggtt.o \
>>   	$(obj)/xe_gsc.o \
>>   	$(obj)/xe_guc.o \
>>   	$(obj)/xe_guc_ads.o \
>> +	$(obj)/xe_guc_pc.o \
>>   	$(obj)/xe_migrate.o \
>>   	$(obj)/xe_ring_ops.o \
>>   	$(obj)/xe_vm.o \
>> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
>> index 94dbfe5cf19c..fee90f66246f 100644
>> --- a/drivers/gpu/drm/xe/xe_device.c
>> +++ b/drivers/gpu/drm/xe/xe_device.c
>> @@ -668,6 +668,9 @@ int xe_device_probe(struct xe_device *xe)
>>   
>>   	xe_hwmon_register(xe);
>>   
>> +	for_each_gt(gt, xe, id)
>> +		xe_gt_sanitize_freq(gt);
>> +
>>   	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
>>   
>>   err_fini_display:
>> diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
>> index 8ff91fd1b7c8..e3fddd59fd9f 100644
>> --- a/drivers/gpu/drm/xe/xe_ggtt.c
>> +++ b/drivers/gpu/drm/xe/xe_ggtt.c
>> @@ -11,6 +11,7 @@
>>   #include <drm/drm_drv.h>
>>   #include <drm/drm_managed.h>
>>   #include <drm/intel/i915_drm.h>
>> +#include <generated/xe_wa_oob.h>
>>   
>>   #include "regs/xe_gt_regs.h"
>>   #include "regs/xe_gtt_defs.h"
>> @@ -23,8 +24,10 @@
>>   #include "xe_gt_sriov_vf.h"
>>   #include "xe_gt_tlb_invalidation.h"
>>   #include "xe_map.h"
>> +#include "xe_mmio.h"
>>   #include "xe_pm.h"
>>   #include "xe_sriov.h"
>> +#include "xe_wa.h"
>>   #include "xe_wopcm.h"
>>   
>>   static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
>> @@ -69,12 +72,31 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
>>   	return ggms ? SZ_1M << ggms : 0;
>>   }
>>   
>> +static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
>> +{
>> +	/*
>> +	 * 22019338487: GMD_ID is a RO register, a dummy write forces gunit
>> +	 * to wait for completion of prior GTT writes before letting this through.
>> +	 */
>> +	lockdep_assert_held(&ggtt->lock);
>> +
>> +	++ggtt->access_count;
> probably better the ++ as suffix since you are not in the middle of any check
> where you need to increase before checking.
ok.
>
>> +
>> +	if (ggtt->tile->media_gt && XE_WA(ggtt->tile->media_gt, 22019338487)) {
> This doesn't look right... it looks like we are counting all the tile0 as well
> and updating the tile1 always.
>
> I believe we should have something on the beginning like
> if (!ggtt->tile->media_gt || ggtt->tile->media_gt != ggtt->tile->gt)
>     return;
>
> then do all the operations directly on ggtt->tile->gt instead of tile->media_gt
Any write access to GGTT from CPU needs to be counted as part of this WA.
>
>> +		if ((ggtt->access_count % 63) == 0) {
>> +			xe_mmio_write32(ggtt->tile->media_gt, GMD_ID, 0x0);
>> +			ggtt->access_count = 0;
>> +		}
>> +	}
>> +}
>> +
>>   void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
>>   {
>>   	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
>>   	xe_tile_assert(ggtt->tile, addr < ggtt->size);
>>   
>>   	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
>> +	ggtt_update_access_counter(ggtt);
>>   }
>>   
>>   static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
>> diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
>> index d8c584d9a8c3..73fe6837b9cd 100644
>> --- a/drivers/gpu/drm/xe/xe_ggtt_types.h
>> +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
>> @@ -34,6 +34,8 @@ struct xe_ggtt {
>>   	const struct xe_ggtt_pt_ops *pt_ops;
>>   
>>   	struct drm_mm mm;
>> +
>> +	unsigned int access_count;
>>   };
>>   
>>   #endif
>> diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
>> index 80a61934decc..f8239a13fa2b 100644
>> --- a/drivers/gpu/drm/xe/xe_gsc.c
>> +++ b/drivers/gpu/drm/xe/xe_gsc.c
>> @@ -22,6 +22,7 @@
>>   #include "xe_gt.h"
>>   #include "xe_gt_mcr.h"
>>   #include "xe_gt_printk.h"
>> +#include "xe_guc_pc.h"
>>   #include "xe_huc.h"
>>   #include "xe_map.h"
>>   #include "xe_mmio.h"
>> @@ -284,6 +285,10 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
>>   		return ret;
>>   
>>   	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
>> +
>> +	/* GSC load is done, restore expected GT frequencies */
>> +	xe_gt_sanitize_freq(gt);
>> +
>>   	xe_gt_dbg(gt, "GSC FW async load completed\n");
>>   
>>   	/* HuC auth failure is not fatal */
>> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>> index 57d84751e160..301594d1e7a4 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.c
>> +++ b/drivers/gpu/drm/xe/xe_gt.c
>> @@ -9,6 +9,7 @@
>>   
>>   #include <drm/drm_managed.h>
>>   #include <drm/xe_drm.h>
>> +#include <generated/xe_wa_oob.h>
>>   
>>   #include "instructions/xe_gfxpipe_commands.h"
>>   #include "instructions/xe_mi_commands.h"
>> @@ -54,6 +55,7 @@
>>   #include "xe_sriov.h"
>>   #include "xe_tuning.h"
>>   #include "xe_uc.h"
>> +#include "xe_uc_fw.h"
>>   #include "xe_vm.h"
>>   #include "xe_wa.h"
>>   #include "xe_wopcm.h"
>> @@ -678,6 +680,9 @@ static int do_gt_restart(struct xe_gt *gt)
>>   	/* Get CCS mode in sync between sw/hw */
>>   	xe_gt_apply_ccs_mode(gt);
>>   
>> +	/* Restore GT freq to expected values */
>> +	xe_gt_sanitize_freq(gt);
>> +
>>   	return 0;
>>   }
>>   
>> @@ -801,6 +806,25 @@ int xe_gt_suspend(struct xe_gt *gt)
>>   	return err;
>>   }
>>   
>> +/**
>> + * xe_gt_sanitize_freq() - Restore saved freuencies if necessary.
>> + * @gt: the GT object
>> + *
>> + * Called after driver init/GSC load completes to restore GT frequencies if we
>> + * limited them for any WAs.
>> + */
>> +int xe_gt_sanitize_freq(struct xe_gt *gt)
>> +{
>> +	int ret = 0;
>> +
>> +	if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
>> +	    xe_uc_fw_is_loaded(&gt->uc.gsc.fw)) &&
>> +	    XE_WA(gt, 22019338487))
>> +		ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);
>> +
>> +	return ret;
>> +}
>> +
>>   int xe_gt_resume(struct xe_gt *gt)
>>   {
>>   	int err;
>> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
>> index 9073ac68a777..1123fdfc4ebc 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.h
>> +++ b/drivers/gpu/drm/xe/xe_gt.h
>> @@ -56,6 +56,7 @@ int xe_gt_suspend(struct xe_gt *gt);
>>   int xe_gt_resume(struct xe_gt *gt);
>>   void xe_gt_reset_async(struct xe_gt *gt);
>>   void xe_gt_sanitize(struct xe_gt *gt);
>> +int xe_gt_sanitize_freq(struct xe_gt *gt);
>>   void xe_gt_remove(struct xe_gt *gt);
>>   
>>   /**
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
>> index 666a37106bc5..d9194328b495 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
>> @@ -8,6 +8,7 @@
>>   #include <linux/delay.h>
>>   
>>   #include <drm/drm_managed.h>
>> +#include <generated/xe_wa_oob.h>
>>   
>>   #include "abi/guc_actions_slpc_abi.h"
>>   #include "regs/xe_gt_regs.h"
>> @@ -24,6 +25,7 @@
>>   #include "xe_map.h"
>>   #include "xe_mmio.h"
>>   #include "xe_pcode.h"
>> +#include "xe_wa.h"
>>   
>>   #define MCHBAR_MIRROR_BASE_SNB	0x140000
>>   
>> @@ -41,6 +43,8 @@
>>   #define GT_FREQUENCY_MULTIPLIER	50
>>   #define GT_FREQUENCY_SCALER	3
>>   
>> +#define LNL_MERT_FREQ_CAP	800
>> +
>>   /**
>>    * DOC: GuC Power Conservation (PC)
>>    *
>> @@ -673,6 +677,16 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
>>   		tgl_init_fused_rp_values(pc);
>>   }
>>   
>> +static u32 pc_max_freq_cap(struct xe_guc_pc *pc)
>> +{
>> +	struct xe_gt *gt = pc_to_gt(pc);
>> +
>> +	if (XE_WA(gt, 22019338487))
> I'm afraid we are missing the check to be sure that we are in the media gt
> here as we...

XE_WA() does check for the specific GT it is defined for (much to my 
surprise as well). So, above code will return false for a non-media GT 
since oob_rules has it defined for media.

Thanks,

Vinay.

>
>> +		return min(LNL_MERT_FREQ_CAP, pc->rp0_freq);
>> +	else
>> +		return pc->rp0_freq;
>> +}
>> +
>>   /**
>>    * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
>>    * frequency to allow faster GuC load times
>> @@ -684,7 +698,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
>>   
>>   	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
>>   	pc_init_fused_rp_values(pc);
>> -	pc_set_cur_freq(pc, pc->rp0_freq);
>> +	pc_set_cur_freq(pc, pc_max_freq_cap(pc));
>>   }
>>   
>>   static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
>> @@ -740,6 +754,53 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
>>   	return ret;
>>   }
>>   
>> +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
>> +{
>> +	int ret = 0;
>> +
>> +	if (XE_WA(pc_to_gt(pc), 22019338487)) {
>> +		/*
>> +		 * Get updated min/max and stash them.
>> +		 */
>> +		ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
>> +		if (!ret)
>> +			ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
>> +		if (ret)
>> +			return ret;
>> +
>> +		/*
>> +		 * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
>> +		 */
>> +		mutex_lock(&pc->freq_lock);
>> +		ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
>> +		if (!ret)
>> +			ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
>> +		mutex_unlock(&pc->freq_lock);
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +/**
>> + * xe_guc_pc_restore_stashed_freq - Set min/max back to stashed values
>> + * @pc: The GuC PC
>> + *
>> + * Returns: 0 on success,
>> + *          error code on failure
>> + */
>> +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc)
>> +{
>> +	int ret = 0;
>> +
>> +	mutex_lock(&pc->freq_lock);
>> +	ret = pc_set_max_freq(pc, pc->stashed_max_freq);
>> +	if (!ret)
>> +		ret = pc_set_min_freq(pc, pc->stashed_min_freq);
>> +	mutex_unlock(&pc->freq_lock);
>> +
>> +	return ret;
>> +}
>> +
>>   /**
>>    * xe_guc_pc_gucrc_disable - Disable GuC RC
>>    * @pc: Xe_GuC_PC instance
>> @@ -854,6 +915,10 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
>>   	if (ret)
>>   		goto out;
>>   
>> +	ret = pc_set_mert_freq_cap(pc);
>> +	if (ret)
>> +		goto out;
>> +
>>   	if (xe->info.platform == XE_PVC) {
>>   		xe_guc_pc_gucrc_disable(pc);
>>   		ret = 0;
>> @@ -902,6 +967,10 @@ static void xe_guc_pc_fini_hw(void *arg)
>>   	XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
>>   	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
>>   	XE_WARN_ON(xe_guc_pc_stop(pc));
>> +
>> +	/* Bind requested freq to mert_freq_cap before unload */
>> +	pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq));
>> +
>>   	xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
>>   }
>>   
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
>> index 532cac985a6d..7ba875c3613b 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc.h
>> @@ -8,6 +8,7 @@
>>   
>>   #include <linux/types.h>
>>   
>> +struct xe_device;
>>   struct xe_guc_pc;
>>   
>>   int xe_guc_pc_init(struct xe_guc_pc *pc);
>> @@ -29,5 +30,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
>>   u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
>>   u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
>>   void xe_guc_pc_init_early(struct xe_guc_pc *pc);
>> +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
>>   
>>   #endif /* _XE_GUC_PC_H_ */
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
>> index 2afd0dbc3542..13810be015db 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
>> @@ -25,6 +25,10 @@ struct xe_guc_pc {
>>   	u32 user_requested_min;
>>   	/** @user_requested_max: Stash the maximum requested freq by user */
>>   	u32 user_requested_max;
>> +	/** @stashed_min_freq: Stash the current minimum freq */
>> +	u32 stashed_min_freq;
>> +	/** @stashed_max_freq: Stash the current maximum freq */
>> +	u32 stashed_max_freq;
>>   	/** @freq_lock: Let's protect the frequencies */
>>   	struct mutex freq_lock;
>>   	/** @freq_ready: Only handle freq changes, if they are really ready */
>> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> index 12fe88796a49..a6b897030fde 100644
>> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
>> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> @@ -27,3 +27,4 @@
>>   16022287689	GRAPHICS_VERSION(2001)
>>   		GRAPHICS_VERSION(2004)
>>   13011645652	GRAPHICS_VERSION(2004)
>> +22019338487	MEDIA_VERSION(2000)
>> -- 
>> 2.38.1
>>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 1/2] drm/xe/lnl: Apply Wa_22019338487
  2024-06-17 19:48   ` Michal Wajdeczko
@ 2024-06-18 23:47     ` Belgaumkar, Vinay
  2024-06-19 14:42       ` Rodrigo Vivi
  0 siblings, 1 reply; 12+ messages in thread
From: Belgaumkar, Vinay @ 2024-06-18 23:47 UTC (permalink / raw)
  To: Michal Wajdeczko, intel-xe; +Cc: Rodrigo Vivi


On 6/17/2024 12:48 PM, Michal Wajdeczko wrote:
>
> On 17.06.2024 10:10, Vinay Belgaumkar wrote:
>> This WA requires us to limit media GT frequency requests to a certain
>> cap value during driver load. Freq limits are restored after load
>> completes, so perf will not be affected during normal operations.
>>
>> During normal driver operation, this WA requires dummy writes to media
>> offset 0x380D8C after every ~63 GGTT writes. This will ensure completion
>> of the LMEM writes originating from Gunit.
>>
>> During driver unload(before FLR), the WA requires that we set requested
>> frequency to the cap value again.
>>
>> v3: Do not use WA number in function name. Call WA wrapper from xe_device.
>> Rename some variables, check for locks in the correct function (Rodrigo).
>> Ensure reset path is also covered for this WA.
>>
>> v4: Fix BAT failure
>>
>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>> ---
>>   drivers/gpu/drm/xe/Makefile          |  2 +
>>   drivers/gpu/drm/xe/xe_device.c       |  3 ++
>>   drivers/gpu/drm/xe/xe_ggtt.c         | 22 +++++++++
>>   drivers/gpu/drm/xe/xe_ggtt_types.h   |  2 +
>>   drivers/gpu/drm/xe/xe_gsc.c          |  5 ++
>>   drivers/gpu/drm/xe/xe_gt.c           | 24 ++++++++++
>>   drivers/gpu/drm/xe/xe_gt.h           |  1 +
>>   drivers/gpu/drm/xe/xe_guc_pc.c       | 71 +++++++++++++++++++++++++++-
>>   drivers/gpu/drm/xe/xe_guc_pc.h       |  2 +
>>   drivers/gpu/drm/xe/xe_guc_pc_types.h |  4 ++
>>   drivers/gpu/drm/xe/xe_wa_oob.rules   |  1 +
>>   11 files changed, 136 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>> index 1905a80e61e3..8c72fe38edfd 100644
>> --- a/drivers/gpu/drm/xe/Makefile
>> +++ b/drivers/gpu/drm/xe/Makefile
>> @@ -24,9 +24,11 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
>>   	$(call cmd,wa_oob)
>>   
>>   uses_generated_oob := \
>> +	$(obj)/xe_ggtt.o \
>>   	$(obj)/xe_gsc.o \
>>   	$(obj)/xe_guc.o \
>>   	$(obj)/xe_guc_ads.o \
>> +	$(obj)/xe_guc_pc.o \
>>   	$(obj)/xe_migrate.o \
>>   	$(obj)/xe_ring_ops.o \
>>   	$(obj)/xe_vm.o \
>> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
>> index 94dbfe5cf19c..fee90f66246f 100644
>> --- a/drivers/gpu/drm/xe/xe_device.c
>> +++ b/drivers/gpu/drm/xe/xe_device.c
>> @@ -668,6 +668,9 @@ int xe_device_probe(struct xe_device *xe)
>>   
>>   	xe_hwmon_register(xe);
>>   
>> +	for_each_gt(gt, xe, id)
>> +		xe_gt_sanitize_freq(gt);
>> +
>>   	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
>>   
>>   err_fini_display:
>> diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
>> index 8ff91fd1b7c8..e3fddd59fd9f 100644
>> --- a/drivers/gpu/drm/xe/xe_ggtt.c
>> +++ b/drivers/gpu/drm/xe/xe_ggtt.c
>> @@ -11,6 +11,7 @@
>>   #include <drm/drm_drv.h>
>>   #include <drm/drm_managed.h>
>>   #include <drm/intel/i915_drm.h>
>> +#include <generated/xe_wa_oob.h>
>>   
>>   #include "regs/xe_gt_regs.h"
>>   #include "regs/xe_gtt_defs.h"
>> @@ -23,8 +24,10 @@
>>   #include "xe_gt_sriov_vf.h"
>>   #include "xe_gt_tlb_invalidation.h"
>>   #include "xe_map.h"
>> +#include "xe_mmio.h"
>>   #include "xe_pm.h"
>>   #include "xe_sriov.h"
>> +#include "xe_wa.h"
>>   #include "xe_wopcm.h"
>>   
>>   static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
>> @@ -69,12 +72,31 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
>>   	return ggms ? SZ_1M << ggms : 0;
>>   }
>>   
>> +static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
>> +{
>> +	/*
>> +	 * 22019338487: GMD_ID is a RO register, a dummy write forces gunit
>> +	 * to wait for completion of prior GTT writes before letting this through.
>> +	 */
>> +	lockdep_assert_held(&ggtt->lock);
>> +
>> +	++ggtt->access_count;
>> +
>> +	if (ggtt->tile->media_gt && XE_WA(ggtt->tile->media_gt, 22019338487)) {
>> +		if ((ggtt->access_count % 63) == 0) {
>> +			xe_mmio_write32(ggtt->tile->media_gt, GMD_ID, 0x0);
>> +			ggtt->access_count = 0;
>> +		}
>> +	}
>> +}
>> +
>>   void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
>>   {
>>   	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
>>   	xe_tile_assert(ggtt->tile, addr < ggtt->size);
>>   
>>   	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
>> +	ggtt_update_access_counter(ggtt);
> this doesn't seem a perfect solution as now it augments the generic
> function used by all platforms with unnecessary logic (that triggers
> extra step just for specific WA)
>
> maybe it's time to extend struct xe_ggtt_pt_ops with:
>
> 	void (*pte_write)(struct xe_ggtt *, u64, u64);
>
> and setup that ops only where applicable:
>
> static const struct xe_ggtt_pt_ops ops_wa = {
> 	.pte_encode_bo = ...,
> 	.pte_write = set_and_track_pte,
> };
>
> 	if (XE_WA(gt, 22019338487))
> 		ggtt->ops = ops_wa;
> 	else
> 		ggtt->ops = ...;
> 	
ok.
>
>>   }
>>   
>>   static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
>> diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
>> index d8c584d9a8c3..73fe6837b9cd 100644
>> --- a/drivers/gpu/drm/xe/xe_ggtt_types.h
>> +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
>> @@ -34,6 +34,8 @@ struct xe_ggtt {
>>   	const struct xe_ggtt_pt_ops *pt_ops;
>>   
>>   	struct drm_mm mm;
>> +
>> +	unsigned int access_count;
> missing kernel-doc
ok. None of the fields seem to have it, for another day, I guess.
>
>>   };
>>   
>>   #endif
>> diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
>> index 80a61934decc..f8239a13fa2b 100644
>> --- a/drivers/gpu/drm/xe/xe_gsc.c
>> +++ b/drivers/gpu/drm/xe/xe_gsc.c
>> @@ -22,6 +22,7 @@
>>   #include "xe_gt.h"
>>   #include "xe_gt_mcr.h"
>>   #include "xe_gt_printk.h"
>> +#include "xe_guc_pc.h"
>>   #include "xe_huc.h"
>>   #include "xe_map.h"
>>   #include "xe_mmio.h"
>> @@ -284,6 +285,10 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
>>   		return ret;
>>   
>>   	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
>> +
>> +	/* GSC load is done, restore expected GT frequencies */
>> +	xe_gt_sanitize_freq(gt);
>> +
>>   	xe_gt_dbg(gt, "GSC FW async load completed\n");
>>   
>>   	/* HuC auth failure is not fatal */
>> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>> index 57d84751e160..301594d1e7a4 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.c
>> +++ b/drivers/gpu/drm/xe/xe_gt.c
>> @@ -9,6 +9,7 @@
>>   
>>   #include <drm/drm_managed.h>
>>   #include <drm/xe_drm.h>
>> +#include <generated/xe_wa_oob.h>
>>   
>>   #include "instructions/xe_gfxpipe_commands.h"
>>   #include "instructions/xe_mi_commands.h"
>> @@ -54,6 +55,7 @@
>>   #include "xe_sriov.h"
>>   #include "xe_tuning.h"
>>   #include "xe_uc.h"
>> +#include "xe_uc_fw.h"
>>   #include "xe_vm.h"
>>   #include "xe_wa.h"
>>   #include "xe_wopcm.h"
>> @@ -678,6 +680,9 @@ static int do_gt_restart(struct xe_gt *gt)
>>   	/* Get CCS mode in sync between sw/hw */
>>   	xe_gt_apply_ccs_mode(gt);
>>   
>> +	/* Restore GT freq to expected values */
>> +	xe_gt_sanitize_freq(gt);
>> +
>>   	return 0;
>>   }
>>   
>> @@ -801,6 +806,25 @@ int xe_gt_suspend(struct xe_gt *gt)
>>   	return err;
>>   }
>>   
>> +/**
>> + * xe_gt_sanitize_freq() - Restore saved freuencies if necessary.
> typo
ok.
>
>> + * @gt: the GT object
>> + *
>> + * Called after driver init/GSC load completes to restore GT frequencies if we
>> + * limited them for any WAs.
>> + */
>> +int xe_gt_sanitize_freq(struct xe_gt *gt)
>> +{
>> +	int ret = 0;
>> +
>> +	if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
>> +	    xe_uc_fw_is_loaded(&gt->uc.gsc.fw)) &&
>> +	    XE_WA(gt, 22019338487))
>> +		ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);
>> +
>> +	return ret;
>> +}
>> +
>>   int xe_gt_resume(struct xe_gt *gt)
>>   {
>>   	int err;
>> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
>> index 9073ac68a777..1123fdfc4ebc 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.h
>> +++ b/drivers/gpu/drm/xe/xe_gt.h
>> @@ -56,6 +56,7 @@ int xe_gt_suspend(struct xe_gt *gt);
>>   int xe_gt_resume(struct xe_gt *gt);
>>   void xe_gt_reset_async(struct xe_gt *gt);
>>   void xe_gt_sanitize(struct xe_gt *gt);
>> +int xe_gt_sanitize_freq(struct xe_gt *gt);
>>   void xe_gt_remove(struct xe_gt *gt);
>>   
>>   /**
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
>> index 666a37106bc5..d9194328b495 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
>> @@ -8,6 +8,7 @@
>>   #include <linux/delay.h>
>>   
>>   #include <drm/drm_managed.h>
>> +#include <generated/xe_wa_oob.h>
>>   
>>   #include "abi/guc_actions_slpc_abi.h"
>>   #include "regs/xe_gt_regs.h"
>> @@ -24,6 +25,7 @@
>>   #include "xe_map.h"
>>   #include "xe_mmio.h"
>>   #include "xe_pcode.h"
>> +#include "xe_wa.h"
>>   
>>   #define MCHBAR_MIRROR_BASE_SNB	0x140000
>>   
>> @@ -41,6 +43,8 @@
>>   #define GT_FREQUENCY_MULTIPLIER	50
>>   #define GT_FREQUENCY_SCALER	3
>>   
>> +#define LNL_MERT_FREQ_CAP	800
>> +
>>   /**
>>    * DOC: GuC Power Conservation (PC)
>>    *
>> @@ -673,6 +677,16 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
>>   		tgl_init_fused_rp_values(pc);
>>   }
>>   
>> +static u32 pc_max_freq_cap(struct xe_guc_pc *pc)
>> +{
>> +	struct xe_gt *gt = pc_to_gt(pc);
>> +
>> +	if (XE_WA(gt, 22019338487))
>> +		return min(LNL_MERT_FREQ_CAP, pc->rp0_freq);
>> +	else
>> +		return pc->rp0_freq;
>> +}
>> +
>>   /**
>>    * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
>>    * frequency to allow faster GuC load times
>> @@ -684,7 +698,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
>>   
>>   	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
>>   	pc_init_fused_rp_values(pc);
>> -	pc_set_cur_freq(pc, pc->rp0_freq);
>> +	pc_set_cur_freq(pc, pc_max_freq_cap(pc));
>>   }
>>   
>>   static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
>> @@ -740,6 +754,53 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
>>   	return ret;
>>   }
>>   
>> +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
>> +{
>> +	int ret = 0;
>> +
>> +	if (XE_WA(pc_to_gt(pc), 22019338487)) {
>> +		/*
>> +		 * Get updated min/max and stash them.
>> +		 */
>> +		ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
>> +		if (!ret)
>> +			ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
>> +		if (ret)
>> +			return ret;
>> +
>> +		/*
>> +		 * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
>> +		 */
>> +		mutex_lock(&pc->freq_lock);
>> +		ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
>> +		if (!ret)
>> +			ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
>> +		mutex_unlock(&pc->freq_lock);
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +/**
>> + * xe_guc_pc_restore_stashed_freq - Set min/max back to stashed values
>> + * @pc: The GuC PC
>> + *
>> + * Returns: 0 on success,
>> + *          error code on failure
>> + */
>> +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc)
>> +{
>> +	int ret = 0;
>> +
>> +	mutex_lock(&pc->freq_lock);
>> +	ret = pc_set_max_freq(pc, pc->stashed_max_freq);
>> +	if (!ret)
>> +		ret = pc_set_min_freq(pc, pc->stashed_min_freq);
>> +	mutex_unlock(&pc->freq_lock);
>> +
>> +	return ret;
>> +}
>> +
>>   /**
>>    * xe_guc_pc_gucrc_disable - Disable GuC RC
>>    * @pc: Xe_GuC_PC instance
>> @@ -854,6 +915,10 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
>>   	if (ret)
>>   		goto out;
>>   
>> +	ret = pc_set_mert_freq_cap(pc);
>> +	if (ret)
>> +		goto out;
>> +
>>   	if (xe->info.platform == XE_PVC) {
>>   		xe_guc_pc_gucrc_disable(pc);
>>   		ret = 0;
>> @@ -902,6 +967,10 @@ static void xe_guc_pc_fini_hw(void *arg)
>>   	XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
>>   	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
>>   	XE_WARN_ON(xe_guc_pc_stop(pc));
>> +
>> +	/* Bind requested freq to mert_freq_cap before unload */
>> +	pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq));
>> +
>>   	xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
>>   }
>>   
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
>> index 532cac985a6d..7ba875c3613b 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc.h
>> @@ -8,6 +8,7 @@
>>   
>>   #include <linux/types.h>
>>   
>> +struct xe_device;
> why this ?

removed.

Thanks,

Vinay.

>
>>   struct xe_guc_pc;
>>   
>>   int xe_guc_pc_init(struct xe_guc_pc *pc);
>> @@ -29,5 +30,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
>>   u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
>>   u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
>>   void xe_guc_pc_init_early(struct xe_guc_pc *pc);
>> +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
>>   
>>   #endif /* _XE_GUC_PC_H_ */
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
>> index 2afd0dbc3542..13810be015db 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
>> @@ -25,6 +25,10 @@ struct xe_guc_pc {
>>   	u32 user_requested_min;
>>   	/** @user_requested_max: Stash the maximum requested freq by user */
>>   	u32 user_requested_max;
>> +	/** @stashed_min_freq: Stash the current minimum freq */
>> +	u32 stashed_min_freq;
>> +	/** @stashed_max_freq: Stash the current maximum freq */
>> +	u32 stashed_max_freq;
>>   	/** @freq_lock: Let's protect the frequencies */
>>   	struct mutex freq_lock;
>>   	/** @freq_ready: Only handle freq changes, if they are really ready */
>> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> index 12fe88796a49..a6b897030fde 100644
>> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
>> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> @@ -27,3 +27,4 @@
>>   16022287689	GRAPHICS_VERSION(2001)
>>   		GRAPHICS_VERSION(2004)
>>   13011645652	GRAPHICS_VERSION(2004)
>> +22019338487	MEDIA_VERSION(2000)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 1/2] drm/xe/lnl: Apply Wa_22019338487
  2024-06-18 23:47     ` Belgaumkar, Vinay
@ 2024-06-19 14:42       ` Rodrigo Vivi
  0 siblings, 0 replies; 12+ messages in thread
From: Rodrigo Vivi @ 2024-06-19 14:42 UTC (permalink / raw)
  To: Belgaumkar, Vinay; +Cc: Michal Wajdeczko, intel-xe

On Tue, Jun 18, 2024 at 04:47:03PM -0700, Belgaumkar, Vinay wrote:
> 
> On 6/17/2024 12:48 PM, Michal Wajdeczko wrote:
> > 
> > On 17.06.2024 10:10, Vinay Belgaumkar wrote:
> > > This WA requires us to limit media GT frequency requests to a certain
> > > cap value during driver load. Freq limits are restored after load
> > > completes, so perf will not be affected during normal operations.
> > > 
> > > During normal driver operation, this WA requires dummy writes to media
> > > offset 0x380D8C after every ~63 GGTT writes. This will ensure completion
> > > of the LMEM writes originating from Gunit.
> > > 
> > > During driver unload(before FLR), the WA requires that we set requested
> > > frequency to the cap value again.
> > > 
> > > v3: Do not use WA number in function name. Call WA wrapper from xe_device.
> > > Rename some variables, check for locks in the correct function (Rodrigo).
> > > Ensure reset path is also covered for this WA.
> > > 
> > > v4: Fix BAT failure
> > > 
> > > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > > Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> > > ---
> > >   drivers/gpu/drm/xe/Makefile          |  2 +
> > >   drivers/gpu/drm/xe/xe_device.c       |  3 ++
> > >   drivers/gpu/drm/xe/xe_ggtt.c         | 22 +++++++++
> > >   drivers/gpu/drm/xe/xe_ggtt_types.h   |  2 +
> > >   drivers/gpu/drm/xe/xe_gsc.c          |  5 ++
> > >   drivers/gpu/drm/xe/xe_gt.c           | 24 ++++++++++
> > >   drivers/gpu/drm/xe/xe_gt.h           |  1 +
> > >   drivers/gpu/drm/xe/xe_guc_pc.c       | 71 +++++++++++++++++++++++++++-
> > >   drivers/gpu/drm/xe/xe_guc_pc.h       |  2 +
> > >   drivers/gpu/drm/xe/xe_guc_pc_types.h |  4 ++
> > >   drivers/gpu/drm/xe/xe_wa_oob.rules   |  1 +
> > >   11 files changed, 136 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> > > index 1905a80e61e3..8c72fe38edfd 100644
> > > --- a/drivers/gpu/drm/xe/Makefile
> > > +++ b/drivers/gpu/drm/xe/Makefile
> > > @@ -24,9 +24,11 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
> > >   	$(call cmd,wa_oob)
> > >   uses_generated_oob := \
> > > +	$(obj)/xe_ggtt.o \
> > >   	$(obj)/xe_gsc.o \
> > >   	$(obj)/xe_guc.o \
> > >   	$(obj)/xe_guc_ads.o \
> > > +	$(obj)/xe_guc_pc.o \
> > >   	$(obj)/xe_migrate.o \
> > >   	$(obj)/xe_ring_ops.o \
> > >   	$(obj)/xe_vm.o \
> > > diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> > > index 94dbfe5cf19c..fee90f66246f 100644
> > > --- a/drivers/gpu/drm/xe/xe_device.c
> > > +++ b/drivers/gpu/drm/xe/xe_device.c
> > > @@ -668,6 +668,9 @@ int xe_device_probe(struct xe_device *xe)
> > >   	xe_hwmon_register(xe);
> > > +	for_each_gt(gt, xe, id)
> > > +		xe_gt_sanitize_freq(gt);
> > > +
> > >   	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
> > >   err_fini_display:
> > > diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
> > > index 8ff91fd1b7c8..e3fddd59fd9f 100644
> > > --- a/drivers/gpu/drm/xe/xe_ggtt.c
> > > +++ b/drivers/gpu/drm/xe/xe_ggtt.c
> > > @@ -11,6 +11,7 @@
> > >   #include <drm/drm_drv.h>
> > >   #include <drm/drm_managed.h>
> > >   #include <drm/intel/i915_drm.h>
> > > +#include <generated/xe_wa_oob.h>
> > >   #include "regs/xe_gt_regs.h"
> > >   #include "regs/xe_gtt_defs.h"
> > > @@ -23,8 +24,10 @@
> > >   #include "xe_gt_sriov_vf.h"
> > >   #include "xe_gt_tlb_invalidation.h"
> > >   #include "xe_map.h"
> > > +#include "xe_mmio.h"
> > >   #include "xe_pm.h"
> > >   #include "xe_sriov.h"
> > > +#include "xe_wa.h"
> > >   #include "xe_wopcm.h"
> > >   static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
> > > @@ -69,12 +72,31 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
> > >   	return ggms ? SZ_1M << ggms : 0;
> > >   }
> > > +static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
> > > +{
> > > +	/*
> > > +	 * 22019338487: GMD_ID is a RO register, a dummy write forces gunit
> > > +	 * to wait for completion of prior GTT writes before letting this through.
> > > +	 */
> > > +	lockdep_assert_held(&ggtt->lock);
> > > +
> > > +	++ggtt->access_count;
> > > +
> > > +	if (ggtt->tile->media_gt && XE_WA(ggtt->tile->media_gt, 22019338487)) {
> > > +		if ((ggtt->access_count % 63) == 0) {
> > > +			xe_mmio_write32(ggtt->tile->media_gt, GMD_ID, 0x0);
> > > +			ggtt->access_count = 0;
> > > +		}
> > > +	}
> > > +}
> > > +
> > >   void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
> > >   {
> > >   	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
> > >   	xe_tile_assert(ggtt->tile, addr < ggtt->size);
> > >   	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
> > > +	ggtt_update_access_counter(ggtt);
> > this doesn't seem a perfect solution as now it augments the generic
> > function used by all platforms with unnecessary logic (that triggers
> > extra step just for specific WA)
> > 
> > maybe it's time to extend struct xe_ggtt_pt_ops with:
> > 
> > 	void (*pte_write)(struct xe_ggtt *, u64, u64);
> > 
> > and setup that ops only where applicable:
> > 
> > static const struct xe_ggtt_pt_ops ops_wa = {
> > 	.pte_encode_bo = ...,
> > 	.pte_write = set_and_track_pte,
> > };
> > 
> > 	if (XE_WA(gt, 22019338487))
> > 		ggtt->ops = ops_wa;
> > 	else
> > 		ggtt->ops = ...;
> > 	
> ok.
> > 
> > >   }
> > >   static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
> > > diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
> > > index d8c584d9a8c3..73fe6837b9cd 100644
> > > --- a/drivers/gpu/drm/xe/xe_ggtt_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
> > > @@ -34,6 +34,8 @@ struct xe_ggtt {
> > >   	const struct xe_ggtt_pt_ops *pt_ops;
> > >   	struct drm_mm mm;
> > > +
> > > +	unsigned int access_count;
> > missing kernel-doc
> ok. None of the fields seem to have it, for another day, I guess.

I will take care of that whenever addressing the review comments I got in my
series that adds doc to this component:

https://lore.kernel.org/intel-xe/20240613215357.307061-1-rodrigo.vivi@intel.com/

Thanks,
Rodrigo.

> > 
> > >   };
> > >   #endif
> > > diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
> > > index 80a61934decc..f8239a13fa2b 100644
> > > --- a/drivers/gpu/drm/xe/xe_gsc.c
> > > +++ b/drivers/gpu/drm/xe/xe_gsc.c
> > > @@ -22,6 +22,7 @@
> > >   #include "xe_gt.h"
> > >   #include "xe_gt_mcr.h"
> > >   #include "xe_gt_printk.h"
> > > +#include "xe_guc_pc.h"
> > >   #include "xe_huc.h"
> > >   #include "xe_map.h"
> > >   #include "xe_mmio.h"
> > > @@ -284,6 +285,10 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
> > >   		return ret;
> > >   	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
> > > +
> > > +	/* GSC load is done, restore expected GT frequencies */
> > > +	xe_gt_sanitize_freq(gt);
> > > +
> > >   	xe_gt_dbg(gt, "GSC FW async load completed\n");
> > >   	/* HuC auth failure is not fatal */
> > > diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> > > index 57d84751e160..301594d1e7a4 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt.c
> > > +++ b/drivers/gpu/drm/xe/xe_gt.c
> > > @@ -9,6 +9,7 @@
> > >   #include <drm/drm_managed.h>
> > >   #include <drm/xe_drm.h>
> > > +#include <generated/xe_wa_oob.h>
> > >   #include "instructions/xe_gfxpipe_commands.h"
> > >   #include "instructions/xe_mi_commands.h"
> > > @@ -54,6 +55,7 @@
> > >   #include "xe_sriov.h"
> > >   #include "xe_tuning.h"
> > >   #include "xe_uc.h"
> > > +#include "xe_uc_fw.h"
> > >   #include "xe_vm.h"
> > >   #include "xe_wa.h"
> > >   #include "xe_wopcm.h"
> > > @@ -678,6 +680,9 @@ static int do_gt_restart(struct xe_gt *gt)
> > >   	/* Get CCS mode in sync between sw/hw */
> > >   	xe_gt_apply_ccs_mode(gt);
> > > +	/* Restore GT freq to expected values */
> > > +	xe_gt_sanitize_freq(gt);
> > > +
> > >   	return 0;
> > >   }
> > > @@ -801,6 +806,25 @@ int xe_gt_suspend(struct xe_gt *gt)
> > >   	return err;
> > >   }
> > > +/**
> > > + * xe_gt_sanitize_freq() - Restore saved freuencies if necessary.
> > typo
> ok.
> > 
> > > + * @gt: the GT object
> > > + *
> > > + * Called after driver init/GSC load completes to restore GT frequencies if we
> > > + * limited them for any WAs.
> > > + */
> > > +int xe_gt_sanitize_freq(struct xe_gt *gt)
> > > +{
> > > +	int ret = 0;
> > > +
> > > +	if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
> > > +	    xe_uc_fw_is_loaded(&gt->uc.gsc.fw)) &&
> > > +	    XE_WA(gt, 22019338487))
> > > +		ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > >   int xe_gt_resume(struct xe_gt *gt)
> > >   {
> > >   	int err;
> > > diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
> > > index 9073ac68a777..1123fdfc4ebc 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt.h
> > > +++ b/drivers/gpu/drm/xe/xe_gt.h
> > > @@ -56,6 +56,7 @@ int xe_gt_suspend(struct xe_gt *gt);
> > >   int xe_gt_resume(struct xe_gt *gt);
> > >   void xe_gt_reset_async(struct xe_gt *gt);
> > >   void xe_gt_sanitize(struct xe_gt *gt);
> > > +int xe_gt_sanitize_freq(struct xe_gt *gt);
> > >   void xe_gt_remove(struct xe_gt *gt);
> > >   /**
> > > diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
> > > index 666a37106bc5..d9194328b495 100644
> > > --- a/drivers/gpu/drm/xe/xe_guc_pc.c
> > > +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
> > > @@ -8,6 +8,7 @@
> > >   #include <linux/delay.h>
> > >   #include <drm/drm_managed.h>
> > > +#include <generated/xe_wa_oob.h>
> > >   #include "abi/guc_actions_slpc_abi.h"
> > >   #include "regs/xe_gt_regs.h"
> > > @@ -24,6 +25,7 @@
> > >   #include "xe_map.h"
> > >   #include "xe_mmio.h"
> > >   #include "xe_pcode.h"
> > > +#include "xe_wa.h"
> > >   #define MCHBAR_MIRROR_BASE_SNB	0x140000
> > > @@ -41,6 +43,8 @@
> > >   #define GT_FREQUENCY_MULTIPLIER	50
> > >   #define GT_FREQUENCY_SCALER	3
> > > +#define LNL_MERT_FREQ_CAP	800
> > > +
> > >   /**
> > >    * DOC: GuC Power Conservation (PC)
> > >    *
> > > @@ -673,6 +677,16 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
> > >   		tgl_init_fused_rp_values(pc);
> > >   }
> > > +static u32 pc_max_freq_cap(struct xe_guc_pc *pc)
> > > +{
> > > +	struct xe_gt *gt = pc_to_gt(pc);
> > > +
> > > +	if (XE_WA(gt, 22019338487))
> > > +		return min(LNL_MERT_FREQ_CAP, pc->rp0_freq);
> > > +	else
> > > +		return pc->rp0_freq;
> > > +}
> > > +
> > >   /**
> > >    * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
> > >    * frequency to allow faster GuC load times
> > > @@ -684,7 +698,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
> > >   	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
> > >   	pc_init_fused_rp_values(pc);
> > > -	pc_set_cur_freq(pc, pc->rp0_freq);
> > > +	pc_set_cur_freq(pc, pc_max_freq_cap(pc));
> > >   }
> > >   static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
> > > @@ -740,6 +754,53 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
> > >   	return ret;
> > >   }
> > > +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
> > > +{
> > > +	int ret = 0;
> > > +
> > > +	if (XE_WA(pc_to_gt(pc), 22019338487)) {
> > > +		/*
> > > +		 * Get updated min/max and stash them.
> > > +		 */
> > > +		ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
> > > +		if (!ret)
> > > +			ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
> > > +		if (ret)
> > > +			return ret;
> > > +
> > > +		/*
> > > +		 * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
> > > +		 */
> > > +		mutex_lock(&pc->freq_lock);
> > > +		ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
> > > +		if (!ret)
> > > +			ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
> > > +		mutex_unlock(&pc->freq_lock);
> > > +	}
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +/**
> > > + * xe_guc_pc_restore_stashed_freq - Set min/max back to stashed values
> > > + * @pc: The GuC PC
> > > + *
> > > + * Returns: 0 on success,
> > > + *          error code on failure
> > > + */
> > > +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc)
> > > +{
> > > +	int ret = 0;
> > > +
> > > +	mutex_lock(&pc->freq_lock);
> > > +	ret = pc_set_max_freq(pc, pc->stashed_max_freq);
> > > +	if (!ret)
> > > +		ret = pc_set_min_freq(pc, pc->stashed_min_freq);
> > > +	mutex_unlock(&pc->freq_lock);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > >   /**
> > >    * xe_guc_pc_gucrc_disable - Disable GuC RC
> > >    * @pc: Xe_GuC_PC instance
> > > @@ -854,6 +915,10 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
> > >   	if (ret)
> > >   		goto out;
> > > +	ret = pc_set_mert_freq_cap(pc);
> > > +	if (ret)
> > > +		goto out;
> > > +
> > >   	if (xe->info.platform == XE_PVC) {
> > >   		xe_guc_pc_gucrc_disable(pc);
> > >   		ret = 0;
> > > @@ -902,6 +967,10 @@ static void xe_guc_pc_fini_hw(void *arg)
> > >   	XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
> > >   	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
> > >   	XE_WARN_ON(xe_guc_pc_stop(pc));
> > > +
> > > +	/* Bind requested freq to mert_freq_cap before unload */
> > > +	pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq));
> > > +
> > >   	xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
> > >   }
> > > diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
> > > index 532cac985a6d..7ba875c3613b 100644
> > > --- a/drivers/gpu/drm/xe/xe_guc_pc.h
> > > +++ b/drivers/gpu/drm/xe/xe_guc_pc.h
> > > @@ -8,6 +8,7 @@
> > >   #include <linux/types.h>
> > > +struct xe_device;
> > why this ?
> 
> removed.
> 
> Thanks,
> 
> Vinay.
> 
> > 
> > >   struct xe_guc_pc;
> > >   int xe_guc_pc_init(struct xe_guc_pc *pc);
> > > @@ -29,5 +30,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
> > >   u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
> > >   u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
> > >   void xe_guc_pc_init_early(struct xe_guc_pc *pc);
> > > +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
> > >   #endif /* _XE_GUC_PC_H_ */
> > > diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
> > > index 2afd0dbc3542..13810be015db 100644
> > > --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
> > > @@ -25,6 +25,10 @@ struct xe_guc_pc {
> > >   	u32 user_requested_min;
> > >   	/** @user_requested_max: Stash the maximum requested freq by user */
> > >   	u32 user_requested_max;
> > > +	/** @stashed_min_freq: Stash the current minimum freq */
> > > +	u32 stashed_min_freq;
> > > +	/** @stashed_max_freq: Stash the current maximum freq */
> > > +	u32 stashed_max_freq;
> > >   	/** @freq_lock: Let's protect the frequencies */
> > >   	struct mutex freq_lock;
> > >   	/** @freq_ready: Only handle freq changes, if they are really ready */
> > > diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
> > > index 12fe88796a49..a6b897030fde 100644
> > > --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
> > > +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
> > > @@ -27,3 +27,4 @@
> > >   16022287689	GRAPHICS_VERSION(2001)
> > >   		GRAPHICS_VERSION(2004)
> > >   13011645652	GRAPHICS_VERSION(2004)
> > > +22019338487	MEDIA_VERSION(2000)

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2024-06-19 14:42 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-17  8:10 [PATCH v4 0/2] Apply Wa_22019338487 Vinay Belgaumkar
2024-06-17  8:10 ` [PATCH v4 1/2] drm/xe/lnl: " Vinay Belgaumkar
2024-06-17 18:18   ` Rodrigo Vivi
2024-06-18 23:45     ` Belgaumkar, Vinay
2024-06-17 19:48   ` Michal Wajdeczko
2024-06-18 23:47     ` Belgaumkar, Vinay
2024-06-19 14:42       ` Rodrigo Vivi
2024-06-17  8:10 ` [PATCH v4 2/2] drm/xe/guc: Request max GT freq during resume Vinay Belgaumkar
2024-06-17 18:20   ` Rodrigo Vivi
2024-06-17  8:18 ` ✓ CI.Patch_applied: success for Apply Wa_22019338487 (rev2) Patchwork
2024-06-17  8:18 ` ✗ CI.checkpatch: warning " Patchwork
2024-06-17  8:19 ` ✗ CI.KUnit: failure " Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox