* [PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340
@ 2024-07-02 15:06 Matthew Auld
2024-07-02 15:06 ` [PATCH v3 2/2] drm/i915: disable fbc due to Wa_16023588340 Matthew Auld
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Matthew Auld @ 2024-07-02 15:06 UTC (permalink / raw)
To: intel-xe
Cc: intel-gfx, Jonathan Cavitt, Matt Roper, Lucas De Marchi,
Vinod Govindapillai
This involves enabling l2 caching of host side memory access to VRAM
through the CPU BAR. The main fallout here is with display since VRAM
writes from CPU can now be cached in GPU l2, and display is never
coherent with caches, so needs various manual flushing. In the case of
fbc we disable it due to complications in getting this to work
correctly (in a later patch).
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Vinod Govindapillai <vinod.govindapillai@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
---
drivers/gpu/drm/xe/Makefile | 2 +
drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 8 ++++
drivers/gpu/drm/xe/display/xe_fb_pin.c | 3 ++
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 8 ++++
drivers/gpu/drm/xe/xe_device.c | 30 ++++++++++++
drivers/gpu/drm/xe/xe_device.h | 1 +
drivers/gpu/drm/xe/xe_gt.c | 54 ++++++++++++++++++++++
drivers/gpu/drm/xe/xe_pat.c | 11 ++++-
drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
9 files changed, 117 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index b1e03bfe4a68..970c5c09e20a 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
uses_generated_oob := \
$(obj)/xe_ggtt.o \
+ $(obj)/xe_device.o \
$(obj)/xe_gsc.o \
$(obj)/xe_gt.o \
$(obj)/xe_guc.o \
$(obj)/xe_guc_ads.o \
$(obj)/xe_guc_pc.o \
$(obj)/xe_migrate.o \
+ $(obj)/xe_pat.o \
$(obj)/xe_ring_ops.o \
$(obj)/xe_vm.o \
$(obj)/xe_wa.o \
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index 9e860c61f4b3..ccd0d87d438a 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -7,6 +7,8 @@
#include "intel_display_types.h"
#include "intel_dsb_buffer.h"
#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_device_types.h"
#include "xe_gt.h"
u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
@@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
{
+ struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
+ xe_device_l2_flush(xe);
}
u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
@@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
{
+ struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
+ xe_device_l2_flush(xe);
}
bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 423f367c7065..d7db44e79eaf 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -10,6 +10,7 @@
#include "intel_fb.h"
#include "intel_fb_pin.h"
#include "xe_bo.h"
+#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_pm.h"
@@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
if (ret)
goto err_unpin;
+ /* Ensure DPT writes are flushed */
+ xe_device_l2_flush(xe);
return vma;
err_unpin:
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index d44564bad009..fd9d94174efb 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -80,6 +80,9 @@
#define LE_CACHEABILITY_MASK REG_GENMASK(1, 0)
#define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
+#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194)
+#define CG_DIS_CNTLBUS REG_BIT(6)
+
#define CCS_AUX_INV XE_REG(0x4208)
#define VD0_AUX_INV XE_REG(0x4218)
@@ -372,6 +375,11 @@
#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8)
+#define XE2_GLOBAL_INVAL XE_REG(0xb404)
+
+#define SCRATCH1LPFC XE_REG(0xb474)
+#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
+
#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
#define XE2_TDF_CTRL XE_REG(0xb418)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index cfda7cb5df2c..b0f79ef6bce1 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -54,6 +54,9 @@
#include "xe_vm.h"
#include "xe_vram.h"
#include "xe_wait_user_fence.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
@@ -779,6 +782,11 @@ void xe_device_td_flush(struct xe_device *xe)
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
return;
+ if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
+ xe_device_l2_flush(xe);
+ return;
+ }
+
for_each_gt(gt, xe, id) {
if (xe_gt_is_media_type(gt))
continue;
@@ -802,6 +810,28 @@ void xe_device_td_flush(struct xe_device *xe)
}
}
+void xe_device_l2_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int err;
+
+ gt = xe_root_mmio_gt(xe);
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return;
+
+ xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
+
+ if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true))
+ xe_gt_err_once(gt, "Global invalidation timeout\n");
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
return xe_device_has_flat_ccs(xe) ?
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index bb07f5669dbb..0a2a3e7fd402 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address);
u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address);
void xe_device_td_flush(struct xe_device *xe);
+void xe_device_l2_flush(struct xe_device *xe);
static inline bool xe_device_wedged(struct xe_device *xe)
{
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 29e8ea94d05e..006d3594ba55 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -11,6 +11,8 @@
#include <drm/xe_drm.h>
#include <generated/xe_wa_oob.h>
+#include <generated/xe_wa_oob.h>
+
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_gt_regs.h"
@@ -95,6 +97,51 @@ void xe_gt_sanitize(struct xe_gt *gt)
gt->uc.guc.submission_state.enabled = false;
}
+static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
+{
+ u32 reg;
+ int err;
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (WARN_ON(err))
+ return;
+
+ if (!xe_gt_is_media_type(gt)) {
+ xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH);
+ reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
+ reg |= CG_DIS_CNTLBUS;
+ xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
+ }
+
+ xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
+{
+ u32 reg;
+ int err;
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ if (xe_gt_is_media_type(gt))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (WARN_ON(err))
+ return;
+
+ reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
+ reg &= ~CG_DIS_CNTLBUS;
+ xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
/**
* xe_gt_remove() - Clean up the GT structures before driver removal
* @gt: the GT object
@@ -111,6 +158,8 @@ void xe_gt_remove(struct xe_gt *gt)
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+
+ xe_gt_disable_host_l2_vram(gt);
}
static void gt_reset_worker(struct work_struct *w);
@@ -508,6 +557,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
xe_gt_mcr_init_early(gt);
xe_pat_init(gt);
+ xe_gt_enable_host_l2_vram(gt);
err = xe_uc_init(&gt->uc);
if (err)
@@ -643,6 +693,8 @@ static int do_gt_restart(struct xe_gt *gt)
xe_pat_init(gt);
+ xe_gt_enable_host_l2_vram(gt);
+
xe_gt_mcr_set_implicit_defaults(gt);
xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
@@ -796,6 +848,8 @@ int xe_gt_suspend(struct xe_gt *gt)
xe_gt_idle_disable_pg(gt);
+ xe_gt_disable_host_l2_vram(gt);
+
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
xe_gt_dbg(gt, "suspended\n");
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 4ee32ee1cc88..722278cc23fc 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -7,6 +7,8 @@
#include <drm/xe_drm.h>
+#include <generated/xe_wa_oob.h>
+
#include "regs/xe_reg_defs.h"
#include "xe_assert.h"
#include "xe_device.h"
@@ -15,6 +17,7 @@
#include "xe_gt_mcr.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
+#include "xe_wa.h"
#define _PAT_ATS 0x47fc
#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \
@@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe)
if (GRAPHICS_VER(xe) == 20) {
xe->pat.ops = &xe2_pat_ops;
xe->pat.table = xe2_pat_table;
- xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
+
+ /* Wa_16023588340. XXX: Should use XE_WA */
+ if (GRAPHICS_VERx100(xe) == 2001)
+ xe->pat.n_entries = 28; /* Disable CLOS3 */
+ else
+ xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
+
xe->pat.idx[XE_CACHE_NONE] = 3;
xe->pat.idx[XE_CACHE_WT] = 15;
xe->pat.idx[XE_CACHE_WB] = 2;
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index a6b897030fde..c6d8941621c6 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -28,3 +28,4 @@
GRAPHICS_VERSION(2004)
13011645652 GRAPHICS_VERSION(2004)
22019338487 MEDIA_VERSION(2000)
+16023588340 GRAPHICS_VERSION(2001)
--
2.45.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v3 2/2] drm/i915: disable fbc due to Wa_16023588340
2024-07-02 15:06 [PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340 Matthew Auld
@ 2024-07-02 15:06 ` Matthew Auld
2024-07-03 9:24 ` [PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340 Thomas Hellström
2024-07-03 11:26 ` ✗ CI.Patch_applied: failure for series starting with [v3,1/2] " Patchwork
2 siblings, 0 replies; 5+ messages in thread
From: Matthew Auld @ 2024-07-02 15:06 UTC (permalink / raw)
To: intel-xe
Cc: intel-gfx, Jonathan Cavitt, Matt Roper, Lucas De Marchi,
Vinod Govindapillai, Jani Nikula, Rodrigo Vivi
On BMG-G21 we need to disable fbc due to complications around the WA.
v2:
- Try to handle with i915_drv.h and compat layer. (Rodrigo)
v3:
- For simplicity retreat back to the original design for now.
- Drop the extra \ from the Makefile (Jani)
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Vinod Govindapillai <vinod.govindapillai@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: intel-gfx@lists.freedesktop.org
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
drivers/gpu/drm/i915/display/intel_display_wa.h | 8 ++++++++
drivers/gpu/drm/i915/display/intel_fbc.c | 6 ++++++
drivers/gpu/drm/xe/Makefile | 4 +++-
drivers/gpu/drm/xe/display/xe_display_wa.c | 16 ++++++++++++++++
4 files changed, 33 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/drm/xe/display/xe_display_wa.c
diff --git a/drivers/gpu/drm/i915/display/intel_display_wa.h b/drivers/gpu/drm/i915/display/intel_display_wa.h
index 63201d09852c..be644ab6ae00 100644
--- a/drivers/gpu/drm/i915/display/intel_display_wa.h
+++ b/drivers/gpu/drm/i915/display/intel_display_wa.h
@@ -6,8 +6,16 @@
#ifndef __INTEL_DISPLAY_WA_H__
#define __INTEL_DISPLAY_WA_H__
+#include <linux/types.h>
+
struct drm_i915_private;
void intel_display_wa_apply(struct drm_i915_private *i915);
+#ifdef I915
+static inline bool intel_display_needs_wa_16023588340(struct drm_i915_private *i915) { return false; }
+#else
+bool intel_display_needs_wa_16023588340(struct drm_i915_private *i915);
+#endif
+
#endif
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 67116c9f1464..8488f82143a4 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -56,6 +56,7 @@
#include "intel_display_device.h"
#include "intel_display_trace.h"
#include "intel_display_types.h"
+#include "intel_display_wa.h"
#include "intel_fbc.h"
#include "intel_fbc_regs.h"
#include "intel_frontbuffer.h"
@@ -1237,6 +1238,11 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
return 0;
}
+ if (intel_display_needs_wa_16023588340(i915)) {
+ plane_state->no_fbc_reason = "Wa_16023588340";
+ return 0;
+ }
+
/* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
if (i915_vtd_active(i915) && (IS_SKYLAKE(i915) || IS_BROXTON(i915))) {
plane_state->no_fbc_reason = "VT-d enabled";
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 970c5c09e20a..9e1bb2f6e03c 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -36,7 +36,8 @@ uses_generated_oob := \
$(obj)/xe_ring_ops.o \
$(obj)/xe_vm.o \
$(obj)/xe_wa.o \
- $(obj)/xe_ttm_stolen_mgr.o
+ $(obj)/xe_ttm_stolen_mgr.o \
+ $(obj)/display/xe_display_wa.o
$(uses_generated_oob): $(generated_oob)
@@ -194,6 +195,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
display/xe_display.o \
display/xe_display_misc.o \
display/xe_display_rps.o \
+ display/xe_display_wa.o \
display/xe_dsb_buffer.o \
display/xe_fb_pin.o \
display/xe_hdcp_gsc.o \
diff --git a/drivers/gpu/drm/xe/display/xe_display_wa.c b/drivers/gpu/drm/xe/display/xe_display_wa.c
new file mode 100644
index 000000000000..68e3d1959ad6
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_display_wa.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "intel_display_wa.h"
+
+#include "xe_device.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
+
+bool intel_display_needs_wa_16023588340(struct drm_i915_private *i915)
+{
+ return XE_WA(xe_root_mmio_gt(i915), 16023588340);
+}
--
2.45.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340
2024-07-02 15:06 [PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340 Matthew Auld
2024-07-02 15:06 ` [PATCH v3 2/2] drm/i915: disable fbc due to Wa_16023588340 Matthew Auld
@ 2024-07-03 9:24 ` Thomas Hellström
2024-07-03 9:41 ` Matthew Auld
2024-07-03 11:26 ` ✗ CI.Patch_applied: failure for series starting with [v3,1/2] " Patchwork
2 siblings, 1 reply; 5+ messages in thread
From: Thomas Hellström @ 2024-07-03 9:24 UTC (permalink / raw)
To: Matthew Auld, intel-xe
Cc: intel-gfx, Jonathan Cavitt, Matt Roper, Lucas De Marchi,
Vinod Govindapillai
Hi, Matt
On Tue, 2024-07-02 at 16:06 +0100, Matthew Auld wrote:
> This involves enabling l2 caching of host side memory access to VRAM
> through the CPU BAR. The main fallout here is with display since VRAM
> writes from CPU can now be cached in GPU l2, and display is never
> coherent with caches, so needs various manual flushing. In the case
> of
> fbc we disable it due to complications in getting this to work
> correctly (in a later patch).
What about user-space accesses to framebuffers?
/Thomas
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Vinod Govindapillai <vinod.govindapillai@intel.com>
> Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
> ---
> drivers/gpu/drm/xe/Makefile | 2 +
> drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 8 ++++
> drivers/gpu/drm/xe/display/xe_fb_pin.c | 3 ++
> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 8 ++++
> drivers/gpu/drm/xe/xe_device.c | 30 ++++++++++++
> drivers/gpu/drm/xe/xe_device.h | 1 +
> drivers/gpu/drm/xe/xe_gt.c | 54
> ++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_pat.c | 11 ++++-
> drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
> 9 files changed, 117 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/xe/Makefile
> b/drivers/gpu/drm/xe/Makefile
> index b1e03bfe4a68..970c5c09e20a 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c
> $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
>
> uses_generated_oob := \
> $(obj)/xe_ggtt.o \
> + $(obj)/xe_device.o \
> $(obj)/xe_gsc.o \
> $(obj)/xe_gt.o \
> $(obj)/xe_guc.o \
> $(obj)/xe_guc_ads.o \
> $(obj)/xe_guc_pc.o \
> $(obj)/xe_migrate.o \
> + $(obj)/xe_pat.o \
> $(obj)/xe_ring_ops.o \
> $(obj)/xe_vm.o \
> $(obj)/xe_wa.o \
> diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> index 9e860c61f4b3..ccd0d87d438a 100644
> --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> @@ -7,6 +7,8 @@
> #include "intel_display_types.h"
> #include "intel_dsb_buffer.h"
> #include "xe_bo.h"
> +#include "xe_device.h"
> +#include "xe_device_types.h"
> #include "xe_gt.h"
>
> u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
> @@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct
> intel_dsb_buffer *dsb_buf)
>
> void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32
> idx, u32 val)
> {
> + struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
> +
> iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
> + xe_device_l2_flush(xe);
> }
>
> u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
> @@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer
> *dsb_buf, u32 idx)
>
> void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32
> idx, u32 val, size_t size)
> {
> + struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
> +
> WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf-
> >cmd_buf));
>
> iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val,
> size);
> + xe_device_l2_flush(xe);
> }
>
> bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct
> intel_dsb_buffer *dsb_buf, size_t size)
> diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c
> b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> index 423f367c7065..d7db44e79eaf 100644
> --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
> +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> @@ -10,6 +10,7 @@
> #include "intel_fb.h"
> #include "intel_fb_pin.h"
> #include "xe_bo.h"
> +#include "xe_device.h"
> #include "xe_ggtt.h"
> #include "xe_gt.h"
> #include "xe_pm.h"
> @@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const
> struct intel_framebuffer *fb,
> if (ret)
> goto err_unpin;
>
> + /* Ensure DPT writes are flushed */
> + xe_device_l2_flush(xe);
> return vma;
>
> err_unpin:
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index d44564bad009..fd9d94174efb 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -80,6 +80,9 @@
> #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0)
> #define
> LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
>
> +#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194)
> +#define CG_DIS_CNTLBUS REG_BIT(6)
> +
> #define CCS_AUX_INV XE_REG(0x4208)
>
> #define VD0_AUX_INV XE_REG(0x4218)
> @@ -372,6 +375,11 @@
>
> #define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 +
> (i) * 8)
>
> +#define XE2_GLOBAL_INVAL XE_REG(0xb404)
> +
> +#define SCRATCH1LPFC XE_REG(0xb474)
> +#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
> +
> #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
>
> #define XE2_TDF_CTRL XE_REG(0xb418)
> diff --git a/drivers/gpu/drm/xe/xe_device.c
> b/drivers/gpu/drm/xe/xe_device.c
> index cfda7cb5df2c..b0f79ef6bce1 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -54,6 +54,9 @@
> #include "xe_vm.h"
> #include "xe_vram.h"
> #include "xe_wait_user_fence.h"
> +#include "xe_wa.h"
> +
> +#include <generated/xe_wa_oob.h>
>
> static int xe_file_open(struct drm_device *dev, struct drm_file
> *file)
> {
> @@ -779,6 +782,11 @@ void xe_device_td_flush(struct xe_device *xe)
> if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
> return;
>
> + if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
> + xe_device_l2_flush(xe);
> + return;
> + }
> +
> for_each_gt(gt, xe, id) {
> if (xe_gt_is_media_type(gt))
> continue;
> @@ -802,6 +810,28 @@ void xe_device_td_flush(struct xe_device *xe)
> }
> }
>
> +void xe_device_l2_flush(struct xe_device *xe)
> +{
> + struct xe_gt *gt;
> + int err;
> +
> + gt = xe_root_mmio_gt(xe);
> +
> + if (!XE_WA(gt, 16023588340))
> + return;
> +
> + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> + if (err)
> + return;
> +
> + xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
> +
> + if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150,
> NULL, true))
> + xe_gt_err_once(gt, "Global invalidation timeout\n");
> +
> + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> +}
> +
> u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
> {
> return xe_device_has_flat_ccs(xe) ?
> diff --git a/drivers/gpu/drm/xe/xe_device.h
> b/drivers/gpu/drm/xe/xe_device.h
> index bb07f5669dbb..0a2a3e7fd402 100644
> --- a/drivers/gpu/drm/xe/xe_device.h
> +++ b/drivers/gpu/drm/xe/xe_device.h
> @@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device
> *xe, u64 address);
> u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64
> address);
>
> void xe_device_td_flush(struct xe_device *xe);
> +void xe_device_l2_flush(struct xe_device *xe);
>
> static inline bool xe_device_wedged(struct xe_device *xe)
> {
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 29e8ea94d05e..006d3594ba55 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -11,6 +11,8 @@
> #include <drm/xe_drm.h>
> #include <generated/xe_wa_oob.h>
>
> +#include <generated/xe_wa_oob.h>
> +
> #include "instructions/xe_gfxpipe_commands.h"
> #include "instructions/xe_mi_commands.h"
> #include "regs/xe_gt_regs.h"
> @@ -95,6 +97,51 @@ void xe_gt_sanitize(struct xe_gt *gt)
> gt->uc.guc.submission_state.enabled = false;
> }
>
> +static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
> +{
> + u32 reg;
> + int err;
> +
> + if (!XE_WA(gt, 16023588340))
> + return;
> +
> + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> + if (WARN_ON(err))
> + return;
> +
> + if (!xe_gt_is_media_type(gt)) {
> + xe_mmio_write32(gt, SCRATCH1LPFC,
> EN_L3_RW_CCS_CACHE_FLUSH);
> + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
> + reg |= CG_DIS_CNTLBUS;
> + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
> + }
> +
> + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
> + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> +}
> +
> +static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
> +{
> + u32 reg;
> + int err;
> +
> + if (!XE_WA(gt, 16023588340))
> + return;
> +
> + if (xe_gt_is_media_type(gt))
> + return;
> +
> + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> + if (WARN_ON(err))
> + return;
> +
> + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
> + reg &= ~CG_DIS_CNTLBUS;
> + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
> +
> + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> +}
> +
> /**
> * xe_gt_remove() - Clean up the GT structures before driver removal
> * @gt: the GT object
> @@ -111,6 +158,8 @@ void xe_gt_remove(struct xe_gt *gt)
>
> for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
> xe_hw_fence_irq_finish(&gt->fence_irq[i]);
> +
> + xe_gt_disable_host_l2_vram(gt);
> }
>
> static void gt_reset_worker(struct work_struct *w);
> @@ -508,6 +557,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
>
> xe_gt_mcr_init_early(gt);
> xe_pat_init(gt);
> + xe_gt_enable_host_l2_vram(gt);
>
> err = xe_uc_init(&gt->uc);
> if (err)
> @@ -643,6 +693,8 @@ static int do_gt_restart(struct xe_gt *gt)
>
> xe_pat_init(gt);
>
> + xe_gt_enable_host_l2_vram(gt);
> +
> xe_gt_mcr_set_implicit_defaults(gt);
> xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
>
> @@ -796,6 +848,8 @@ int xe_gt_suspend(struct xe_gt *gt)
>
> xe_gt_idle_disable_pg(gt);
>
> + xe_gt_disable_host_l2_vram(gt);
> +
> XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt),
> XE_FORCEWAKE_ALL));
> xe_gt_dbg(gt, "suspended\n");
>
> diff --git a/drivers/gpu/drm/xe/xe_pat.c
> b/drivers/gpu/drm/xe/xe_pat.c
> index 4ee32ee1cc88..722278cc23fc 100644
> --- a/drivers/gpu/drm/xe/xe_pat.c
> +++ b/drivers/gpu/drm/xe/xe_pat.c
> @@ -7,6 +7,8 @@
>
> #include <drm/xe_drm.h>
>
> +#include <generated/xe_wa_oob.h>
> +
> #include "regs/xe_reg_defs.h"
> #include "xe_assert.h"
> #include "xe_device.h"
> @@ -15,6 +17,7 @@
> #include "xe_gt_mcr.h"
> #include "xe_mmio.h"
> #include "xe_sriov.h"
> +#include "xe_wa.h"
>
> #define _PAT_ATS 0x47fc
> #define
> _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \
> @@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe)
> if (GRAPHICS_VER(xe) == 20) {
> xe->pat.ops = &xe2_pat_ops;
> xe->pat.table = xe2_pat_table;
> - xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
> +
> + /* Wa_16023588340. XXX: Should use XE_WA */
> + if (GRAPHICS_VERx100(xe) == 2001)
> + xe->pat.n_entries = 28; /* Disable CLOS3 */
> + else
> + xe->pat.n_entries =
> ARRAY_SIZE(xe2_pat_table);
> +
> xe->pat.idx[XE_CACHE_NONE] = 3;
> xe->pat.idx[XE_CACHE_WT] = 15;
> xe->pat.idx[XE_CACHE_WB] = 2;
> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules
> b/drivers/gpu/drm/xe/xe_wa_oob.rules
> index a6b897030fde..c6d8941621c6 100644
> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
> @@ -28,3 +28,4 @@
> GRAPHICS_VERSION(2004)
> 13011645652 GRAPHICS_VERSION(2004)
> 22019338487 MEDIA_VERSION(2000)
> +16023588340 GRAPHICS_VERSION(2001)
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340
2024-07-03 9:24 ` [PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340 Thomas Hellström
@ 2024-07-03 9:41 ` Matthew Auld
0 siblings, 0 replies; 5+ messages in thread
From: Matthew Auld @ 2024-07-03 9:41 UTC (permalink / raw)
To: Thomas Hellström, intel-xe
Cc: intel-gfx, Jonathan Cavitt, Matt Roper, Lucas De Marchi,
Vinod Govindapillai
Hi,
On 03/07/2024 10:24, Thomas Hellström wrote:
> Hi, Matt
>
> On Tue, 2024-07-02 at 16:06 +0100, Matthew Auld wrote:
>> This involves enabling l2 caching of host side memory access to VRAM
>> through the CPU BAR. The main fallout here is with display since VRAM
>> writes from CPU can now be cached in GPU l2, and display is never
>> coherent with caches, so needs various manual flushing. In the case
>> of
>> fbc we disable it due to complications in getting this to work
>> correctly (in a later patch).
>
> What about user-space accesses to framebuffers?
There should be a manual flush of the entire L2 before flip etc. For
simplicity we piggyback on xe_device_td_flush(), which should
already be called in the right places from i915-display. With td_flush
we were already flushing L2, but only the cache entries marked as
transient display.
>
> /Thomas
>
>
>>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>> Cc: Vinod Govindapillai <vinod.govindapillai@intel.com>
>> Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
>> ---
>> drivers/gpu/drm/xe/Makefile | 2 +
>> drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 8 ++++
>> drivers/gpu/drm/xe/display/xe_fb_pin.c | 3 ++
>> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 8 ++++
>> drivers/gpu/drm/xe/xe_device.c | 30 ++++++++++++
>> drivers/gpu/drm/xe/xe_device.h | 1 +
>> drivers/gpu/drm/xe/xe_gt.c | 54
>> ++++++++++++++++++++++
>> drivers/gpu/drm/xe/xe_pat.c | 11 ++++-
>> drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
>> 9 files changed, 117 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/xe/Makefile
>> b/drivers/gpu/drm/xe/Makefile
>> index b1e03bfe4a68..970c5c09e20a 100644
>> --- a/drivers/gpu/drm/xe/Makefile
>> +++ b/drivers/gpu/drm/xe/Makefile
>> @@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c
>> $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
>>
>> uses_generated_oob := \
>> $(obj)/xe_ggtt.o \
>> + $(obj)/xe_device.o \
>> $(obj)/xe_gsc.o \
>> $(obj)/xe_gt.o \
>> $(obj)/xe_guc.o \
>> $(obj)/xe_guc_ads.o \
>> $(obj)/xe_guc_pc.o \
>> $(obj)/xe_migrate.o \
>> + $(obj)/xe_pat.o \
>> $(obj)/xe_ring_ops.o \
>> $(obj)/xe_vm.o \
>> $(obj)/xe_wa.o \
>> diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
>> b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
>> index 9e860c61f4b3..ccd0d87d438a 100644
>> --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
>> +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
>> @@ -7,6 +7,8 @@
>> #include "intel_display_types.h"
>> #include "intel_dsb_buffer.h"
>> #include "xe_bo.h"
>> +#include "xe_device.h"
>> +#include "xe_device_types.h"
>> #include "xe_gt.h"
>>
>> u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
>> @@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct
>> intel_dsb_buffer *dsb_buf)
>>
>> void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32
>> idx, u32 val)
>> {
>> + struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
>> +
>> iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
>> + xe_device_l2_flush(xe);
>> }
>>
>> u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
>> @@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer
>> *dsb_buf, u32 idx)
>>
>> void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32
>> idx, u32 val, size_t size)
>> {
>> + struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
>> +
>> WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
>>
>> iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val,
>> size);
>> + xe_device_l2_flush(xe);
>> }
>>
>> bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct
>> intel_dsb_buffer *dsb_buf, size_t size)
>> diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c
>> b/drivers/gpu/drm/xe/display/xe_fb_pin.c
>> index 423f367c7065..d7db44e79eaf 100644
>> --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
>> +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
>> @@ -10,6 +10,7 @@
>> #include "intel_fb.h"
>> #include "intel_fb_pin.h"
>> #include "xe_bo.h"
>> +#include "xe_device.h"
>> #include "xe_ggtt.h"
>> #include "xe_gt.h"
>> #include "xe_pm.h"
>> @@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const
>> struct intel_framebuffer *fb,
>> if (ret)
>> goto err_unpin;
>>
>> + /* Ensure DPT writes are flushed */
>> + xe_device_l2_flush(xe);
>> return vma;
>>
>> err_unpin:
>> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>> b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>> index d44564bad009..fd9d94174efb 100644
>> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>> @@ -80,6 +80,9 @@
>> #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0)
>> #define
>> LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
>>
>> +#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194)
>> +#define CG_DIS_CNTLBUS REG_BIT(6)
>> +
>> #define CCS_AUX_INV XE_REG(0x4208)
>>
>> #define VD0_AUX_INV XE_REG(0x4218)
>> @@ -372,6 +375,11 @@
>>
>> #define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 +
>> (i) * 8)
>>
>> +#define XE2_GLOBAL_INVAL XE_REG(0xb404)
>> +
>> +#define SCRATCH1LPFC XE_REG(0xb474)
>> +#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
>> +
>> #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
>>
>> #define XE2_TDF_CTRL XE_REG(0xb418)
>> diff --git a/drivers/gpu/drm/xe/xe_device.c
>> b/drivers/gpu/drm/xe/xe_device.c
>> index cfda7cb5df2c..b0f79ef6bce1 100644
>> --- a/drivers/gpu/drm/xe/xe_device.c
>> +++ b/drivers/gpu/drm/xe/xe_device.c
>> @@ -54,6 +54,9 @@
>> #include "xe_vm.h"
>> #include "xe_vram.h"
>> #include "xe_wait_user_fence.h"
>> +#include "xe_wa.h"
>> +
>> +#include <generated/xe_wa_oob.h>
>>
>> static int xe_file_open(struct drm_device *dev, struct drm_file
>> *file)
>> {
>> @@ -779,6 +782,11 @@ void xe_device_td_flush(struct xe_device *xe)
>> if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
>> return;
>>
>> + if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
>> + xe_device_l2_flush(xe);
>> + return;
>> + }
>> +
>> for_each_gt(gt, xe, id) {
>> if (xe_gt_is_media_type(gt))
>> continue;
>> @@ -802,6 +810,28 @@ void xe_device_td_flush(struct xe_device *xe)
>> }
>> }
>>
>> +void xe_device_l2_flush(struct xe_device *xe)
>> +{
>> + struct xe_gt *gt;
>> + int err;
>> +
>> + gt = xe_root_mmio_gt(xe);
>> +
>> + if (!XE_WA(gt, 16023588340))
>> + return;
>> +
>> + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
>> + if (err)
>> + return;
>> +
>> + xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
>> +
>> + if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150,
>> NULL, true))
>> + xe_gt_err_once(gt, "Global invalidation timeout\n");
>> +
>> + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
>> +}
>> +
>> u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
>> {
>> return xe_device_has_flat_ccs(xe) ?
>> diff --git a/drivers/gpu/drm/xe/xe_device.h
>> b/drivers/gpu/drm/xe/xe_device.h
>> index bb07f5669dbb..0a2a3e7fd402 100644
>> --- a/drivers/gpu/drm/xe/xe_device.h
>> +++ b/drivers/gpu/drm/xe/xe_device.h
>> @@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device
>> *xe, u64 address);
>> u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64
>> address);
>>
>> void xe_device_td_flush(struct xe_device *xe);
>> +void xe_device_l2_flush(struct xe_device *xe);
>>
>> static inline bool xe_device_wedged(struct xe_device *xe)
>> {
>> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>> index 29e8ea94d05e..006d3594ba55 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.c
>> +++ b/drivers/gpu/drm/xe/xe_gt.c
>> @@ -11,6 +11,8 @@
>> #include <drm/xe_drm.h>
>> #include <generated/xe_wa_oob.h>
>>
>> +#include <generated/xe_wa_oob.h>
>> +
>> #include "instructions/xe_gfxpipe_commands.h"
>> #include "instructions/xe_mi_commands.h"
>> #include "regs/xe_gt_regs.h"
>> @@ -95,6 +97,51 @@ void xe_gt_sanitize(struct xe_gt *gt)
>> gt->uc.guc.submission_state.enabled = false;
>> }
>>
>> +static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
>> +{
>> + u32 reg;
>> + int err;
>> +
>> + if (!XE_WA(gt, 16023588340))
>> + return;
>> +
>> + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
>> + if (WARN_ON(err))
>> + return;
>> +
>> + if (!xe_gt_is_media_type(gt)) {
>> + xe_mmio_write32(gt, SCRATCH1LPFC,
>> EN_L3_RW_CCS_CACHE_FLUSH);
>> + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
>> + reg |= CG_DIS_CNTLBUS;
>> + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
>> + }
>> +
>> + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
>> + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
>> +}
>> +
>> +static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
>> +{
>> + u32 reg;
>> + int err;
>> +
>> + if (!XE_WA(gt, 16023588340))
>> + return;
>> +
>> + if (xe_gt_is_media_type(gt))
>> + return;
>> +
>> + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
>> + if (WARN_ON(err))
>> + return;
>> +
>> + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
>> + reg &= ~CG_DIS_CNTLBUS;
>> + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
>> +
>> + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
>> +}
>> +
>> /**
>> * xe_gt_remove() - Clean up the GT structures before driver removal
>> * @gt: the GT object
>> @@ -111,6 +158,8 @@ void xe_gt_remove(struct xe_gt *gt)
>>
>> for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
>> xe_hw_fence_irq_finish(&gt->fence_irq[i]);
>> +
>> + xe_gt_disable_host_l2_vram(gt);
>> }
>>
>> static void gt_reset_worker(struct work_struct *w);
>> @@ -508,6 +557,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
>>
>> xe_gt_mcr_init_early(gt);
>> xe_pat_init(gt);
>> + xe_gt_enable_host_l2_vram(gt);
>>
>> err = xe_uc_init(&gt->uc);
>> if (err)
>> @@ -643,6 +693,8 @@ static int do_gt_restart(struct xe_gt *gt)
>>
>> xe_pat_init(gt);
>>
>> + xe_gt_enable_host_l2_vram(gt);
>> +
>> xe_gt_mcr_set_implicit_defaults(gt);
>> xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
>>
>> @@ -796,6 +848,8 @@ int xe_gt_suspend(struct xe_gt *gt)
>>
>> xe_gt_idle_disable_pg(gt);
>>
>> + xe_gt_disable_host_l2_vram(gt);
>> +
>> XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt),
>> XE_FORCEWAKE_ALL));
>> xe_gt_dbg(gt, "suspended\n");
>>
>> diff --git a/drivers/gpu/drm/xe/xe_pat.c
>> b/drivers/gpu/drm/xe/xe_pat.c
>> index 4ee32ee1cc88..722278cc23fc 100644
>> --- a/drivers/gpu/drm/xe/xe_pat.c
>> +++ b/drivers/gpu/drm/xe/xe_pat.c
>> @@ -7,6 +7,8 @@
>>
>> #include <drm/xe_drm.h>
>>
>> +#include <generated/xe_wa_oob.h>
>> +
>> #include "regs/xe_reg_defs.h"
>> #include "xe_assert.h"
>> #include "xe_device.h"
>> @@ -15,6 +17,7 @@
>> #include "xe_gt_mcr.h"
>> #include "xe_mmio.h"
>> #include "xe_sriov.h"
>> +#include "xe_wa.h"
>>
>> #define _PAT_ATS 0x47fc
>> #define
>> _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \
>> @@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe)
>> if (GRAPHICS_VER(xe) == 20) {
>> xe->pat.ops = &xe2_pat_ops;
>> xe->pat.table = xe2_pat_table;
>> - xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
>> +
>> + /* Wa_16023588340. XXX: Should use XE_WA */
>> + if (GRAPHICS_VERx100(xe) == 2001)
>> + xe->pat.n_entries = 28; /* Disable CLOS3 */
>> + else
>> + xe->pat.n_entries =
>> ARRAY_SIZE(xe2_pat_table);
>> +
>> xe->pat.idx[XE_CACHE_NONE] = 3;
>> xe->pat.idx[XE_CACHE_WT] = 15;
>> xe->pat.idx[XE_CACHE_WB] = 2;
>> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules
>> b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> index a6b897030fde..c6d8941621c6 100644
>> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
>> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> @@ -28,3 +28,4 @@
>> GRAPHICS_VERSION(2004)
>> 13011645652 GRAPHICS_VERSION(2004)
>> 22019338487 MEDIA_VERSION(2000)
>> +16023588340 GRAPHICS_VERSION(2001)
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* ✗ CI.Patch_applied: failure for series starting with [v3,1/2] drm/xe/bmg: implement Wa_16023588340
2024-07-02 15:06 [PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340 Matthew Auld
2024-07-02 15:06 ` [PATCH v3 2/2] drm/i915: disable fbc due to Wa_16023588340 Matthew Auld
2024-07-03 9:24 ` [PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340 Thomas Hellström
@ 2024-07-03 11:26 ` Patchwork
2 siblings, 0 replies; 5+ messages in thread
From: Patchwork @ 2024-07-03 11:26 UTC (permalink / raw)
To: Matthew Auld; +Cc: intel-xe
== Series Details ==
Series: series starting with [v3,1/2] drm/xe/bmg: implement Wa_16023588340
URL : https://patchwork.freedesktop.org/series/135656/
State : failure
== Summary ==
=== Applying kernel patches on branch 'drm-tip' with base: ===
Base commit: 5ca7296d32d5 drm-tip: 2024y-07m-03d-08h-11m-25s UTC integration manifest
=== git am output follows ===
error: patch failed: drivers/gpu/drm/xe/xe_wa_oob.rules:28
error: drivers/gpu/drm/xe/xe_wa_oob.rules: patch does not apply
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Applying: drm/xe/bmg: implement Wa_16023588340
Patch failed at 0001 drm/xe/bmg: implement Wa_16023588340
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-07-03 11:26 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-02 15:06 [PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340 Matthew Auld
2024-07-02 15:06 ` [PATCH v3 2/2] drm/i915: disable fbc due to Wa_16023588340 Matthew Auld
2024-07-03 9:24 ` [PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340 Thomas Hellström
2024-07-03 9:41 ` Matthew Auld
2024-07-03 11:26 ` ✗ CI.Patch_applied: failure for series starting with [v3,1/2] " Patchwork
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox