Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] drm/xe/migrate: Atomicize CCS copy command setup
@ 2025-09-29 16:45 Satyanarayana K V P
  2025-09-29 16:51 ` ✗ CI.KUnit: failure for drm/xe/migrate: Atomicize CCS copy command setup (rev3) Patchwork
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Satyanarayana K V P @ 2025-09-29 16:45 UTC (permalink / raw)
  To: intel-xe; +Cc: Satyanarayana K V P, Michal Wajdeczko, Matthew Brost,
	Matthew Auld

The CCS copy command is a 5-dword sequence. If the vCPU halts during
save/restore while this sequence is being programmed, partial writes may
trigger page faults when saving IGPU CCS metadata. Use the VMOVDQU
instruction to write the sequence atomically.

Since VMOVDQU operates on 256-bit chunks, update EMIT_COPY_CCS_DW to emit
8 dwords instead of 5 dwords.

Update emit_flush_invalidate() to use VMOVDQU operating with 128-bit
chunks.

Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>

---
V2 -> V3:
- Added support for 128 bit and 256 bit instructions with memcpy_vmovdqu
- Updated emit_flush_invalidate() to use vmovdqu instruction.

V1 -> V2:
- Use memcpy_vmovdqu only for x86 arch and for VF. Else use memcpy
  (Auld, Matthew)
- Fix issues reported by Patchwork.
---
 drivers/gpu/drm/xe/xe_migrate.c | 92 +++++++++++++++++++++++++++------
 1 file changed, 77 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 1d667fa36cf3..a37d4cb28aac 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -5,6 +5,7 @@
 
 #include "xe_migrate.h"
 
+#include <asm/fpu/api.h>
 #include <linux/bitfield.h>
 #include <linux/sizes.h>
 
@@ -644,18 +645,64 @@ static void emit_pte(struct xe_migrate *m,
 	}
 }
 
-#define EMIT_COPY_CCS_DW 5
+/*
+ * The CCS copy command is a 5-dword sequence. If the vCPU halts during
+ * save/restore while this sequence is being issued, partial writes may trigger
+ * page faults when saving iGPU CCS metadata. Use the VMOVDQU instruction to
+ * write the sequence atomically.
+ */
+static void memcpy_vmovdqu(void *dst, const void *src, u32 size)
+{
+	kernel_fpu_begin();
+
+#ifdef CONFIG_X86
+	if (size == SZ_128) {
+		asm("vmovdqu (%0), %%xmm0\n"
+		    "vmovups %%xmm0,   (%1)\n"
+		    :: "r" (src), "r" (dst) : "memory");
+	} else if (size == SZ_256) {
+		asm("vmovdqu (%0), %%ymm0\n"
+		    "vmovups %%ymm0,   (%1)\n"
+		    :: "r" (src), "r" (dst) : "memory");
+	}
+#endif
+	kernel_fpu_end();
+}
+
+static int xe_migrate_memcpy_atomic(struct xe_gt *gt, void *dst, const void
+				     *src, u32 size)
+{
+	int instr_size = size * SZ_8;
+
+	if (IS_SRIOV_VF(gt_to_xe(gt)) && static_cpu_has(X86_FEATURE_AVX)) {
+		if (instr_size != SZ_128 && instr_size != SZ_256) {
+			drm_dbg(&gt_to_xe(gt)->drm,
+				"Invalid size received for atomic copy %d", instr_size);
+			return -EINVAL;
+		}
+
+		memcpy_vmovdqu(dst, src, instr_size);
+	} else {
+		memcpy(dst, src, size);
+	}
+
+	return 0;
+}
+
+#define EMIT_COPY_CCS_DW 8
 static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
 			  u64 dst_ofs, bool dst_is_indirect,
 			  u64 src_ofs, bool src_is_indirect,
 			  u32 size)
 {
+	u32 dw[EMIT_COPY_CCS_DW] = {MI_NOOP};
 	struct xe_device *xe = gt_to_xe(gt);
 	u32 *cs = bb->cs + bb->len;
 	u32 num_ccs_blks;
 	u32 num_pages;
 	u32 ccs_copy_size;
 	u32 mocs;
+	u32 i = 0;
 
 	if (GRAPHICS_VERx100(xe) >= 2000) {
 		num_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
@@ -673,14 +720,17 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
 		mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index);
 	}
 
-	*cs++ = XY_CTRL_SURF_COPY_BLT |
-		(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
-		(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
-		ccs_copy_size;
-	*cs++ = lower_32_bits(src_ofs);
-	*cs++ = upper_32_bits(src_ofs) | mocs;
-	*cs++ = lower_32_bits(dst_ofs);
-	*cs++ = upper_32_bits(dst_ofs) | mocs;
+	dw[i++] = XY_CTRL_SURF_COPY_BLT |
+		  (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
+		  (dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
+		  ccs_copy_size;
+	dw[i++] = lower_32_bits(src_ofs);
+	dw[i++] = upper_32_bits(src_ofs) | mocs;
+	dw[i++] = lower_32_bits(dst_ofs);
+	dw[i++] = upper_32_bits(dst_ofs) | mocs;
+
+	if (!xe_migrate_memcpy_atomic(gt, cs, dw, sizeof(u32) * EMIT_COPY_CCS_DW))
+		cs += EMIT_COPY_CCS_DW;
 
 	bb->len = cs - bb->cs;
 }
@@ -980,16 +1030,28 @@ struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate)
 	return migrate->q->lrc[0];
 }
 
+/*
+ * The MI_FLUSH_DW command is a 4-dword sequence. If the vCPU halts during
+ * save/restore while this sequence is being issued, partial writes may
+ * trigger page faults when saving iGPU CCS metadata. Use
+ * xe_migrate_memcpy_atomic() to write the sequence atomically.
+ */
 static int emit_flush_invalidate(struct xe_exec_queue *q, u32 *dw, int i,
 				 u32 flags)
 {
 	struct xe_lrc *lrc = xe_exec_queue_lrc(q);
-	dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
-		  MI_FLUSH_IMM_DW | flags;
-	dw[i++] = lower_32_bits(xe_lrc_start_seqno_ggtt_addr(lrc)) |
-		  MI_FLUSH_DW_USE_GTT;
-	dw[i++] = upper_32_bits(xe_lrc_start_seqno_ggtt_addr(lrc));
-	dw[i++] = MI_NOOP;
+	u32 tmp_dw[SZ_4] = {MI_NOOP}, j = 0;
+
+	tmp_dw[j++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
+		      MI_FLUSH_IMM_DW | flags;
+	tmp_dw[j++] = lower_32_bits(xe_lrc_start_seqno_ggtt_addr(lrc)) |
+		      MI_FLUSH_DW_USE_GTT;
+	tmp_dw[j++] = upper_32_bits(xe_lrc_start_seqno_ggtt_addr(lrc));
+	tmp_dw[j++] = MI_NOOP;
+
+	if (!xe_migrate_memcpy_atomic(q->gt, &dw[i], tmp_dw, sizeof(u32) * j))
+		i += j;
+
 	dw[i++] = MI_NOOP;
 
 	return i;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-10-10 19:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-09-29 16:45 [PATCH v3] drm/xe/migrate: Atomicize CCS copy command setup Satyanarayana K V P
2025-09-29 16:51 ` ✗ CI.KUnit: failure for drm/xe/migrate: Atomicize CCS copy command setup (rev3) Patchwork
2025-10-10 19:12   ` Rodrigo Vivi
2025-09-29 18:54 ` [PATCH v3] drm/xe/migrate: Atomicize CCS copy command setup Michal Wajdeczko
2025-10-01  2:31   ` Matthew Brost
2025-10-01  2:50 ` Matthew Brost

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox