[PATCH v3 1/5] drm/xe: Decouple job seqno and lrc seqno

Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-xe@lists.freedesktop.org
Cc: "Matthew Brost" <matthew.brost@intel.com>,
	"Thomas Hellström" <thomas.hellstrom@linux.intel.com>
Subject: [PATCH v3 1/5] drm/xe: Decouple job seqno and lrc seqno
Date: Fri, 24 May 2024 09:19:36 +0200	[thread overview]
Message-ID: <20240524071940.83042-2-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20240524071940.83042-1-thomas.hellstrom@linux.intel.com>

From: Matthew Brost <matthew.brost@intel.com>

Tightly coupling these seqno presents problems if alternative fences for
jobs are used. Decouple these for correctness.

v2:
- Slightly reword commit message (Thomas)
- Make sure the lrc fence ops are used in comparison (Thomas)
- Assume seqno is unsigned rather than signed in format string (Thomas)

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c      |  2 +-
 drivers/gpu/drm/xe/xe_guc_submit.c      |  5 +++--
 drivers/gpu/drm/xe/xe_ring_ops.c        | 12 ++++++------
 drivers/gpu/drm/xe/xe_sched_job.c       | 16 ++++++++--------
 drivers/gpu/drm/xe/xe_sched_job.h       |  5 +++++
 drivers/gpu/drm/xe/xe_sched_job_types.h |  2 ++
 drivers/gpu/drm/xe/xe_trace.h           |  7 +++++--
 7 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 0fd61fb4d104..e8bf250f5b6a 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -98,7 +98,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
 
 	if (xe_exec_queue_is_parallel(q)) {
 		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
-		q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
+		q->parallel.composite_fence_seqno = 0;
 	}
 
 	return q;
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 54778189cfd5..53ab98c5ef5e 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -940,8 +940,9 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 		return DRM_GPU_SCHED_STAT_NOMINAL;
 	}
 
-	drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
-		   xe_sched_job_seqno(job), q->guc->id, q->flags);
+	drm_notice(&xe->drm, "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
+		   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
+		   q->guc->id, q->flags);
 	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
 		   "Kernel-submitted job timed out\n");
 	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index a3ca718456f6..2705d1f9d572 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -398,7 +398,7 @@ static void emit_job_gen12_gsc(struct xe_sched_job *job)
 
 	__emit_job_gen12_simple(job, job->q->lrc,
 				job->batch_addr[0],
-				xe_sched_job_seqno(job));
+				xe_sched_job_lrc_seqno(job));
 }
 
 static void emit_job_gen12_copy(struct xe_sched_job *job)
@@ -407,14 +407,14 @@ static void emit_job_gen12_copy(struct xe_sched_job *job)
 
 	if (xe_sched_job_is_migration(job->q)) {
 		emit_migration_job_gen12(job, job->q->lrc,
-					 xe_sched_job_seqno(job));
+					 xe_sched_job_lrc_seqno(job));
 		return;
 	}
 
 	for (i = 0; i < job->q->width; ++i)
 		__emit_job_gen12_simple(job, job->q->lrc + i,
-				        job->batch_addr[i],
-				        xe_sched_job_seqno(job));
+					job->batch_addr[i],
+					xe_sched_job_lrc_seqno(job));
 }
 
 static void emit_job_gen12_video(struct xe_sched_job *job)
@@ -425,7 +425,7 @@ static void emit_job_gen12_video(struct xe_sched_job *job)
 	for (i = 0; i < job->q->width; ++i)
 		__emit_job_gen12_video(job, job->q->lrc + i,
 				       job->batch_addr[i],
-				       xe_sched_job_seqno(job));
+				       xe_sched_job_lrc_seqno(job));
 }
 
 static void emit_job_gen12_render_compute(struct xe_sched_job *job)
@@ -435,7 +435,7 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job)
 	for (i = 0; i < job->q->width; ++i)
 		__emit_job_gen12_render_compute(job, job->q->lrc + i,
 						job->batch_addr[i],
-						xe_sched_job_seqno(job));
+						xe_sched_job_lrc_seqno(job));
 }
 
 static const struct xe_ring_ops ring_ops_gen12_gsc = {
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index a4e030f5e019..874450be327e 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -117,6 +117,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
 			err = PTR_ERR(job->fence);
 			goto err_sched_job;
 		}
+		job->lrc_seqno = job->fence->seqno;
 	} else {
 		struct dma_fence_array *cf;
 
@@ -132,6 +133,8 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
 				err = PTR_ERR(fences[j]);
 				goto err_fences;
 			}
+			if (!j)
+				job->lrc_seqno = fences[0]->seqno;
 		}
 
 		cf = dma_fence_array_create(q->width, fences,
@@ -144,10 +147,6 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
 			goto err_fences;
 		}
 
-		/* Sanity check */
-		for (j = 0; j < q->width; ++j)
-			xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno);
-
 		job->fence = &cf->base;
 	}
 
@@ -229,9 +228,9 @@ bool xe_sched_job_started(struct xe_sched_job *job)
 {
 	struct xe_lrc *lrc = job->q->lrc;
 
-	return !__dma_fence_is_later(xe_sched_job_seqno(job),
+	return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job),
 				     xe_lrc_start_seqno(lrc),
-				     job->fence->ops);
+				     dma_fence_array_first(job->fence)->ops);
 }
 
 bool xe_sched_job_completed(struct xe_sched_job *job)
@@ -243,8 +242,9 @@ bool xe_sched_job_completed(struct xe_sched_job *job)
 	 * parallel handshake is done.
 	 */
 
-	return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_seqno(lrc),
-				     job->fence->ops);
+	return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job),
+				     xe_lrc_seqno(lrc),
+				     dma_fence_array_first(job->fence)->ops);
 }
 
 void xe_sched_job_arm(struct xe_sched_job *job)
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
index c75018f4660d..002c3b5c0a5c 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.h
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -73,6 +73,11 @@ static inline u32 xe_sched_job_seqno(struct xe_sched_job *job)
 	return job->fence->seqno;
 }
 
+static inline u32 xe_sched_job_lrc_seqno(struct xe_sched_job *job)
+{
+	return job->lrc_seqno;
+}
+
 static inline void
 xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags)
 {
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index 5e12724219fd..990ddac55ed6 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -37,6 +37,8 @@ struct xe_sched_job {
 		/** @user_fence.value: write back value */
 		u64 value;
 	} user_fence;
+	/** @lrc_seqno: LRC seqno */
+	u32 lrc_seqno;
 	/** @migrate_flush_flags: Additional flush flags for migration jobs */
 	u32 migrate_flush_flags;
 	/** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 2d56cfc09e42..6c6cecc58f63 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -254,6 +254,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
 
 		    TP_STRUCT__entry(
 			     __field(u32, seqno)
+			     __field(u32, lrc_seqno)
 			     __field(u16, guc_id)
 			     __field(u32, guc_state)
 			     __field(u32, flags)
@@ -264,6 +265,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
 
 		    TP_fast_assign(
 			   __entry->seqno = xe_sched_job_seqno(job);
+			   __entry->lrc_seqno = xe_sched_job_lrc_seqno(job);
 			   __entry->guc_id = job->q->guc->id;
 			   __entry->guc_state =
 			   atomic_read(&job->q->guc->state);
@@ -273,8 +275,9 @@ DECLARE_EVENT_CLASS(xe_sched_job,
 			   __entry->batch_addr = (u64)job->batch_addr[0];
 			   ),
 
-		    TP_printk("fence=%p, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
-			      __entry->fence, __entry->seqno, __entry->guc_id,
+		    TP_printk("fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
+			      __entry->fence, __entry->seqno,
+			      __entry->lrc_seqno, __entry->guc_id,
 			      __entry->batch_addr, __entry->guc_state,
 			      __entry->flags, __entry->error)
 );
-- 
2.44.0

next prev parent reply	other threads:[~2024-05-24  7:19 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-24  7:19 [PATCH v3 0/5] drm/xe: Allow migrate vm gpu submissions from reclaim context Thomas Hellström
2024-05-24  7:19 ` Thomas Hellström [this message]
2024-05-24 12:26   ` [PATCH v3 1/5] drm/xe: Decouple job seqno and lrc seqno Rodrigo Vivi
2024-05-24 12:56     ` Thomas Hellström
2024-05-24 19:44       ` Rodrigo Vivi
2024-05-24  7:19 ` [PATCH v3 2/5] drm/xe: Split lrc seqno fence creation up Thomas Hellström
2024-05-24 12:32   ` Rodrigo Vivi
2024-05-24  7:19 ` [PATCH v3 3/5] drm/xe: Don't initialize fences at xe_sched_job_create() Thomas Hellström
2024-05-24 13:12   ` Rodrigo Vivi
2024-05-24 13:22     ` Thomas Hellström
2024-05-24 19:45       ` Rodrigo Vivi
2024-05-24  7:19 ` [PATCH v3 4/5] drm/xe: Remove xe_lrc_create_seqno_fence() Thomas Hellström
2024-05-24 12:33   ` Rodrigo Vivi
2024-05-24  7:19 ` [PATCH v3 5/5] drm/xe: Move job creation out of the struct xe_migrate::job_mutex Thomas Hellström
2024-05-24 13:14   ` Rodrigo Vivi
2024-05-24  8:18 ` ✓ CI.Patch_applied: success for drm/xe: Allow migrate vm gpu submissions from reclaim context (rev3) Patchwork
2024-05-24  8:18 ` ✓ CI.checkpatch: " Patchwork
2024-05-24  8:19 ` ✓ CI.KUnit: " Patchwork
2024-05-24  8:31 ` ✓ CI.Build: " Patchwork
2024-05-24  8:33 ` ✓ CI.Hooks: " Patchwork
2024-05-24  8:35 ` ✓ CI.checksparse: " Patchwork
2024-05-24  9:05 ` ✓ CI.BAT: " Patchwork
2024-05-24 10:48 ` ✗ CI.FULL: failure " Patchwork

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:0fd61fb4d10 dfblob:e8bf250f5b6 dfblob:54778189cfd
dfblob:53ab98c5ef5 dfblob:a3ca718456f dfblob:2705d1f9d57
dfblob:a4e030f5e01 dfblob:874450be327 dfblob:c75018f4660
dfblob:002c3b5c0a5 dfblob:5e12724219f dfblob:990ddac55ed
dfblob:2d56cfc09e4 dfblob:6c6cecc58f6 )
 OR (
bs:"[PATCH v3 1/5] drm/xe: Decouple job seqno and lrc seqno" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240524071940.83042-2-thomas.hellstrom@linux.intel.com \
    --to=thomas.hellstrom@linux.intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox