All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: <intel-xe@lists.freedesktop.org>
Subject: [Intel-xe] [RFC PATCH 6/7] drm/xe: Add last fence as dependency for jobs on user exec queues
Date: Wed,  6 Dec 2023 21:57:28 -0800	[thread overview]
Message-ID: <20231207055729.438642-7-matthew.brost@intel.com> (raw)
In-Reply-To: <20231207055729.438642-1-matthew.brost@intel.com>

The last fence must be added as a dependency for jobs on user exec
queues as it is possible for the last fence to be a composite software
fence (unordered, ioctl with zero bb or binds) rather than hardware
fence (ordered, previous job on queue).

Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_exec.c       |  4 ++++
 drivers/gpu/drm/xe/xe_exec_queue.c |  2 +-
 drivers/gpu/drm/xe/xe_migrate.c    | 14 +++++++++++---
 drivers/gpu/drm/xe/xe_sched_job.c  | 17 +++++++++++++++++
 drivers/gpu/drm/xe/xe_sched_job.h  |  4 ++++
 5 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 438e34585e1e..92b0da6580e8 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -313,6 +313,10 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto err_put_job;
 
 	if (!xe_vm_in_lr_mode(vm)) {
+		err = xe_sched_job_last_fence_add_dep(job, vm);
+		if (err)
+			goto err_put_job;
+
 		err = down_read_interruptible(&vm->userptr.notifier_lock);
 		if (err)
 			goto err_put_job;
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 67e3fd9dfc5f..3911d14522ee 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -887,7 +887,7 @@ static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
 						    struct xe_vm *vm)
 {
 	if (q->flags & EXEC_QUEUE_FLAG_VM)
-		lockdep_assert_held_write(&vm->lock);
+		lockdep_assert_held(&vm->lock);
 	else
 		xe_vm_assert_held(vm);
 }
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index e8b567708ac0..ce14498b416a 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1163,17 +1163,24 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
 	return fence;
 }
 
-static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs)
+static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
+			struct xe_sync_entry *syncs, u32 num_syncs)
 {
+	struct dma_fence *fence;
 	int i;
 
 	for (i = 0; i < num_syncs; i++) {
-		struct dma_fence *fence = syncs[i].fence;
+		fence = syncs[i].fence;
 
 		if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				       &fence->flags))
 			return false;
 	}
+	if (q) {
+		fence = xe_exec_queue_last_fence_get(q, vm);
+		if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			return false;
+	}
 
 	return true;
 }
@@ -1234,7 +1241,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
 
 	/* Use the CPU if no in syncs and engine is idle */
-	if (no_in_syncs(syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
+	if (no_in_syncs(vm, q, syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
 		fence =  xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
 							num_updates,
 							first_munmap_rebind,
@@ -1351,6 +1358,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 			goto err_job;
 	}
 
+	err = xe_sched_job_last_fence_add_dep(job, vm);
 	for (i = 0; !err && i < num_syncs; i++)
 		err = xe_sync_entry_add_deps(&syncs[i], job);
 
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index b467d5bfa4ac..b7d714522ae1 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -260,3 +260,20 @@ void xe_sched_job_push(struct xe_sched_job *job)
 	drm_sched_entity_push_job(&job->drm);
 	xe_sched_job_put(job);
 }
+
+int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
+{
+	struct dma_fence *fence;
+
+	fence = xe_exec_queue_last_fence_get(job->q, vm);
+
+	/* Only wait on unsignaled software fences */
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
+	    !(fence->context == job->drm.entity->fence_context ||
+	      fence->context == job->drm.entity->fence_context + 1)) {
+		dma_fence_get(fence);
+		return drm_sched_job_add_dependency(&job->drm, fence);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
index 6ca1d426c036..34f475ba7f50 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.h
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -8,6 +8,8 @@
 
 #include "xe_sched_job_types.h"
 
+struct xe_vm;
+
 #define XE_SCHED_HANG_LIMIT 1
 #define XE_SCHED_JOB_TIMEOUT LONG_MAX
 
@@ -54,6 +56,8 @@ bool xe_sched_job_completed(struct xe_sched_job *job);
 void xe_sched_job_arm(struct xe_sched_job *job);
 void xe_sched_job_push(struct xe_sched_job *job);
 
+int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm);
+
 static inline struct xe_sched_job *
 to_xe_sched_job(struct drm_sched_job *drm)
 {
-- 
2.34.1


  parent reply	other threads:[~2023-12-07  5:57 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-07  5:57 [Intel-xe] [RFC PATCH 0/7] Syncs vs async exec/bind uAPI change Matthew Brost
2023-12-07  5:57 ` [Intel-xe] [RFC PATCH 1/7] drm/xe: Use a flags field instead of bools for VMA create Matthew Brost
2023-12-07  5:57 ` [Intel-xe] [RFC PATCH 2/7] drm/xe: Use a flags field instead of bools for sync parse Matthew Brost
2023-12-07  5:57 ` [Intel-xe] [RFC PATCH 3/7] drm/xe: Allow num_binds == 0 in VM bind IOCTL Matthew Brost
2023-12-07  5:57 ` [Intel-xe] [RFC PATCH 4/7] drm/xe: Allow num_batch_buffer == 0 in exec IOCTL Matthew Brost
2023-12-07  5:57 ` [Intel-xe] [RFC PATCH 5/7] drm/xe: Take in-syncs into account when num_execs or num_binds == 0 Matthew Brost
2023-12-08 15:04   ` Thomas Hellström
2023-12-12 17:18     ` Matthew Brost
2023-12-07  5:57 ` Matthew Brost [this message]
2023-12-07  5:57 ` [Intel-xe] [RFC PATCH 7/7] drm/xe/uapi: Uniform async vs sync handling Matthew Brost
2023-12-07 19:51   ` Rodrigo Vivi
2023-12-08 15:00   ` Thomas Hellström
2023-12-08  9:45     ` Matthew Brost
2023-12-11 15:43       ` Thomas Hellström
2023-12-11 16:49         ` Matthew Brost
2023-12-11 18:11           ` Thomas Hellström
2023-12-11 21:11             ` Matthew Brost
2023-12-12  8:43               ` Thomas Hellström
2023-12-08 12:24     ` Matthew Brost
2023-12-11 15:34       ` Thomas Hellström
2023-12-11 16:50         ` Matthew Brost
2023-12-07  7:38 ` [Intel-xe] ✗ CI.Patch_applied: failure for Syncs vs async exec/bind uAPI change Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231207055729.438642-7-matthew.brost@intel.com \
    --to=matthew.brost@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.