From: Matthew Brost <matthew.brost@intel.com>
To: intel-xe@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Cc: kenneth.w.graunke@intel.com, lionel.g.landwerlin@intel.com,
jose.souza@intel.com, simona.vetter@ffwll.ch,
thomas.hellstrom@linux.intel.com, boris.brezillon@collabora.com,
airlied@gmail.com, christian.koenig@amd.com,
mihail.atanassov@arm.com, steven.price@arm.com,
shashank.sharma@amd.com
Subject: [RFC PATCH 28/29] drm/xe: Add VM convert fence IOCTL
Date: Mon, 18 Nov 2024 15:37:56 -0800
Message-ID: <20241118233757.2374041-29-matthew.brost@intel.com>
In-Reply-To: <20241118233757.2374041-1-matthew.brost@intel.com>
Basically a version of the resume worker which also converts user syncs
to kernel syncs (dma-fences) and vice versa. The exported dma-fences in
the conversion guard against preemption, which is required to avoid
breaking dma-fence rules (no memory allocations are allowed in the
signaling path of a dma-fence, while resume requires memory
allocations).
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/xe_device.c | 1 +
drivers/gpu/drm/xe/xe_preempt_fence.c | 9 +
drivers/gpu/drm/xe/xe_vm.c | 247 +++++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_vm.h | 2 +
drivers/gpu/drm/xe/xe_vm_types.h | 4 +
5 files changed, 254 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 573b5f3df0c8..56dd26eddd92 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -191,6 +191,7 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_CONVERT_FENCE, xe_vm_convert_fence_ioctl, DRM_RENDER_ALLOW),
};
static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index 80a8bc82f3cc..c225f3cc82a3 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -12,6 +12,14 @@ static struct xe_exec_queue *to_exec_queue(struct dma_fence_preempt *fence)
return container_of(fence, struct xe_preempt_fence, base)->q;
}
+static struct dma_fence *
+xe_preempt_fence_preempt_delay(struct dma_fence_preempt *fence)
+{
+ struct xe_exec_queue *q = to_exec_queue(fence);
+
+ return q->vm->preempt.exported_fence ?: dma_fence_get_stub();
+}
+
static int xe_preempt_fence_preempt(struct dma_fence_preempt *fence)
{
struct xe_exec_queue *q = to_exec_queue(fence);
@@ -35,6 +43,7 @@ static void xe_preempt_fence_preempt_finished(struct dma_fence_preempt *fence)
}
static const struct dma_fence_preempt_ops xe_preempt_fence_ops = {
+ .preempt_delay = xe_preempt_fence_preempt_delay,
.preempt = xe_preempt_fence_preempt,
.preempt_wait = xe_preempt_fence_preempt_wait,
.preempt_finished = xe_preempt_fence_preempt_finished,
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 16bc1b82d950..5078aeea2bd8 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -6,6 +6,7 @@
#include "xe_vm.h"
#include <linux/dma-fence-array.h>
+#include <linux/dma-fence-chain.h>
#include <linux/nospec.h>
#include <drm/drm_exec.h>
@@ -441,29 +442,44 @@ int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
}
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
- bool *done)
+ int extra_fence_count, bool *done)
{
int err;
+ *done = false;
+
err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
if (err)
return err;
- if (xe_vm_is_idle(vm)) {
+ if (xe_vm_in_preempt_fence_mode(vm) && xe_vm_is_idle(vm)) {
vm->preempt.rebind_deactivated = true;
*done = true;
return 0;
}
+ err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
+ if (err)
+ return err;
+
if (!preempt_fences_waiting(vm)) {
*done = true;
+
+ if (extra_fence_count) {
+ struct drm_gem_object *obj;
+ unsigned long index;
+
+ drm_exec_for_each_locked_object(exec, index, obj) {
+ err = dma_resv_reserve_fences(obj->resv,
+ extra_fence_count);
+ if (err)
+ return err;
+ }
+ }
+
return 0;
}
- err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
- if (err)
- return err;
-
err = wait_for_existing_preempt_fences(vm);
if (err)
return err;
@@ -474,7 +490,8 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
* The fence reservation here is intended for the new preempt fences
* we attach at the end of the rebind work.
*/
- return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
+ return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues +
+ extra_fence_count);
}
static void preempt_rebind_work_func(struct work_struct *w)
@@ -509,9 +526,9 @@ static void preempt_rebind_work_func(struct work_struct *w)
drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec) {
- bool done = false;
+ bool done;
- err = xe_preempt_work_begin(&exec, vm, &done);
+ err = xe_preempt_work_begin(&exec, vm, 0, &done);
drm_exec_retry_on_contention(&exec);
if (err || done) {
drm_exec_fini(&exec);
@@ -1638,6 +1655,7 @@ static void vm_destroy_work_func(struct work_struct *w)
container_of(w, struct xe_vm, destroy_work);
struct xe_device *xe = vm->xe;
struct xe_tile *tile;
+ struct dma_fence *fence;
u8 id;
/* xe_vm_close_and_put was not called? */
@@ -1660,6 +1678,9 @@ static void vm_destroy_work_func(struct work_struct *w)
if (vm->xef)
xe_file_put(vm->xef);
+ dma_fence_chain_for_each(fence, vm->preempt.exported_fence);
+ dma_fence_put(vm->preempt.exported_fence);
+
kfree(vm);
}
@@ -3403,3 +3424,211 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
}
kvfree(snap);
}
+
+static int check_semaphores(struct xe_vm *vm, struct xe_sync_entry *syncs,
+ struct drm_exec *exec, int num_syncs)
+{
+ int i, j;
+
+ for (i = 0; i < num_syncs; ++i) {
+ struct xe_bo *bo = syncs[i].bo;
+ struct drm_gem_object *obj = &bo->ttm.base;
+
+ if (bo->vm == vm)
+ continue;
+
+ for (j = 0; j < exec->num_objects; ++j) {
+ if (obj == exec->objects[j])
+ break;
+ }
+
+ if (j == exec->num_objects)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int xe_vm_convert_fence_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_vm_convert_fence __user *args = data;
+ struct drm_xe_sync __user *syncs_user;
+ struct drm_xe_semaphore __user *semaphores_user;
+ struct xe_sync_entry *syncs = NULL;
+ struct xe_vm *vm;
+ int err = 0, i, num_syncs = 0;
+ bool done = false;
+ struct drm_exec exec;
+ unsigned int fence_count = 0;
+ LIST_HEAD(preempt_fences);
+ ktime_t end = 0;
+ long wait;
+ int __maybe_unused tries = 0;
+ struct dma_fence *fence, *prev = NULL;
+
+ if (XE_IOCTL_DBG(xe, args->extensions || args->flags ||
+ args->reserved[0] || args->reserved[1] ||
+ args->pad))
+ return -EINVAL;
+
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_DBG(xe, !vm))
+ return -EINVAL;
+
+ err = down_write_killable(&vm->lock);
+ if (err)
+ goto put_vm;
+
+ if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+ err = -ENOENT;
+ goto release_vm_lock;
+ }
+
+ syncs = kcalloc(args->num_syncs * 2, sizeof(*syncs), GFP_KERNEL);
+ if (!syncs) {
+ err = -ENOMEM;
+ goto release_vm_lock;
+ }
+
+ syncs_user = u64_to_user_ptr(args->syncs);
+ semaphores_user = u64_to_user_ptr(args->semaphores);
+ for (i = 0; i < args->num_syncs; i++, num_syncs++) {
+ struct xe_sync_entry *sync = &syncs[i];
+ struct xe_sync_entry *semaphore_sync =
+ &syncs[args->num_syncs + i];
+
+ err = xe_sync_entry_parse(xe, xef, sync, &syncs_user[i],
+ SYNC_PARSE_FLAG_DISALLOW_USER_FENCE);
+ if (err)
+ goto release_syncs;
+
+ err = xe_sync_semaphore_parse(xe, xef, semaphore_sync,
+ &semaphores_user[i],
+ sync->flags);
+ if (err) {
+ xe_sync_entry_cleanup(&syncs[i]);
+ goto release_syncs;
+ }
+ }
+
+retry:
+ if (xe_vm_userptr_check_repin(vm)) {
+ err = xe_vm_userptr_pin(vm);
+ if (err)
+ goto release_syncs;
+ }
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+
+ drm_exec_until_all_locked(&exec) {
+ err = xe_preempt_work_begin(&exec, vm, num_syncs, &done);
+ drm_exec_retry_on_contention(&exec);
+ if (err) {
+ drm_exec_fini(&exec);
+ if (err && xe_vm_validate_should_retry(&exec, err, &end))
+ err = -EAGAIN;
+
+ goto release_syncs;
+ }
+ }
+
+ if (XE_IOCTL_DBG(xe, check_semaphores(vm, syncs + num_syncs,
+ &exec, num_syncs))) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (!done) {
+ err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
+ if (err)
+ goto out_unlock;
+
+ wait = dma_resv_wait_timeout(xe_vm_resv(vm),
+ DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ if (wait <= 0) {
+ err = -ETIME;
+ goto out_unlock;
+ }
+ }
+
+#define retry_required(__tries, __vm) \
+ (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
+ (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
+ __xe_vm_userptr_needs_repin(__vm))
+
+ down_read(&vm->userptr.notifier_lock);
+ if (retry_required(tries, vm)) {
+ up_read(&vm->userptr.notifier_lock);
+ err = -EAGAIN;
+ goto out_unlock;
+ }
+
+#undef retry_required
+
+ /* Point of no return. */
+ xe_assert(vm->xe, list_empty(&vm->rebind_list));
+
+ for (i = 0; i < num_syncs; i++) {
+ struct xe_sync_entry *sync = &syncs[i];
+ struct xe_sync_entry *semaphore_sync = &syncs[num_syncs + i];
+
+ if (sync->flags & DRM_XE_SYNC_FLAG_SIGNAL) {
+ xe_sync_entry_signal(sync, semaphore_sync->fence);
+ xe_sync_entry_hw_fence_installed(semaphore_sync);
+
+ dma_fence_put(prev);
+ prev = dma_fence_get(vm->preempt.exported_fence);
+
+ dma_fence_chain_init(semaphore_sync->chain_fence,
+ prev, semaphore_sync->fence,
+ vm->preempt.seqno++);
+
+ vm->preempt.exported_fence =
+ &semaphore_sync->chain_fence->base;
+ semaphore_sync->chain_fence = NULL;
+
+ semaphore_sync->fence = NULL; /* Ref owned by chain */
+ } else {
+ xe_sync_entry_signal(semaphore_sync, sync->fence);
+ drm_gpuvm_resv_add_fence(&vm->gpuvm, &exec,
+ dma_fence_chain_contained(sync->fence),
+ DMA_RESV_USAGE_BOOKKEEP,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ }
+
+ dma_fence_chain_for_each(fence, prev);
+ dma_fence_put(prev);
+
+ if (!done) {
+ spin_lock(&vm->xe->ttm.lru_lock);
+ ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+ spin_unlock(&vm->xe->ttm.lru_lock);
+
+ arm_preempt_fences(vm, &preempt_fences);
+ resume_and_reinstall_preempt_fences(vm, &exec);
+ }
+ up_read(&vm->userptr.notifier_lock);
+
+out_unlock:
+ drm_exec_fini(&exec);
+release_syncs:
+ while (err != -EAGAIN && num_syncs--) {
+ xe_sync_entry_cleanup(&syncs[num_syncs]);
+ xe_sync_entry_cleanup(&syncs[args->num_syncs + num_syncs]);
+ }
+release_vm_lock:
+ if (err == -EAGAIN)
+ goto retry;
+ up_write(&vm->lock);
+put_vm:
+ xe_vm_put(vm);
+ free_preempt_fences(&preempt_fences);
+ kfree(syncs);
+
+ return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 4391dbaeba51..c1c70239cc91 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -181,6 +181,8 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int xe_vm_bind_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+int xe_vm_convert_fence_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
void xe_vm_close_and_put(struct xe_vm *vm);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 7f9a303e51d8..c5cb83722706 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -254,6 +254,10 @@ struct xe_vm {
* BOs
*/
struct work_struct rebind_work;
+ /** @seqno: Seqno of exported dma-fences */
+ u64 seqno;
+ /** @exported_fence: Chain of exported dma-fences */
+ struct dma_fence *exported_fence;
} preempt;
/** @um: unified memory state */
--
2.34.1