From: Matthew Brost <matthew.brost@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: stuart.summers@intel.com, arvind.yadav@intel.com,
himal.prasad.ghimiray@intel.com,
thomas.hellstrom@linux.intel.com, francois.dugast@intel.com
Subject: [PATCH v3 16/25] drm/xe: Add CPU bind layer
Date: Fri, 27 Feb 2026 17:34:52 -0800 [thread overview]
Message-ID: <20260228013501.106680-17-matthew.brost@intel.com> (raw)
In-Reply-To: <20260228013501.106680-1-matthew.brost@intel.com>
With CPU binds, it no longer makes sense to implement CPU bind handling
in the migrate layer, as these operations are entirely decoupled from
hardware. Introduce a dedicated CPU bind layer stored at the device
level.
Since CPU binds are tile-independent, update the PT layer to generate a
single bind job even when pages are mirrored across tiles.
This patch is large because the refactor touches multiple files and layers
and ensures functional equivalence before and after the change.
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/Makefile | 1 +
drivers/gpu/drm/xe/xe_cpu_bind.c | 296 +++++++++++++
drivers/gpu/drm/xe/xe_cpu_bind.h | 118 +++++
drivers/gpu/drm/xe/xe_device.c | 5 +
drivers/gpu/drm/xe/xe_device_types.h | 4 +
drivers/gpu/drm/xe/xe_exec_queue.c | 3 +-
drivers/gpu/drm/xe/xe_guc_submit.c | 41 +-
drivers/gpu/drm/xe/xe_migrate.c | 248 -----------
drivers/gpu/drm/xe/xe_migrate.h | 95 ----
drivers/gpu/drm/xe/xe_pt.c | 553 ++++++++++++------------
drivers/gpu/drm/xe/xe_pt.h | 8 +-
drivers/gpu/drm/xe/xe_pt_types.h | 14 -
drivers/gpu/drm/xe/xe_sched_job.c | 10 +-
drivers/gpu/drm/xe/xe_sched_job_types.h | 11 +-
drivers/gpu/drm/xe/xe_tlb_inval_job.c | 13 +-
drivers/gpu/drm/xe/xe_tlb_inval_job.h | 2 -
drivers/gpu/drm/xe/xe_vm.c | 156 ++-----
drivers/gpu/drm/xe/xe_vm_types.h | 20 +-
18 files changed, 818 insertions(+), 780 deletions(-)
create mode 100644 drivers/gpu/drm/xe/xe_cpu_bind.c
create mode 100644 drivers/gpu/drm/xe/xe_cpu_bind.h
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index ff778fb2d4ff..f923e54c1082 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -35,6 +35,7 @@ $(obj)/generated/%_device_wa_oob.c $(obj)/generated/%_device_wa_oob.h: $(obj)/xe
xe-y += xe_bb.o \
xe_bo.o \
xe_bo_evict.o \
+ xe_cpu_bind.o \
xe_dep_scheduler.o \
xe_devcoredump.o \
xe_device.o \
diff --git a/drivers/gpu/drm/xe/xe_cpu_bind.c b/drivers/gpu/drm/xe/xe_cpu_bind.c
new file mode 100644
index 000000000000..4a9c72250ca9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_cpu_bind.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+#include <linux/mutex.h>
+
+#include "xe_cpu_bind.h"
+#include "xe_device_types.h"
+#include "xe_exec_queue.h"
+#include "xe_pt.h"
+#include "xe_sched_job.h"
+#include "xe_trace_bo.h"
+#include "xe_vm.h"
+
+/**
+ * struct xe_cpu_bind - cpu_bind context.
+ *
+ * Device-level context for CPU-based page-table binds: owns the default
+ * kernel bind queue and the timeline mutex that serializes jobs on it.
+ */
+struct xe_cpu_bind {
+	/** @xe: Xe device */
+	struct xe_device *xe;
+	/** @q: Default exec queue used for kernel binds */
+	struct xe_exec_queue *q;
+	/** @job_mutex: Timeline mutex for @q. */
+	struct mutex job_mutex;
+};
+
+/* True iff @q is the device-level default CPU bind queue. */
+static bool is_cpu_bind_queue(struct xe_cpu_bind *cpu_bind,
+			      struct xe_exec_queue *q)
+{
+	return cpu_bind->q == q;
+}
+
+/* devm cleanup action: tear down the CPU bind context created in init. */
+static void xe_cpu_bind_fini(void *arg)
+{
+	struct xe_cpu_bind *cpu_bind = arg;
+
+	mutex_destroy(&cpu_bind->job_mutex);
+	xe_exec_queue_put(cpu_bind->q);
+}
+
+/**
+ * xe_cpu_bind_init() - Initialize a cpu_bind context
+ * @xe: &xe_device
+ *
+ * Allocate the device-level CPU bind context, create the default kernel bind
+ * queue, and register a cleanup action that releases both on driver removal.
+ *
+ * Return: 0 if successful, negative error code on failure (-ENOMEM on
+ * allocation failure, or the error from bind queue creation).
+ */
+int xe_cpu_bind_init(struct xe_device *xe)
+{
+	struct xe_cpu_bind *cpu_bind =
+		drmm_kzalloc(&xe->drm, sizeof(*cpu_bind), GFP_KERNEL);
+	struct xe_exec_queue *q;
+
+	/*
+	 * Check the allocation before creating the queue: avoids both a NULL
+	 * dereference below and leaking the queue on the error path.
+	 */
+	if (!cpu_bind)
+		return -ENOMEM;
+
+	q = xe_exec_queue_create_bind(xe, xe_device_get_root_tile(xe), NULL,
+				      EXEC_QUEUE_FLAG_KERNEL |
+				      EXEC_QUEUE_FLAG_PERMANENT |
+				      EXEC_QUEUE_FLAG_MIGRATE, 0);
+	if (IS_ERR(q))
+		return PTR_ERR(q);
+
+	cpu_bind->xe = xe;
+	cpu_bind->q = q;
+	xe->cpu_bind = cpu_bind;
+
+	mutex_init(&cpu_bind->job_mutex);
+
+	/* Prime lockdep: job_mutex must be safe to take under memory reclaim. */
+	fs_reclaim_acquire(GFP_KERNEL);
+	might_lock(&cpu_bind->job_mutex);
+	fs_reclaim_release(GFP_KERNEL);
+
+	return devm_add_action_or_reset(cpu_bind->xe->drm.dev, xe_cpu_bind_fini,
+					cpu_bind);
+}
+
+/**
+ * xe_cpu_bind_queue() - Get the bind queue from cpu_bind context.
+ * @cpu_bind: The cpu bind context.
+ *
+ * Return: Pointer to the default kernel bind queue. Cannot fail; the queue
+ * is valid for the lifetime of the context once xe_cpu_bind_init() succeeds.
+ */
+struct xe_exec_queue *xe_cpu_bind_queue(struct xe_cpu_bind *cpu_bind)
+{
+	return cpu_bind->q;
+}
+
+/**
+ * xe_cpu_bind_update_pgtables_execute() - Update a VM's PTEs via the CPU
+ * @vm: The VM being updated
+ * @tile: The tile being updated
+ * @ops: The CPU bind PT update ops (populate/clear callbacks)
+ * @pt_op: Array of VM PT update ops
+ * @num_ops: Number of VM PT update ops in @pt_op
+ *
+ * Execute the VM PT update ops array which results in a VM's PTEs being updated
+ * via the CPU.
+ */
+void
+xe_cpu_bind_update_pgtables_execute(struct xe_vm *vm, struct xe_tile *tile,
+				    const struct xe_cpu_bind_pt_update_ops *ops,
+				    struct xe_vm_pgtable_update_op *pt_op,
+				    int num_ops)
+{
+	u32 j, i;
+
+	for (j = 0; j < num_ops; ++j, ++pt_op) {
+		for (i = 0; i < pt_op->num_entries; i++) {
+			const struct xe_vm_pgtable_update *update =
+				&pt_op->entries[i];
+
+			xe_assert(vm->xe, update);
+			xe_assert(vm->xe, update->pt_bo);
+			xe_assert(vm->xe, !iosys_map_is_null(&update->pt_bo->vmap));
+
+			/* Bind ops write PTEs; unbind ops clear them. */
+			if (pt_op->bind)
+				ops->populate(tile, &update->pt_bo->vmap,
+					      update->ofs, update->qwords,
+					      update);
+			else
+				ops->clear(vm, tile, &update->pt_bo->vmap,
+					   update->ofs, update->qwords,
+					   update);
+		}
+	}
+
+	trace_xe_vm_cpu_bind(vm);
+	/* Make PTE writes visible to the device before any GPU access. */
+	xe_device_wmb(vm->xe);
+}
+
+/*
+ * Attempt the page-table update directly on the CPU, without a sched job.
+ * Returns ERR_PTR(-ETIME) (propagated from the pre_commit callback) when
+ * unmet dependencies require deferring the update to a job instead.
+ */
+static struct dma_fence *
+xe_cpu_bind_update_pgtables_no_job(struct xe_cpu_bind *cpu_bind,
+				   struct xe_cpu_bind_pt_update *pt_update)
+{
+	const struct xe_cpu_bind_pt_update_ops *ops = pt_update->ops;
+	struct xe_vm *vm = pt_update->vops->vm;
+	struct xe_tile *tile;
+	int err, id;
+
+	if (ops->pre_commit) {
+		/* No job on the CPU path; pre_commit checks dependencies. */
+		pt_update->job = NULL;
+		err = ops->pre_commit(pt_update);
+		if (err)
+			return ERR_PTR(err);
+	}
+
+	/* CPU binds are tile-independent; walk every tile with pending ops. */
+	for_each_tile(tile, vm->xe, id) {
+		struct xe_vm_pgtable_update_ops *pt_update_ops =
+			&pt_update->vops->pt_update_ops[tile->id];
+
+		if (!pt_update_ops->pt_job_ops)
+			continue;
+
+		xe_cpu_bind_update_pgtables_execute(vm, tile, ops,
+						    pt_update_ops->pt_job_ops->ops,
+						    pt_update_ops->pt_job_ops->current_op);
+	}
+
+	/* Updates completed synchronously; return an already-signaled fence. */
+	return dma_fence_get_stub();
+}
+
+/* Perform the page-table update via a sched job on the bind or user queue. */
+static struct dma_fence *
+xe_cpu_bind_update_pgtables_job(struct xe_cpu_bind *cpu_bind,
+				struct xe_cpu_bind_pt_update *pt_update)
+{
+	const struct xe_cpu_bind_pt_update_ops *ops = pt_update->ops;
+	struct xe_exec_queue *q = pt_update->vops->q;
+	struct xe_device *xe = cpu_bind->xe;
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	struct xe_tile *tile;
+	int err, id;
+	bool is_cpu_bind = is_cpu_bind_queue(cpu_bind, q);
+
+	job = xe_sched_job_create(q, NULL);
+	if (IS_ERR(job))
+		return ERR_CAST(job);
+
+	xe_assert(xe, job->is_pt_job);
+
+	if (ops->pre_commit) {
+		pt_update->job = job;
+		err = ops->pre_commit(pt_update);
+		if (err)
+			goto err_job;
+	}
+
+	/*
+	 * job_mutex serializes jobs only on the default kernel bind queue;
+	 * user queues rely on the VM lock held by the caller.
+	 */
+	if (is_cpu_bind)
+		mutex_lock(&cpu_bind->job_mutex);
+
+	job->pt_update[0].vm = pt_update->vops->vm;
+	job->pt_update[0].ops = ops;
+	/* Reference per tile; dropped when the job runs (xe_guc_submit.c). */
+	for_each_tile(tile, xe, id) {
+		struct xe_vm_pgtable_update_ops *pt_update_ops =
+			&pt_update->vops->pt_update_ops[tile->id];
+
+		job->pt_update[0].pt_job_ops[tile->id] =
+			xe_pt_job_ops_get(pt_update_ops->pt_job_ops);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	if (is_cpu_bind)
+		mutex_unlock(&cpu_bind->job_mutex);
+
+	return fence;
+
+err_job:
+	xe_sched_job_put(job);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_cpu_bind_update_pgtables() - Pipelined page-table update
+ * @cpu_bind: The cpu bind context.
+ * @pt_update: PT update arguments
+ *
+ * Perform a pipelined page-table update. The update descriptors are typically
+ * built under the same lock critical section as a call to this function. If
+ * using the default engine for the updates, they will be performed in the
+ * order they grab the job_mutex. If different engines are used, external
+ * synchronization is needed for overlapping updates to maintain page-table
+ * consistency. Note that the meaning of "overlapping" is that the updates
+ * touch the same page-table, which might be a higher-level page-directory.
+ * If no pipelining is needed, then updates may be performed by the cpu.
+ *
+ * Return: A dma_fence that, when signaled, indicates the update completion.
+ */
+struct dma_fence *
+xe_cpu_bind_update_pgtables(struct xe_cpu_bind *cpu_bind,
+			    struct xe_cpu_bind_pt_update *pt_update)
+{
+	struct dma_fence *fence;
+
+	/* Fast path: try the synchronous CPU update first. */
+	fence = xe_cpu_bind_update_pgtables_no_job(cpu_bind, pt_update);
+
+	/* -ETIME indicates a job is needed, anything else is legit error */
+	if (!IS_ERR(fence) || PTR_ERR(fence) != -ETIME)
+		return fence;
+
+	return xe_cpu_bind_update_pgtables_job(cpu_bind, pt_update);
+}
+
+/**
+ * xe_cpu_bind_job_lock() - Lock cpu_bind job lock
+ * @cpu_bind: The cpu bind context.
+ * @q: Queue associated with the operation which requires a lock
+ *
+ * Lock the cpu_bind job lock if the queue is a cpu bind queue, otherwise
+ * assert the VM's dma-resv is held (user queues have their own locking).
+ */
+void xe_cpu_bind_job_lock(struct xe_cpu_bind *cpu_bind,
+			  struct xe_exec_queue *q)
+{
+	bool is_cpu_bind = is_cpu_bind_queue(cpu_bind, q);
+
+	if (is_cpu_bind)
+		mutex_lock(&cpu_bind->job_mutex);
+	else
+		xe_vm_assert_held(q->user_vm); /* User queues' VMs should be locked */
+}
+
+/**
+ * xe_cpu_bind_job_unlock() - Unlock cpu_bind job lock
+ * @cpu_bind: The cpu bind context.
+ * @q: Queue associated with the operation which requires a lock
+ *
+ * Unlock the cpu_bind job lock if the queue is a cpu bind queue, otherwise
+ * assert the VM's dma-resv is held (user queues have their own locking).
+ */
+void xe_cpu_bind_job_unlock(struct xe_cpu_bind *cpu_bind,
+			    struct xe_exec_queue *q)
+{
+	bool is_cpu_bind = is_cpu_bind_queue(cpu_bind, q);
+
+	if (is_cpu_bind)
+		mutex_unlock(&cpu_bind->job_mutex);
+	else
+		xe_vm_assert_held(q->user_vm); /* User queues' VMs should be locked */
+}
+
+#if IS_ENABLED(CONFIG_PROVE_LOCKING)
+/**
+ * xe_cpu_bind_job_lock_assert() - Assert cpu_bind job lock held for queue
+ * @q: cpu bind queue (must be the device's default CPU bind queue)
+ *
+ * Lockdep-only helper: verifies @q is the CPU bind queue and that its
+ * timeline mutex is currently held by the caller.
+ */
+void xe_cpu_bind_job_lock_assert(struct xe_exec_queue *q)
+{
+	struct xe_device *xe = gt_to_xe(q->gt);
+	struct xe_cpu_bind *cpu_bind = xe->cpu_bind;
+
+	xe_assert(xe, q == cpu_bind->q);
+	lockdep_assert_held(&cpu_bind->job_mutex);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_cpu_bind.h b/drivers/gpu/drm/xe/xe_cpu_bind.h
new file mode 100644
index 000000000000..95996a6a5c20
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_cpu_bind.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_CPU_BIND_H_
+#define _XE_CPU_BIND_H_
+
+#include <linux/types.h>
+
+struct dma_fence;
+struct iosys_map;
+struct xe_cpu_bind;
+struct xe_cpu_bind_pt_update;
+struct xe_device;
+struct xe_tlb_inval_job;
+struct xe_tile;
+struct xe_vm;
+struct xe_vm_pgtable_update;
+struct xe_vm_pgtable_update_op;
+struct xe_vma_ops;
+
+/**
+ * struct xe_cpu_bind_pt_update_ops - Callbacks for the
+ * xe_cpu_bind_update_pgtables() function.
+ */
+struct xe_cpu_bind_pt_update_ops {
+	/**
+	 * @populate: Populate a command buffer or page-table with ptes.
+	 * @tile: The tile for the current operation.
+	 * @map: struct iosys_map into the memory to be populated.
+	 * @ofs: qword offset into @map, unused if @map is NULL.
+	 * @num_qwords: Number of qwords to write.
+	 * @update: Information about the PTEs to be inserted.
+	 *
+	 * This interface is intended to be used as a callback into the
+	 * page-table system to populate command buffers or shared
+	 * page-tables with PTEs.
+	 */
+	void (*populate)(struct xe_tile *tile, struct iosys_map *map,
+			 u32 ofs, u32 num_qwords,
+			 const struct xe_vm_pgtable_update *update);
+	/**
+	 * @clear: Clear PTEs in a command buffer or page-table.
+	 * @vm: VM being updated
+	 * @tile: The tile for the current operation.
+	 * @map: struct iosys_map into the memory to be cleared.
+	 * @ofs: qword offset into @map, unused if @map is NULL.
+	 * @num_qwords: Number of qwords to write.
+	 * @update: Information about the PTEs to be removed.
+	 *
+	 * This interface is intended to be used as a callback into the
+	 * page-table system to clear command buffers or shared
+	 * page-tables of PTEs.
+	 */
+	void (*clear)(struct xe_vm *vm, struct xe_tile *tile,
+		      struct iosys_map *map, u32 ofs, u32 num_qwords,
+		      const struct xe_vm_pgtable_update *update);
+
+	/**
+	 * @pre_commit: Callback to be called just before arming the
+	 * sched_job.
+	 * @pt_update: Pointer to embeddable callback argument.
+	 *
+	 * Return: 0 on success, negative error code on error. Returning
+	 * -ETIME from the no-job path makes xe_cpu_bind_update_pgtables()
+	 * retry the update via a sched job.
+	 */
+	int (*pre_commit)(struct xe_cpu_bind_pt_update *pt_update);
+};
+
+/**
+ * struct xe_cpu_bind_pt_update - Argument to the struct
+ * xe_cpu_bind_pt_update_ops callbacks.
+ *
+ * Intended to be subclassed to support additional arguments if necessary.
+ */
+struct xe_cpu_bind_pt_update {
+	/** @ops: Pointer to the struct xe_cpu_bind_pt_update_ops callbacks */
+	const struct xe_cpu_bind_pt_update_ops *ops;
+	/** @vops: VMA operations */
+	struct xe_vma_ops *vops;
+	/** @job: The job if a GPU page-table update. NULL otherwise */
+	struct xe_sched_job *job;
+	/**
+	 * @ijobs: The TLB invalidation jobs, individual instances can be NULL.
+	 * NOTE(review): the count of 4 presumably covers primary + media GT
+	 * across two tiles — confirm against the users of this array.
+	 */
+#define XE_CPU_BIND_INVAL_JOB_COUNT 4
+	struct xe_tlb_inval_job *ijobs[XE_CPU_BIND_INVAL_JOB_COUNT];
+};
+
+int xe_cpu_bind_init(struct xe_device *xe);
+
+struct xe_exec_queue *xe_cpu_bind_queue(struct xe_cpu_bind *cpu_bind);
+
+void
+xe_cpu_bind_update_pgtables_execute(struct xe_vm *vm, struct xe_tile *tile,
+ const struct xe_cpu_bind_pt_update_ops *ops,
+ struct xe_vm_pgtable_update_op *pt_op,
+ int num_ops);
+
+struct dma_fence *
+xe_cpu_bind_update_pgtables(struct xe_cpu_bind *cpu_bind,
+ struct xe_cpu_bind_pt_update *pt_update);
+
+void xe_cpu_bind_job_lock(struct xe_cpu_bind *cpu_bind,
+ struct xe_exec_queue *q);
+
+void xe_cpu_bind_job_unlock(struct xe_cpu_bind *cpu_bind,
+ struct xe_exec_queue *q);
+
+#if IS_ENABLED(CONFIG_PROVE_LOCKING)
+void xe_cpu_bind_job_lock_assert(struct xe_exec_queue *q);
+#else
+static inline void xe_cpu_bind_job_lock_assert(struct xe_exec_queue *q)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 3462645ca13c..b7ad7f97e68c 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -25,6 +25,7 @@
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
+#include "xe_cpu_bind.h"
#include "xe_debugfs.h"
#include "xe_defaults.h"
#include "xe_devcoredump.h"
@@ -929,6 +930,10 @@ int xe_device_probe(struct xe_device *xe)
return err;
}
+ err = xe_cpu_bind_init(xe);
+ if (err)
+ return err;
+
err = xe_pagefault_init(xe);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index caa8f34a6744..776e9e190320 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -36,6 +36,7 @@
struct drm_pagemap_shrinker;
struct intel_display;
struct intel_dg_nvm_dev;
+struct xe_cpu_bind;
struct xe_ggtt;
struct xe_i2c;
struct xe_pat_ops;
@@ -512,6 +513,9 @@ struct xe_device {
/** @i2c: I2C host controller */
struct xe_i2c *i2c;
+ /** @cpu_bind: CPU bind object */
+ struct xe_cpu_bind *cpu_bind;
+
/** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice MS */
u32 atomic_svm_timeslice_ms;
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 0201b8159e63..ee2119cf45c1 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -14,6 +14,7 @@
#include <uapi/drm/xe_drm.h>
#include "xe_bo.h"
+#include "xe_cpu_bind.h"
#include "xe_dep_scheduler.h"
#include "xe_device.h"
#include "xe_gt.h"
@@ -1454,7 +1455,7 @@ static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
struct xe_vm *vm)
{
if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
- xe_migrate_job_lock_assert(q);
+ xe_cpu_bind_job_lock_assert(q);
} else if (q->flags & EXEC_QUEUE_FLAG_VM) {
lockdep_assert_held(&vm->lock);
} else {
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 1d6ac7a6563b..f7b56a1eaed4 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -17,6 +17,7 @@
#include "abi/guc_klvs_abi.h"
#include "xe_assert.h"
#include "xe_bo.h"
+#include "xe_cpu_bind.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
@@ -36,7 +37,6 @@
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
-#include "xe_migrate.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_pt.h"
@@ -1190,13 +1190,36 @@ static bool is_pt_job(struct xe_sched_job *job)
return job->is_pt_job;
}
-static void run_pt_job(struct xe_sched_job *job)
+static void run_pt_job(struct xe_device *xe, struct xe_sched_job *job)
{
- xe_migrate_update_pgtables_cpu_execute(job->pt_update[0].vm,
- job->pt_update[0].tile,
- job->pt_update[0].ops,
- job->pt_update[0].pt_job_ops->ops,
- job->pt_update[0].pt_job_ops->current_op);
+ struct xe_tile *tile;
+ int id;
+
+ for_each_tile(tile, xe, id) {
+ struct xe_pt_job_ops *pt_job_ops =
+ job->pt_update[0].pt_job_ops[id];
+
+ if (!pt_job_ops || !pt_job_ops->current_op)
+ continue;
+
+ xe_cpu_bind_update_pgtables_execute(job->pt_update[0].vm, tile,
+ job->pt_update[0].ops,
+ pt_job_ops->ops,
+ pt_job_ops->current_op);
+ }
+}
+
+static void put_pt_job(struct xe_device *xe, struct xe_sched_job *job)
+{
+ struct xe_tile *tile;
+ int id;
+
+ for_each_tile(tile, xe, id) {
+ struct xe_pt_job_ops *pt_job_ops =
+ job->pt_update[0].pt_job_ops[id];
+
+ xe_pt_job_ops_put(pt_job_ops);
+ }
}
static struct dma_fence *
@@ -1228,7 +1251,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
if (is_pt_job(job)) {
xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
- run_pt_job(job);
+ run_pt_job(guc_to_xe(guc), job);
} else {
if (!exec_queue_registered(q))
register_exec_queue(q, GUC_CONTEXT_NORMAL);
@@ -1240,7 +1263,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
}
if (is_pt_job(job)) {
- xe_pt_job_ops_put(job->pt_update[0].pt_job_ops);
+ put_pt_job(guc_to_xe(guc), job);
dma_fence_put(job->fence); /* Drop ref from xe_sched_job_arm */
return NULL;
}
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index fe5c9bdcb555..b5d4fc4d4c62 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -49,8 +49,6 @@
struct xe_migrate {
/** @q: Default exec queue used for migration */
struct xe_exec_queue *q;
- /** @bind_q: Default exec queue used for binds */
- struct xe_exec_queue *bind_q;
/** @tile: Backpointer to the tile this struct xe_migrate belongs to. */
struct xe_tile *tile;
/** @job_mutex: Timeline mutex for @eng. */
@@ -108,7 +106,6 @@ static void xe_migrate_fini(void *arg)
mutex_destroy(&m->job_mutex);
xe_vm_close_and_put(m->q->vm);
xe_exec_queue_put(m->q);
- xe_exec_queue_put(m->bind_q);
}
static u64 xe_migrate_vm_addr(u64 slot, u32 level)
@@ -448,16 +445,6 @@ int xe_migrate_init(struct xe_migrate *m)
goto err_out;
}
- m->bind_q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
- EXEC_QUEUE_FLAG_KERNEL |
- EXEC_QUEUE_FLAG_PERMANENT |
- EXEC_QUEUE_FLAG_HIGH_PRIORITY |
- EXEC_QUEUE_FLAG_MIGRATE, 0);
- if (IS_ERR(m->bind_q)) {
- err = PTR_ERR(m->bind_q);
- goto err_out;
- }
-
/*
* XXX: Currently only reserving 1 (likely slow) BCS instance on
* PVC, may want to revisit if performance is needed.
@@ -469,16 +456,6 @@ int xe_migrate_init(struct xe_migrate *m)
EXEC_QUEUE_FLAG_MIGRATE |
EXEC_QUEUE_FLAG_LOW_LATENCY, 0);
} else {
- m->bind_q = xe_exec_queue_create_class(xe, primary_gt, vm,
- XE_ENGINE_CLASS_COPY,
- EXEC_QUEUE_FLAG_KERNEL |
- EXEC_QUEUE_FLAG_PERMANENT |
- EXEC_QUEUE_FLAG_MIGRATE, 0);
- if (IS_ERR(m->bind_q)) {
- err = PTR_ERR(m->bind_q);
- goto err_out;
- }
-
m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
XE_ENGINE_CLASS_COPY,
EXEC_QUEUE_FLAG_KERNEL |
@@ -515,8 +492,6 @@ int xe_migrate_init(struct xe_migrate *m)
return err;
err_out:
- if (!IS_ERR_OR_NULL(m->bind_q))
- xe_exec_queue_put(m->bind_q);
xe_vm_close_and_put(vm);
return err;
@@ -1403,17 +1378,6 @@ struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_off
return fence;
}
-/**
- * xe_get_migrate_bind_queue() - Get the bind queue from migrate context.
- * @migrate: Migrate context.
- *
- * Return: Pointer to bind queue on success, error on failure
- */
-struct xe_exec_queue *xe_migrate_bind_queue(struct xe_migrate *migrate)
-{
- return migrate->bind_q;
-}
-
static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
u32 size, u32 pitch)
{
@@ -1684,168 +1648,6 @@ struct migrate_test_params {
container_of(_priv, struct migrate_test_params, base)
#endif
-/**
- * xe_migrate_update_pgtables_cpu_execute() - Update a VM's PTEs via the CPU
- * @vm: The VM being updated
- * @tile: The tile being updated
- * @ops: The migrate PT update ops
- * @pt_ops: The VM PT update ops
- * @num_ops: The number of The VM PT update ops
- *
- * Execute the VM PT update ops array which results in a VM's PTEs being updated
- * via the CPU.
- */
-void
-xe_migrate_update_pgtables_cpu_execute(struct xe_vm *vm, struct xe_tile *tile,
- const struct xe_migrate_pt_update_ops *ops,
- struct xe_vm_pgtable_update_op *pt_op,
- int num_ops)
-{
- u32 j, i;
-
- for (j = 0; j < num_ops; ++j, ++pt_op) {
- for (i = 0; i < pt_op->num_entries; i++) {
- const struct xe_vm_pgtable_update *update =
- &pt_op->entries[i];
-
- xe_tile_assert(tile, update);
- xe_tile_assert(tile, update->pt_bo);
- xe_tile_assert(tile, !iosys_map_is_null(&update->pt_bo->vmap));
-
- if (pt_op->bind)
- ops->populate(tile, &update->pt_bo->vmap,
- update->ofs, update->qwords,
- update);
- else
- ops->clear(vm, tile, &update->pt_bo->vmap,
- update->ofs, update->qwords,
- update);
- }
- }
-
- trace_xe_vm_cpu_bind(vm);
- xe_device_wmb(vm->xe);
-}
-
-static struct dma_fence *
-xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
- struct xe_migrate_pt_update *pt_update)
-{
- XE_TEST_DECLARE(struct migrate_test_params *test =
- to_migrate_test_params
- (xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));)
- const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
- struct xe_vm *vm = pt_update->vops->vm;
- struct xe_vm_pgtable_update_ops *pt_update_ops =
- &pt_update->vops->pt_update_ops[pt_update->tile_id];
- int err;
-
- if (XE_TEST_ONLY(test && test->force_gpu))
- return ERR_PTR(-ETIME);
-
- if (ops->pre_commit) {
- pt_update->job = NULL;
- err = ops->pre_commit(pt_update);
- if (err)
- return ERR_PTR(err);
- }
-
- xe_migrate_update_pgtables_cpu_execute(vm, m->tile, ops,
- pt_update_ops->pt_job_ops->ops,
- pt_update_ops->num_ops);
-
- return dma_fence_get_stub();
-}
-
-static bool is_migrate_queue(struct xe_migrate *m, struct xe_exec_queue *q)
-{
- return m->bind_q == q;
-}
-
-static struct dma_fence *
-__xe_migrate_update_pgtables(struct xe_migrate *m,
- struct xe_migrate_pt_update *pt_update,
- struct xe_vm_pgtable_update_ops *pt_update_ops)
-{
- const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
- struct xe_tile *tile = m->tile;
- struct xe_sched_job *job;
- struct dma_fence *fence;
- bool is_migrate = is_migrate_queue(m, pt_update_ops->q);
- int err;
-
- job = xe_sched_job_create(pt_update_ops->q, NULL);
- if (IS_ERR(job)) {
- err = PTR_ERR(job);
- goto err_out;
- }
-
- xe_tile_assert(tile, job->is_pt_job);
-
- if (ops->pre_commit) {
- pt_update->job = job;
- err = ops->pre_commit(pt_update);
- if (err)
- goto err_job;
- }
- if (is_migrate)
- mutex_lock(&m->job_mutex);
-
- job->pt_update[0].vm = pt_update->vops->vm;
- job->pt_update[0].tile = tile;
- job->pt_update[0].ops = ops;
- job->pt_update[0].pt_job_ops =
- xe_pt_job_ops_get(pt_update_ops->pt_job_ops);
-
- xe_sched_job_arm(job);
- fence = dma_fence_get(&job->drm.s_fence->finished);
- xe_sched_job_push(job);
-
- if (is_migrate)
- mutex_unlock(&m->job_mutex);
-
- return fence;
-
-err_job:
- xe_sched_job_put(job);
-err_out:
- return ERR_PTR(err);
-}
-
-/**
- * xe_migrate_update_pgtables() - Pipelined page-table update
- * @m: The migrate context.
- * @pt_update: PT update arguments
- *
- * Perform a pipelined page-table update. The update descriptors are typically
- * built under the same lock critical section as a call to this function. If
- * using the default engine for the updates, they will be performed in the
- * order they grab the job_mutex. If different engines are used, external
- * synchronization is needed for overlapping updates to maintain page-table
- * consistency. Note that the meaning of "overlapping" is that the updates
- * touch the same page-table, which might be a higher-level page-directory.
- * If no pipelining is needed, then updates may be performed by the cpu.
- *
- * Return: A dma_fence that, when signaled, indicates the update completion.
- */
-struct dma_fence *
-xe_migrate_update_pgtables(struct xe_migrate *m,
- struct xe_migrate_pt_update *pt_update)
-
-{
- struct xe_vm_pgtable_update_ops *pt_update_ops =
- &pt_update->vops->pt_update_ops[pt_update->tile_id];
- struct dma_fence *fence;
-
- fence = xe_migrate_update_pgtables_cpu(m, pt_update);
-
- /* -ETIME indicates a job is needed, anything else is legit error */
- if (!IS_ERR(fence) || PTR_ERR(fence) != -ETIME)
- return fence;
-
- return __xe_migrate_update_pgtables(m, pt_update, pt_update_ops);
-}
-
/**
* xe_migrate_wait() - Complete all operations using the xe_migrate context
* @m: Migrate context to wait for.
@@ -2347,56 +2149,6 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
return IS_ERR(fence) ? PTR_ERR(fence) : 0;
}
-/**
- * xe_migrate_job_lock() - Lock migrate job lock
- * @m: The migration context.
- * @q: Queue associated with the operation which requires a lock
- *
- * Lock the migrate job lock if the queue is a migration queue, otherwise
- * assert the VM's dma-resv is held (user queue's have own locking).
- */
-void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q)
-{
- bool is_migrate = is_migrate_queue(m, q);
-
- if (is_migrate)
- mutex_lock(&m->job_mutex);
- else
- xe_vm_assert_held(q->user_vm); /* User queues VM's should be locked */
-}
-
-/**
- * xe_migrate_job_unlock() - Unlock migrate job lock
- * @m: The migration context.
- * @q: Queue associated with the operation which requires a lock
- *
- * Unlock the migrate job lock if the queue is a migration queue, otherwise
- * assert the VM's dma-resv is held (user queue's have own locking).
- */
-void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q)
-{
- bool is_migrate = is_migrate_queue(m, q);
-
- if (is_migrate)
- mutex_unlock(&m->job_mutex);
- else
- xe_vm_assert_held(q->user_vm); /* User queues VM's should be locked */
-}
-
-#if IS_ENABLED(CONFIG_PROVE_LOCKING)
-/**
- * xe_migrate_job_lock_assert() - Assert migrate job lock held of queue
- * @q: Migrate queue
- */
-void xe_migrate_job_lock_assert(struct xe_exec_queue *q)
-{
- struct xe_migrate *m = gt_to_tile(q->gt)->migrate;
-
- xe_gt_assert(q->gt, q == m->bind_q);
- lockdep_assert_held(&m->job_mutex);
-}
-#endif
-
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_migrate.c"
#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index ae979f6bf8ef..f6fa23c6c4fb 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -34,78 +34,6 @@ enum xe_migrate_copy_dir {
XE_MIGRATE_COPY_TO_SRAM,
};
-/**
- * struct xe_migrate_pt_update_ops - Callbacks for the
- * xe_migrate_update_pgtables() function.
- */
-struct xe_migrate_pt_update_ops {
- /**
- * @populate: Populate a command buffer or page-table with ptes.
- * @tile: The tile for the current operation.
- * @map: struct iosys_map into the memory to be populated.
- * @ofs: qword offset into @map, unused if @map is NULL.
- * @num_qwords: Number of qwords to write.
- * @update: Information about the PTEs to be inserted.
- *
- * This interface is intended to be used as a callback into the
- * page-table system to populate command buffers or shared
- * page-tables with PTEs.
- */
- void (*populate)(struct xe_tile *tile, struct iosys_map *map,
- u32 ofs, u32 num_qwords,
- const struct xe_vm_pgtable_update *update);
- /**
- * @clear: Clear a command buffer or page-table with ptes.
- * @vm: VM being updated
- * @tile: The tile for the current operation.
- * @map: struct iosys_map into the memory to be populated.
- * @ofs: qword offset into @map, unused if @map is NULL.
- * @num_qwords: Number of qwords to write.
- * @update: Information about the PTEs to be inserted.
- *
- * This interface is intended to be used as a callback into the
- * page-table system to populate command buffers or shared
- * page-tables with PTEs.
- */
- void (*clear)(struct xe_vm *vm, struct xe_tile *tile,
- struct iosys_map *map, u32 ofs, u32 num_qwords,
- const struct xe_vm_pgtable_update *update);
-
- /**
- * @pre_commit: Callback to be called just before arming the
- * sched_job.
- * @pt_update: Pointer to embeddable callback argument.
- *
- * Return: 0 on success, negative error code on error.
- */
- int (*pre_commit)(struct xe_migrate_pt_update *pt_update);
-};
-
-/**
- * struct xe_migrate_pt_update - Argument to the
- * struct xe_migrate_pt_update_ops callbacks.
- *
- * Intended to be subclassed to support additional arguments if necessary.
- */
-struct xe_migrate_pt_update {
- /** @ops: Pointer to the struct xe_migrate_pt_update_ops callbacks */
- const struct xe_migrate_pt_update_ops *ops;
- /** @vops: VMA operations */
- struct xe_vma_ops *vops;
- /** @job: The job if a GPU page-table update. NULL otherwise */
- struct xe_sched_job *job;
- /**
- * @ijob: The TLB invalidation job for primary GT. NULL otherwise
- */
- struct xe_tlb_inval_job *ijob;
- /**
- * @mjob: The TLB invalidation job for media GT. NULL otherwise
- */
- struct xe_tlb_inval_job *mjob;
- /** @tile_id: Tile ID of the update */
- u8 tile_id;
-};
-
struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile);
int xe_migrate_init(struct xe_migrate *m);
@@ -137,7 +65,6 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate);
struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate);
-struct xe_exec_queue *xe_migrate_bind_queue(struct xe_migrate *migrate);
struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset,
struct xe_bo *sysmem_bo, u64 sysmem_offset,
u64 size, enum xe_migrate_copy_dir dir);
@@ -156,28 +83,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
-
-void
-xe_migrate_update_pgtables_cpu_execute(struct xe_vm *vm, struct xe_tile *tile,
- const struct xe_migrate_pt_update_ops *ops,
- struct xe_vm_pgtable_update_op *pt_op,
- int num_ops);
-
-struct dma_fence *
-xe_migrate_update_pgtables(struct xe_migrate *m,
- struct xe_migrate_pt_update *pt_update);
-
void xe_migrate_wait(struct xe_migrate *m);
-#if IS_ENABLED(CONFIG_PROVE_LOCKING)
-void xe_migrate_job_lock_assert(struct xe_exec_queue *q);
-#else
-static inline void xe_migrate_job_lock_assert(struct xe_exec_queue *q)
-{
-}
-#endif
-
-void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q);
-void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q);
-
#endif
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 032947a10806..d91d80c92957 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -7,12 +7,12 @@
#include "regs/xe_gtt_defs.h"
#include "xe_bo.h"
+#include "xe_cpu_bind.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_gt_stats.h"
-#include "xe_migrate.h"
#include "xe_page_reclaim.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
@@ -1291,11 +1291,9 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
}
static int xe_pt_vm_dependencies(struct xe_sched_job *job,
- struct xe_tlb_inval_job *ijob,
- struct xe_tlb_inval_job *mjob,
+ struct xe_tlb_inval_job **ijobs,
struct xe_vm *vm,
struct xe_vma_ops *vops,
- struct xe_vm_pgtable_update_ops *pt_update_ops,
struct xe_range_fence_tree *rftree)
{
struct xe_range_fence *rtfence;
@@ -1308,20 +1306,22 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job,
if (!job && !no_in_syncs(vops->syncs, vops->num_syncs))
return -ETIME;
- if (!job && !xe_exec_queue_is_idle(pt_update_ops->q))
+ if (!job && !xe_exec_queue_is_idle(vops->q))
return -ETIME;
- if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) {
- err = job_test_add_deps(job, xe_vm_resv(vm),
- pt_update_ops->wait_vm_bookkeep ?
- DMA_RESV_USAGE_BOOKKEEP :
- DMA_RESV_USAGE_KERNEL);
+ if (vops->flags & (XE_VMA_OPS_FLAG_WAIT_VM_BOOKKEEP |
+ XE_VMA_OPS_FLAG_WAIT_VM_KERNEL)) {
+ enum dma_resv_usage usage = DMA_RESV_USAGE_KERNEL;
+
+ if (vops->flags & XE_VMA_OPS_FLAG_WAIT_VM_BOOKKEEP)
+ usage = DMA_RESV_USAGE_BOOKKEEP;
+
+ err = job_test_add_deps(job, xe_vm_resv(vm), usage);
if (err)
return err;
}
- rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start,
- pt_update_ops->last);
+ rtfence = xe_range_fence_tree_first(rftree, vops->start, vops->last);
while (rtfence) {
fence = rtfence->fence;
@@ -1339,9 +1339,8 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job,
return err;
}
- rtfence = xe_range_fence_tree_next(rtfence,
- pt_update_ops->start,
- pt_update_ops->last);
+ rtfence = xe_range_fence_tree_next(rtfence, vops->start,
+ vops->last);
}
list_for_each_entry(op, &vops->list, link) {
@@ -1354,14 +1353,11 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job,
err = xe_sync_entry_add_deps(&vops->syncs[i], job);
if (job) {
- if (ijob) {
- err = xe_tlb_inval_job_alloc_dep(ijob);
- if (err)
- return err;
- }
+ for (i = 0; i < XE_CPU_BIND_INVAL_JOB_COUNT; ++i) {
+ if (!ijobs[i])
+ continue;
- if (mjob) {
- err = xe_tlb_inval_job_alloc_dep(mjob);
+ err = xe_tlb_inval_job_alloc_dep(ijobs[i]);
if (err)
return err;
}
@@ -1370,17 +1366,14 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job,
return err;
}
-static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
+static int xe_pt_pre_commit(struct xe_cpu_bind_pt_update *pt_update)
{
struct xe_vma_ops *vops = pt_update->vops;
struct xe_vm *vm = vops->vm;
- struct xe_range_fence_tree *rftree = &vm->rftree[pt_update->tile_id];
- struct xe_vm_pgtable_update_ops *pt_update_ops =
- &vops->pt_update_ops[pt_update->tile_id];
+ struct xe_range_fence_tree *rftree = &vm->rftree;
- return xe_pt_vm_dependencies(pt_update->job, pt_update->ijob,
- pt_update->mjob, vm, pt_update->vops,
- pt_update_ops, rftree);
+ return xe_pt_vm_dependencies(pt_update->job, pt_update->ijobs,
+ vm, vops, rftree);
}
#if IS_ENABLED(CONFIG_DRM_GPUSVM)
@@ -1408,8 +1401,7 @@ static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
#endif
-static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_vm_pgtable_update_ops *pt_update)
+static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma)
{
struct xe_userptr_vma *uvma;
unsigned long notifier_seq;
@@ -1439,8 +1431,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
return 0;
}
-static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op,
- struct xe_vm_pgtable_update_ops *pt_update)
+static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op)
{
int err = 0;
@@ -1451,13 +1442,13 @@ static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op,
if (!op->map.immediate && xe_vm_in_fault_mode(vm))
break;
- err = vma_check_userptr(vm, op->map.vma, pt_update);
+ err = vma_check_userptr(vm, op->map.vma);
break;
case DRM_GPUVA_OP_REMAP:
if (op->remap.prev)
- err = vma_check_userptr(vm, op->remap.prev, pt_update);
+ err = vma_check_userptr(vm, op->remap.prev);
if (!err && op->remap.next)
- err = vma_check_userptr(vm, op->remap.next, pt_update);
+ err = vma_check_userptr(vm, op->remap.next);
break;
case DRM_GPUVA_OP_UNMAP:
break;
@@ -1477,7 +1468,7 @@ static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op,
}
}
} else {
- err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), pt_update);
+ err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va));
}
break;
#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
@@ -1503,12 +1494,10 @@ static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op,
return err;
}
-static int xe_pt_svm_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+static int xe_pt_svm_userptr_pre_commit(struct xe_cpu_bind_pt_update *pt_update)
{
struct xe_vm *vm = pt_update->vops->vm;
struct xe_vma_ops *vops = pt_update->vops;
- struct xe_vm_pgtable_update_ops *pt_update_ops =
- &vops->pt_update_ops[pt_update->tile_id];
struct xe_vma_op *op;
int err;
@@ -1519,7 +1508,7 @@ static int xe_pt_svm_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
xe_svm_notifier_lock(vm);
list_for_each_entry(op, &vops->list, link) {
- err = op_check_svm_userptr(vm, op, pt_update_ops);
+ err = op_check_svm_userptr(vm, op);
if (err) {
xe_svm_notifier_unlock(vm);
break;
@@ -1823,10 +1812,10 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
}
static void
-xe_migrate_clear_pgtable_callback(struct xe_vm *vm, struct xe_tile *tile,
- struct iosys_map *map, u32 qword_ofs,
- u32 num_qwords,
- const struct xe_vm_pgtable_update *update)
+xe_pt_clear_pgtable_callback(struct xe_vm *vm, struct xe_tile *tile,
+ struct iosys_map *map, u32 qword_ofs,
+ u32 num_qwords,
+ const struct xe_vm_pgtable_update *update)
{
u64 empty = __xe_pt_empty_pte(tile, vm, update->level);
int i;
@@ -1904,6 +1893,9 @@ to_pt_op(struct xe_vm_pgtable_update_ops *pt_update_ops, u32 op_idx)
static u32
get_current_op(struct xe_vm_pgtable_update_ops *pt_update_ops)
{
+ if (!pt_update_ops->pt_job_ops)
+ return 0;
+
return pt_update_ops->pt_job_ops->current_op;
}
@@ -2187,6 +2179,7 @@ static int unbind_range_prepare(struct xe_vm *vm,
static int op_prepare(struct xe_vm *vm,
struct xe_tile *tile,
+ struct xe_vma_ops *vops,
struct xe_vm_pgtable_update_ops *pt_update_ops,
struct xe_vma_op *op)
{
@@ -2203,7 +2196,7 @@ static int op_prepare(struct xe_vm *vm,
err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma,
op->map.invalidate_on_bind);
- pt_update_ops->wait_vm_kernel = true;
+ vops->flags |= XE_VMA_OPS_FLAG_WAIT_VM_KERNEL;
break;
case DRM_GPUVA_OP_REMAP:
{
@@ -2217,12 +2210,12 @@ static int op_prepare(struct xe_vm *vm,
if (!err && op->remap.prev) {
err = bind_op_prepare(vm, tile, pt_update_ops,
op->remap.prev, false);
- pt_update_ops->wait_vm_bookkeep = true;
+ vops->flags |= XE_VMA_OPS_FLAG_WAIT_VM_KERNEL;
}
if (!err && op->remap.next) {
err = bind_op_prepare(vm, tile, pt_update_ops,
op->remap.next, false);
- pt_update_ops->wait_vm_bookkeep = true;
+ vops->flags |= XE_VMA_OPS_FLAG_WAIT_VM_KERNEL;
}
break;
}
@@ -2252,7 +2245,7 @@ static int op_prepare(struct xe_vm *vm,
}
} else {
err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
- pt_update_ops->wait_vm_kernel = true;
+ vops->flags |= XE_VMA_OPS_FLAG_WAIT_VM_KERNEL;
}
break;
}
@@ -2283,18 +2276,8 @@ xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops)
xe_page_reclaim_list_init(&pt_update_ops->prl);
}
-/**
- * xe_pt_update_ops_prepare() - Prepare PT update operations
- * @tile: Tile of PT update operations
- * @vops: VMA operationa
- *
- * Prepare PT update operations which includes updating internal PT state,
- * allocate memory for page tables, populate page table being pruned in, and
- * create PT update operations for leaf insertion / removal.
- *
- * Return: 0 on success, negative error code on error.
- */
-int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
+static int __xe_pt_update_ops_prepare(struct xe_tile *tile,
+ struct xe_vma_ops *vops)
{
struct xe_vm_pgtable_update_ops *pt_update_ops =
&vops->pt_update_ops[tile->id];
@@ -2313,7 +2296,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
return err;
list_for_each_entry(op, &vops->list, link) {
- err = op_prepare(vops->vm, tile, pt_update_ops, op);
+ err = op_prepare(vops->vm, tile, vops, pt_update_ops, op);
if (err)
return err;
@@ -2322,6 +2305,16 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
xe_tile_assert(tile, get_current_op(pt_update_ops) <=
pt_update_ops->num_ops);
+ /* Propagate individual tile state up to VMA operation */
+ if (pt_update_ops->start < vops->start)
+ vops->start = pt_update_ops->start;
+ if (pt_update_ops->last > vops->last)
+ vops->last = pt_update_ops->last;
+ if (pt_update_ops->needs_invalidation)
+ vops->flags |= XE_VMA_OPS_FLAG_NEEDS_INVALIDATION;
+ if (pt_update_ops->needs_svm_lock)
+ vops->flags |= XE_VMA_OPS_FLAG_NEEDS_SVM_LOCK;
+
#ifdef TEST_VM_OPS_ERROR
if (vops->inject_error &&
vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE)
@@ -2330,35 +2323,68 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
return 0;
}
-ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO);
-static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
- struct xe_vm_pgtable_update_ops *pt_update_ops,
- struct xe_vma *vma, struct dma_fence *fence,
- struct dma_fence *fence2, bool invalidate_on_bind)
+/**
+ * xe_pt_update_ops_prepare() - Prepare PT update operations
+ * @xe: xe device.
+ * @vops: VMA operations
+ *
+ * Prepare PT update operations which includes updating internal PT state,
+ * allocate memory for page tables, populate page table being pruned in, and
+ * create PT update operations for leaf insertion / removal.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_pt_update_ops_prepare(struct xe_device *xe, struct xe_vma_ops *vops)
{
- xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
+ struct xe_tile *tile;
+ int id, err;
+
+ for_each_tile(tile, xe, id) {
+ if (!vops->pt_update_ops[id].num_ops)
+ continue;
- if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
- dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
- pt_update_ops->wait_vm_bookkeep ?
- DMA_RESV_USAGE_KERNEL :
- DMA_RESV_USAGE_BOOKKEEP);
- if (fence2)
- dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2,
- pt_update_ops->wait_vm_bookkeep ?
- DMA_RESV_USAGE_KERNEL :
- DMA_RESV_USAGE_BOOKKEEP);
+ err = __xe_pt_update_ops_prepare(tile, vops);
+ if (err)
+ return err;
}
+
+ return 0;
+}
+ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO);
+
+static void vma_add_fences(struct xe_vma *vma, struct dma_fence **fences,
+ int fence_count, enum dma_resv_usage usage)
+{
+ int i;
+
+ if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm)
+ return;
+
+ for (i = 0; i < fence_count; ++i)
+ if (fences[i])
+ dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv,
+ fences[i], usage);
+}
+
+static void bind_op_commit(struct xe_vm *vm, struct xe_vma *vma,
+ struct dma_fence **fences, int fence_count,
+ enum dma_resv_usage usage, u8 tile_mask,
+ bool invalidate_on_bind)
+{
+ xe_assert(vm->xe, !xe_vma_is_cpu_addr_mirror(vma));
+
+ vma_add_fences(vma, fences, fence_count, usage);
+
/* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
- WRITE_ONCE(vma->tile_present, vma->tile_present | BIT(tile->id));
+ WRITE_ONCE(vma->tile_present, vma->tile_present | tile_mask);
if (invalidate_on_bind)
WRITE_ONCE(vma->tile_invalidated,
- vma->tile_invalidated | BIT(tile->id));
+ vma->tile_invalidated | tile_mask);
else
WRITE_ONCE(vma->tile_invalidated,
- vma->tile_invalidated & ~BIT(tile->id));
- vma->tile_staged &= ~BIT(tile->id);
+ vma->tile_invalidated & ~tile_mask);
+ vma->tile_staged &= ~tile_mask;
if (xe_vma_is_userptr(vma)) {
xe_svm_assert_held_read(vm);
to_userptr_vma(vma)->userptr.initial_bind = true;
@@ -2368,31 +2394,21 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
* Kick rebind worker if this bind triggers preempt fences and not in
* the rebind worker
*/
- if (pt_update_ops->wait_vm_bookkeep &&
+ if (usage == DMA_RESV_USAGE_KERNEL &&
xe_vm_in_preempt_fence_mode(vm) &&
!current->mm)
xe_vm_queue_rebind_worker(vm);
}
-static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
- struct xe_vm_pgtable_update_ops *pt_update_ops,
- struct xe_vma *vma, struct dma_fence *fence,
- struct dma_fence *fence2)
+static void unbind_op_commit(struct xe_vm *vm, struct xe_vma *vma,
+ struct dma_fence **fences, int fence_count,
+ enum dma_resv_usage usage, u8 tile_mask)
{
- xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
+ xe_assert(vm->xe, !xe_vma_is_cpu_addr_mirror(vma));
- if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
- dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
- pt_update_ops->wait_vm_bookkeep ?
- DMA_RESV_USAGE_KERNEL :
- DMA_RESV_USAGE_BOOKKEEP);
- if (fence2)
- dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2,
- pt_update_ops->wait_vm_bookkeep ?
- DMA_RESV_USAGE_KERNEL :
- DMA_RESV_USAGE_BOOKKEEP);
- }
- vma->tile_present &= ~BIT(tile->id);
+ vma_add_fences(vma, fences, fence_count, usage);
+
+ vma->tile_present &= ~tile_mask;
if (!vma->tile_present) {
list_del_init(&vma->combined_links.rebind);
if (xe_vma_is_userptr(vma)) {
@@ -2407,21 +2423,19 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
static void range_present_and_invalidated_tile(struct xe_vm *vm,
struct xe_svm_range *range,
- u8 tile_id)
+ u8 tile_mask)
{
/* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
lockdep_assert_held(&vm->svm.gpusvm.notifier_lock);
- WRITE_ONCE(range->tile_present, range->tile_present | BIT(tile_id));
- WRITE_ONCE(range->tile_invalidated, range->tile_invalidated & ~BIT(tile_id));
+ WRITE_ONCE(range->tile_present, range->tile_present | tile_mask);
+ WRITE_ONCE(range->tile_invalidated, range->tile_invalidated & ~tile_mask);
}
-static void op_commit(struct xe_vm *vm,
- struct xe_tile *tile,
- struct xe_vm_pgtable_update_ops *pt_update_ops,
- struct xe_vma_op *op, struct dma_fence *fence,
- struct dma_fence *fence2)
+static void op_commit(struct xe_vm *vm, struct xe_vma_op *op,
+ struct dma_fence **fences, int fence_count,
+ enum dma_resv_usage usage, u8 tile_mask)
{
xe_vm_assert_held(vm);
@@ -2431,8 +2445,8 @@ static void op_commit(struct xe_vm *vm,
(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR))
break;
- bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence,
- fence2, op->map.invalidate_on_bind);
+ bind_op_commit(vm, op->map.vma, fences, fence_count, usage,
+ tile_mask, op->map.invalidate_on_bind);
break;
case DRM_GPUVA_OP_REMAP:
{
@@ -2441,14 +2455,15 @@ static void op_commit(struct xe_vm *vm,
if (xe_vma_is_cpu_addr_mirror(old))
break;
- unbind_op_commit(vm, tile, pt_update_ops, old, fence, fence2);
+ unbind_op_commit(vm, old, fences, fence_count, usage,
+ tile_mask);
if (op->remap.prev)
- bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
- fence, fence2, false);
+ bind_op_commit(vm, op->remap.prev, fences, fence_count,
+ usage, tile_mask, false);
if (op->remap.next)
- bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
- fence, fence2, false);
+ bind_op_commit(vm, op->remap.next, fences, fence_count,
+ usage, tile_mask, false);
break;
}
case DRM_GPUVA_OP_UNMAP:
@@ -2456,8 +2471,8 @@ static void op_commit(struct xe_vm *vm,
struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
if (!xe_vma_is_cpu_addr_mirror(vma))
- unbind_op_commit(vm, tile, pt_update_ops, vma, fence,
- fence2);
+ unbind_op_commit(vm, vma, fences, fence_count,
+ usage, tile_mask);
break;
}
case DRM_GPUVA_OP_PREFETCH:
@@ -2469,10 +2484,11 @@ static void op_commit(struct xe_vm *vm,
unsigned long i;
xa_for_each(&op->prefetch_range.range, i, range)
- range_present_and_invalidated_tile(vm, range, tile->id);
+ range_present_and_invalidated_tile(vm, range,
+ tile_mask);
} else {
- bind_op_commit(vm, tile, pt_update_ops, vma, fence,
- fence2, false);
+ bind_op_commit(vm, vma, fences, fence_count, usage,
+ tile_mask, false);
}
break;
}
@@ -2480,11 +2496,12 @@ static void op_commit(struct xe_vm *vm,
{
/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
if (op->subop == XE_VMA_SUBOP_MAP_RANGE)
- range_present_and_invalidated_tile(vm, op->map_range.range, tile->id);
+ range_present_and_invalidated_tile(vm, op->map_range.range,
+ tile_mask);
else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
WRITE_ONCE(op->unmap_range.range->tile_present,
op->unmap_range.range->tile_present &
- ~BIT(tile->id));
+ ~tile_mask);
break;
}
@@ -2493,40 +2510,25 @@ static void op_commit(struct xe_vm *vm,
}
}
-static const struct xe_migrate_pt_update_ops migrate_ops = {
+static const struct xe_cpu_bind_pt_update_ops cpu_bind_ops = {
.populate = xe_vm_populate_pgtable,
- .clear = xe_migrate_clear_pgtable_callback,
+ .clear = xe_pt_clear_pgtable_callback,
.pre_commit = xe_pt_pre_commit,
};
#if IS_ENABLED(CONFIG_DRM_GPUSVM)
-static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops = {
+static const struct xe_cpu_bind_pt_update_ops svm_userptr_cpu_bind_ops = {
.populate = xe_vm_populate_pgtable,
- .clear = xe_migrate_clear_pgtable_callback,
+ .clear = xe_pt_clear_pgtable_callback,
.pre_commit = xe_pt_svm_userptr_pre_commit,
};
#else
-static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops;
+static const struct xe_cpu_bind_pt_update_ops svm_userptr_cpu_bind_ops;
#endif
-static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q,
- struct xe_tile *tile,
- struct xe_gt *gt,
- unsigned int *type)
-{
- int tile_ofs = tile->id * (XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT + 1);
-
- if (xe_gt_is_media_type(gt))
- *type = tile_ofs + XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT;
- else
- *type = tile_ofs + XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT;
-
- return q->tlb_inval[*type].dep_scheduler;
-}
-
/**
* xe_pt_update_ops_run() - Run PT update operations
- * @tile: Tile of PT update operations
+ * @xe: xe device.
* @vops: VMA operationa
*
* Run PT update operations which includes committing internal PT state changes,
@@ -2536,82 +2538,83 @@ static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q,
* Return: fence on success, negative ERR_PTR on error.
*/
struct dma_fence *
-xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
+xe_pt_update_ops_run(struct xe_device *xe, struct xe_vma_ops *vops)
{
struct xe_vm *vm = vops->vm;
- struct xe_vm_pgtable_update_ops *pt_update_ops =
- &vops->pt_update_ops[tile->id];
- struct xe_exec_queue *q = pt_update_ops->q;
- struct dma_fence *fence, *ifence = NULL, *mfence = NULL;
- struct xe_tlb_inval_job *ijob = NULL, *mjob = NULL;
+ struct xe_exec_queue *q = vops->q;
+ struct dma_fence *fence;
+ struct dma_fence *ifences[XE_CPU_BIND_INVAL_JOB_COUNT] = {};
struct xe_range_fence *rfence;
+ enum dma_resv_usage usage = DMA_RESV_USAGE_BOOKKEEP;
struct xe_vma_op *op;
- unsigned int type;
- int err = 0, i;
- struct xe_migrate_pt_update update = {
- .ops = pt_update_ops->needs_svm_lock ?
- &svm_userptr_migrate_ops :
- &migrate_ops,
+ struct xe_tile *tile;
+ int err = 0, total_ops = 0, i, j;
+ u8 tile_mask = 0;
+ bool needs_invalidation = vops->flags &
+ XE_VMA_OPS_FLAG_NEEDS_INVALIDATION;
+ bool needs_svm_lock = vops->flags &
+ XE_VMA_OPS_FLAG_NEEDS_SVM_LOCK;
+ struct xe_cpu_bind_pt_update update = {
+ .ops = needs_svm_lock ? &svm_userptr_cpu_bind_ops :
+ &cpu_bind_ops,
.vops = vops,
- .tile_id = tile->id,
};
lockdep_assert_held(&vm->lock);
xe_vm_assert_held(vm);
- if (!get_current_op(pt_update_ops)) {
- xe_tile_assert(tile, xe_vm_in_fault_mode(vm));
+ for_each_tile(tile, xe, j) {
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &vops->pt_update_ops[j];
+ total_ops += get_current_op(pt_update_ops);
+ }
+ if (!total_ops) {
+ xe_assert(xe, xe_vm_in_fault_mode(vm));
return dma_fence_get_stub();
}
#ifdef TEST_VM_OPS_ERROR
if (vops->inject_error &&
- vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN)
+ xe->vm_inject_error_position == FORCE_OP_ERROR_RUN)
return ERR_PTR(-ENOSPC);
#endif
- if (pt_update_ops->needs_invalidation) {
- struct xe_dep_scheduler *dep_scheduler =
- to_dep_scheduler(q, tile, tile->primary_gt, &type);
-
- ijob = xe_tlb_inval_job_create(q, &tile->primary_gt->tlb_inval,
- dep_scheduler, vm,
- pt_update_ops->start,
- pt_update_ops->last,
- type);
- if (IS_ERR(ijob)) {
- err = PTR_ERR(ijob);
- goto kill_vm_tile1;
- }
- update.ijob = ijob;
- /*
- * Only add page reclaim for the primary GT. Media GT does not have
- * any PPC to flush, so enabling the PPC flush bit for media is
- * effectively a NOP and provides no performance benefit nor
- * interfere with primary GT.
- */
- if (xe_page_reclaim_list_valid(&pt_update_ops->prl)) {
- xe_tlb_inval_job_add_page_reclaim(ijob, &pt_update_ops->prl);
- /* Release ref from alloc, job will now handle it */
- xe_page_reclaim_list_invalidate(&pt_update_ops->prl);
- }
-
- if (tile->media_gt) {
- dep_scheduler = to_dep_scheduler(q, tile,
- tile->media_gt, &type);
-
- mjob = xe_tlb_inval_job_create(q,
- &tile->media_gt->tlb_inval,
- dep_scheduler, vm,
- pt_update_ops->start,
- pt_update_ops->last,
- type);
- if (IS_ERR(mjob)) {
- err = PTR_ERR(mjob);
+ if (needs_invalidation) {
+ for_each_tlb_inval(q, i) {
+ struct xe_dep_scheduler *dep_scheduler =
+ q->tlb_inval[i].dep_scheduler;
+ struct xe_tile *tile =
+ &xe->tiles[i / XE_MAX_GT_PER_TILE];
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &vops->pt_update_ops[tile->id];
+ struct xe_page_reclaim_list *prl = &pt_update_ops->prl;
+ struct xe_tlb_inval_job *ijob;
+ struct xe_gt *gt = i % XE_MAX_GT_PER_TILE ?
+ tile->media_gt : tile->primary_gt;
+
+ ijob = xe_tlb_inval_job_create(q, >->tlb_inval,
+ dep_scheduler,
+ vm, pt_update_ops->start,
+ pt_update_ops->last, i);
+ if (IS_ERR(ijob)) {
+ err = PTR_ERR(ijob);
goto free_ijob;
}
- update.mjob = mjob;
+
+ update.ijobs[i] = ijob;
+
+ /*
+ * Only add page reclaim for the primary GT. Media GT
+ * does not have any PPC to flush, so enabling the PPC
+ * flush bit for media is effectively a NOP and provides
+ * no performance benefit nor interfere with primary GT.
+ */
+ if (xe_page_reclaim_list_valid(prl)) {
+ xe_tlb_inval_job_add_page_reclaim(ijob, prl);
+ /* Release ref from alloc, job will now handle it */
+ xe_page_reclaim_list_invalidate(prl);
+ }
}
}
@@ -2621,67 +2624,61 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
goto free_ijob;
}
- fence = xe_migrate_update_pgtables(tile->migrate, &update);
+ fence = xe_cpu_bind_update_pgtables(xe->cpu_bind, &update);
if (IS_ERR(fence)) {
err = PTR_ERR(fence);
goto free_rfence;
}
/* Point of no return - VM killed if failure after this */
- for (i = 0; i < get_current_op(pt_update_ops); ++i) {
- struct xe_vm_pgtable_update_op *pt_op =
- to_pt_op(pt_update_ops, i);
-
- xe_pt_commit(pt_op->vma, pt_op->entries,
- pt_op->num_entries,
- &pt_update_ops->pt_job_ops->deferred);
- pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */
+ for_each_tile(tile, xe, j) {
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &vops->pt_update_ops[j];
+
+ for (i = 0; i < get_current_op(pt_update_ops); ++i) {
+ struct xe_vm_pgtable_update_op *pt_op =
+ to_pt_op(pt_update_ops, i);
+
+ xe_pt_commit(pt_op->vma, pt_op->entries,
+ pt_op->num_entries,
+ &pt_update_ops->pt_job_ops->deferred);
+ pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */
+ tile_mask |= BIT(tile->id);
+ }
}
- if (xe_range_fence_insert(&vm->rftree[tile->id], rfence,
+ if (xe_range_fence_insert(&vm->rftree, rfence,
&xe_range_fence_kfree_ops,
- pt_update_ops->start,
- pt_update_ops->last, fence))
+ vops->start, vops->last, fence))
dma_fence_wait(fence, false);
- if (ijob)
- ifence = xe_tlb_inval_job_push(ijob, tile->migrate, fence);
- if (mjob)
- mfence = xe_tlb_inval_job_push(mjob, tile->migrate, fence);
+ if (vops->flags & XE_VMA_OPS_FLAG_WAIT_VM_BOOKKEEP)
+ usage = DMA_RESV_USAGE_KERNEL;
- if (!mjob && !ijob) {
- dma_resv_add_fence(xe_vm_resv(vm), fence,
- pt_update_ops->wait_vm_bookkeep ?
- DMA_RESV_USAGE_KERNEL :
- DMA_RESV_USAGE_BOOKKEEP);
+ if (!needs_invalidation) {
+ dma_resv_add_fence(xe_vm_resv(vm), fence, usage);
list_for_each_entry(op, &vops->list, link)
- op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
- } else if (ijob && !mjob) {
- dma_resv_add_fence(xe_vm_resv(vm), ifence,
- pt_update_ops->wait_vm_bookkeep ?
- DMA_RESV_USAGE_KERNEL :
- DMA_RESV_USAGE_BOOKKEEP);
-
- list_for_each_entry(op, &vops->list, link)
- op_commit(vops->vm, tile, pt_update_ops, op, ifence, NULL);
+ op_commit(vops->vm, op, &fence, 1, usage, tile_mask);
} else {
- dma_resv_add_fence(xe_vm_resv(vm), ifence,
- pt_update_ops->wait_vm_bookkeep ?
- DMA_RESV_USAGE_KERNEL :
- DMA_RESV_USAGE_BOOKKEEP);
+ for (i = 0; i < XE_CPU_BIND_INVAL_JOB_COUNT; ++i) {
+ if (!update.ijobs[i])
+ continue;
+
+ ifences[i] = xe_tlb_inval_job_push(update.ijobs[i],
+ fence);
+ xe_assert(xe, !IS_ERR_OR_NULL(ifences[i]));
- dma_resv_add_fence(xe_vm_resv(vm), mfence,
- pt_update_ops->wait_vm_bookkeep ?
- DMA_RESV_USAGE_KERNEL :
- DMA_RESV_USAGE_BOOKKEEP);
+ dma_resv_add_fence(xe_vm_resv(vm), ifences[i], usage);
+ }
list_for_each_entry(op, &vops->list, link)
- op_commit(vops->vm, tile, pt_update_ops, op, ifence,
- mfence);
+ op_commit(vops->vm, op, ifences,
+ XE_CPU_BIND_INVAL_JOB_COUNT, usage,
+ tile_mask);
}
- if (pt_update_ops->needs_svm_lock)
+ if (needs_svm_lock)
xe_svm_notifier_unlock(vm);
/*
@@ -2691,21 +2688,18 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
if (!(q->flags & EXEC_QUEUE_FLAG_MIGRATE))
xe_exec_queue_last_fence_set(q, vm, fence);
- xe_tlb_inval_job_put(mjob);
- xe_tlb_inval_job_put(ijob);
- dma_fence_put(ifence);
- dma_fence_put(mfence);
+ for (i = 0; i < XE_CPU_BIND_INVAL_JOB_COUNT; ++i) {
+ xe_tlb_inval_job_put(update.ijobs[i]);
+ dma_fence_put(ifences[i]);
+ }
return fence;
free_rfence:
kfree(rfence);
free_ijob:
- xe_tlb_inval_job_put(mjob);
- xe_tlb_inval_job_put(ijob);
-kill_vm_tile1:
- if (err != -EAGAIN && err != -ENODATA && tile->id)
- xe_vm_kill(vops->vm, false);
+ for (i = 0; i < XE_CPU_BIND_INVAL_JOB_COUNT; ++i)
+ xe_tlb_inval_job_put(update.ijobs[i]);
return ERR_PTR(err);
}
@@ -2713,52 +2707,65 @@ ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO);
/**
* xe_pt_update_ops_fini() - Finish PT update operations
- * @tile: Tile of PT update operations
+ * @xe: xe device.
* @vops: VMA operations
*
* Finish PT update operations by committing to destroy page table memory
*/
-void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops)
+void xe_pt_update_ops_fini(struct xe_device *xe, struct xe_vma_ops *vops)
{
- struct xe_vm_pgtable_update_ops *pt_update_ops =
- &vops->pt_update_ops[tile->id];
+ struct xe_tile *tile;
+ int id;
+
+ for_each_tile(tile, xe, id) {
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &vops->pt_update_ops[id];
- xe_page_reclaim_entries_put(pt_update_ops->prl.entries);
+ if (!pt_update_ops->num_ops)
+ continue;
+
+ xe_page_reclaim_entries_put(pt_update_ops->prl.entries);
+ }
}
/**
* xe_pt_update_ops_abort() - Abort PT update operations
- * @tile: Tile of PT update operations
+ * @xe: xe device.
* @vops: VMA operationa
*
* Abort PT update operations by unwinding internal PT state
*/
-void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops)
+void xe_pt_update_ops_abort(struct xe_device *xe, struct xe_vma_ops *vops)
{
- struct xe_vm_pgtable_update_ops *pt_update_ops =
- &vops->pt_update_ops[tile->id];
- int i;
+ struct xe_tile *tile;
+ int id;
lockdep_assert_held(&vops->vm->lock);
xe_vm_assert_held(vops->vm);
- for (i = pt_update_ops->num_ops - 1; i >= 0; --i) {
- struct xe_vm_pgtable_update_op *pt_op =
- to_pt_op(pt_update_ops, i);
-
- if (!pt_op->vma || i >= get_current_op(pt_update_ops))
- continue;
-
- if (pt_op->bind)
- xe_pt_abort_bind(pt_op->vma, pt_op->entries,
- pt_op->num_entries,
- pt_op->rebind);
- else
- xe_pt_abort_unbind(pt_op->vma, pt_op->entries,
- pt_op->num_entries);
+ for_each_tile(tile, xe, id) {
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &vops->pt_update_ops[id];
+ int i;
+
+ for (i = pt_update_ops->num_ops - 1; i >= 0; --i) {
+ struct xe_vm_pgtable_update_op *pt_op =
+ to_pt_op(pt_update_ops, i);
+
+ if (!pt_op->vma || i >= get_current_op(pt_update_ops))
+ continue;
+
+ if (pt_op->bind)
+ xe_pt_abort_bind(pt_op->vma, pt_op->entries,
+ pt_op->num_entries,
+ pt_op->rebind);
+ else
+ xe_pt_abort_unbind(pt_op->vma, pt_op->entries,
+ pt_op->num_entries);
+ }
}
- xe_pt_update_ops_fini(tile, vops);
+ xe_pt_update_ops_fini(xe, vops);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 5faddb8e700c..cd78141fb81c 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -39,11 +39,11 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred);
void xe_pt_clear(struct xe_device *xe, struct xe_pt *pt);
-int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops);
-struct dma_fence *xe_pt_update_ops_run(struct xe_tile *tile,
+int xe_pt_update_ops_prepare(struct xe_device *xe, struct xe_vma_ops *vops);
+struct dma_fence *xe_pt_update_ops_run(struct xe_device *xe,
struct xe_vma_ops *vops);
-void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops);
-void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops);
+void xe_pt_update_ops_fini(struct xe_device *xe, struct xe_vma_ops *vops);
+void xe_pt_update_ops_abort(struct xe_device *xe, struct xe_vma_ops *vops);
bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm,
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index aa1d7c0e8669..5cdd7cd25a91 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -120,8 +120,6 @@ struct xe_pt_job_ops {
struct xe_vm_pgtable_update_ops {
/** @pt_job_ops: PT update operations dynamic allocation*/
struct xe_pt_job_ops *pt_job_ops;
- /** @q: exec queue for PT operations */
- struct xe_exec_queue *q;
/** @prl: embedded page reclaim list */
struct xe_page_reclaim_list prl;
/** @start: start address of ops */
@@ -134,18 +132,6 @@ struct xe_vm_pgtable_update_ops {
bool needs_svm_lock;
/** @needs_invalidation: Needs invalidation */
bool needs_invalidation;
- /**
- * @wait_vm_bookkeep: PT operations need to wait until VM is idle
- * (bookkeep dma-resv slots are idle) and stage all future VM activity
- * behind these operations (install PT operations into VM kernel
- * dma-resv slot).
- */
- bool wait_vm_bookkeep;
- /**
- * @wait_vm_kernel: PT operations need to wait until VM kernel dma-resv
- * slots are idle.
- */
- bool wait_vm_kernel;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index a8ba7f90368f..3fde9b386bb9 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -73,8 +73,9 @@ static void job_free(struct xe_sched_job *job)
struct xe_exec_queue *q = job->q;
bool is_migration = xe_sched_job_is_migration(q);
- kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ?
- xe_sched_job_parallel_slab : xe_sched_job_slab, job);
+ kmem_cache_free(job->is_pt_job || xe_exec_queue_is_parallel(job->q) ||
+ is_migration ? xe_sched_job_parallel_slab :
+ xe_sched_job_slab, job);
}
static struct xe_device *job_to_xe(struct xe_sched_job *job)
@@ -124,10 +125,12 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
xe_assert(xe, batch_addr ||
q->flags & (EXEC_QUEUE_FLAG_VM | EXEC_QUEUE_FLAG_MIGRATE));
- job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration);
+ job = job_alloc(!batch_addr || xe_exec_queue_is_parallel(q) ||
+ is_migration);
if (!job)
return ERR_PTR(-ENOMEM);
+ job->is_pt_job = !batch_addr;
job->q = q;
job->sample_timestamp = U64_MAX;
kref_init(&job->refcount);
@@ -140,7 +143,6 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
if (!batch_addr) {
job->fence = dma_fence_get_stub();
- job->is_pt_job = true;
} else {
for (i = 0; i < q->width; ++i) {
struct dma_fence *fence = xe_lrc_alloc_seqno_fence();
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index 9be4e2c5989d..3a797de746ad 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -14,7 +14,7 @@ struct dma_fence;
struct dma_fence_chain;
struct xe_exec_queue;
-struct xe_migrate_pt_update_ops;
+struct xe_cpu_bind_pt_update_ops;
struct xe_pt_job_ops;
struct xe_tile;
struct xe_vm;
@@ -25,12 +25,11 @@ struct xe_vm;
struct xe_pt_update_args {
/** @vm: VM which is being bound */
struct xe_vm *vm;
- /** @tile: Tile which page tables belong to */
- struct xe_tile *tile;
- /** @ops: Migrate PT update ops */
- const struct xe_migrate_pt_update_ops *ops;
+ /** @ops: CPU bind PT update ops */
+ const struct xe_cpu_bind_pt_update_ops *ops;
+#define XE_PT_UPDATE_JOB_OPS_COUNT 2
/** @pt_job_ops: PT job ops state */
- struct xe_pt_job_ops *pt_job_ops;
+ struct xe_pt_job_ops *pt_job_ops[XE_PT_UPDATE_JOB_OPS_COUNT];
};
/**
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
index 81f560068d3c..7378cfe6e855 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
@@ -4,6 +4,7 @@
*/
#include "xe_assert.h"
+#include "xe_cpu_bind.h"
#include "xe_dep_job_types.h"
#include "xe_dep_scheduler.h"
#include "xe_exec_queue.h"
@@ -12,7 +13,6 @@
#include "xe_page_reclaim.h"
#include "xe_tlb_inval.h"
#include "xe_tlb_inval_job.h"
-#include "xe_migrate.h"
#include "xe_pm.h"
#include "xe_vm.h"
@@ -218,7 +218,6 @@ int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job)
/**
* xe_tlb_inval_job_push() - TLB invalidation job push
* @job: TLB invalidation job to push
- * @m: The migration object being used
* @fence: Dependency for TLB invalidation job
*
* Pushes a TLB invalidation job for execution, using @fence as a dependency.
@@ -230,11 +229,11 @@ int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job)
* Return: Job's finished fence on success, cannot fail
*/
struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
- struct xe_migrate *m,
struct dma_fence *fence)
{
struct xe_tlb_inval_fence *ifence =
container_of(job->fence, typeof(*ifence), base);
+ struct xe_cpu_bind *cpu_bind = gt_to_xe(job->q->gt)->cpu_bind;
if (!dma_fence_is_signaled(fence)) {
void *ptr;
@@ -258,11 +257,11 @@ struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
job->fence_armed = true;
/*
- * We need the migration lock to protect the job's seqno and the spsc
- * queue, only taken on migration queue, user queues protected dma-resv
+ * We need the cpu_bind lock to protect the job's seqno and the spsc
+ * queue, only taken on cpu_bind queue, user queues protected dma-resv
* VM lock.
*/
- xe_migrate_job_lock(m, job->q);
+ xe_cpu_bind_job_lock(cpu_bind, job->q);
/* Creation ref pairs with put in xe_tlb_inval_job_destroy */
xe_tlb_inval_fence_init(job->tlb_inval, ifence, false);
@@ -281,7 +280,7 @@ struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
&job->dep.drm.s_fence->finished,
job->idx);
- xe_migrate_job_unlock(m, job->q);
+ xe_cpu_bind_job_unlock(cpu_bind, job->q);
/*
* Not using job->fence, as it has its own dma-fence context, which does
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
index 2a4478f529e6..97e032ea21c3 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
@@ -11,7 +11,6 @@
struct dma_fence;
struct xe_dep_scheduler;
struct xe_exec_queue;
-struct xe_migrate;
struct xe_page_reclaim_list;
struct xe_tlb_inval;
struct xe_tlb_inval_job;
@@ -28,7 +27,6 @@ void xe_tlb_inval_job_add_page_reclaim(struct xe_tlb_inval_job *job,
int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job);
struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
- struct xe_migrate *m,
struct dma_fence *fence);
void xe_tlb_inval_job_get(struct xe_tlb_inval_job *job);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 52212b51caa8..b3928e05b70a 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -24,6 +24,7 @@
#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
+#include "xe_cpu_bind.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
@@ -688,8 +689,6 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
struct xe_vma *vma, *next;
struct xe_vma_ops vops;
struct xe_vma_op *op, *next_op;
- struct xe_tile *tile;
- u8 id;
int err;
lockdep_assert_held(&vm->lock);
@@ -697,12 +696,9 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
list_empty(&vm->rebind_list))
return 0;
- xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
- for_each_tile(tile, vm->xe, id) {
- vops.pt_update_ops[id].wait_vm_bookkeep = true;
- vops.pt_update_ops[id].q =
- xe_migrate_bind_queue(tile->migrate);
- }
+ xe_vma_ops_init(&vops, vm, xe_cpu_bind_queue(vm->xe->cpu_bind),
+ NULL, 0);
+ vops.flags |= XE_VMA_OPS_FLAG_WAIT_VM_BOOKKEEP;
xe_vm_assert_held(vm);
list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
@@ -747,21 +743,16 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma
struct dma_fence *fence = NULL;
struct xe_vma_ops vops;
struct xe_vma_op *op, *next_op;
- struct xe_tile *tile;
- u8 id;
int err;
lockdep_assert_held(&vm->lock);
xe_vm_assert_held(vm);
xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
- xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
- vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
- for_each_tile(tile, vm->xe, id) {
- vops.pt_update_ops[id].wait_vm_bookkeep = true;
- vops.pt_update_ops[tile->id].q =
- xe_migrate_bind_queue(tile->migrate);
- }
+ xe_vma_ops_init(&vops, vm, xe_cpu_bind_queue(vm->xe->cpu_bind),
+ NULL, 0);
+ vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT |
+ XE_VMA_OPS_FLAG_WAIT_VM_BOOKKEEP;
err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
if (err)
@@ -837,8 +828,6 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
struct dma_fence *fence = NULL;
struct xe_vma_ops vops;
struct xe_vma_op *op, *next_op;
- struct xe_tile *tile;
- u8 id;
int err;
lockdep_assert_held(&vm->lock);
@@ -846,13 +835,10 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
- xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
- vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
- for_each_tile(tile, vm->xe, id) {
- vops.pt_update_ops[id].wait_vm_bookkeep = true;
- vops.pt_update_ops[tile->id].q =
- xe_migrate_bind_queue(tile->migrate);
- }
+ xe_vma_ops_init(&vops, vm, xe_cpu_bind_queue(vm->xe->cpu_bind),
+ NULL, 0);
+ vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT |
+ XE_VMA_OPS_FLAG_WAIT_VM_BOOKKEEP;
err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
if (err)
@@ -919,8 +905,6 @@ struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
struct dma_fence *fence = NULL;
struct xe_vma_ops vops;
struct xe_vma_op *op, *next_op;
- struct xe_tile *tile;
- u8 id;
int err;
lockdep_assert_held(&vm->lock);
@@ -930,12 +914,9 @@ struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
if (!range->tile_present)
return dma_fence_get_stub();
- xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
- for_each_tile(tile, vm->xe, id) {
- vops.pt_update_ops[id].wait_vm_bookkeep = true;
- vops.pt_update_ops[tile->id].q =
- xe_migrate_bind_queue(tile->migrate);
- }
+ xe_vma_ops_init(&vops, vm, xe_cpu_bind_queue(vm->xe->cpu_bind),
+ NULL, 0);
+ vops.flags |= XE_VMA_OPS_FLAG_WAIT_VM_BOOKKEEP;
err = xe_vm_ops_add_range_unbind(&vops, range);
if (err)
@@ -1555,9 +1536,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
init_rwsem(&vm->exec_queues.lock);
xe_vm_init_prove_locking(xe, vm);
-
- for_each_tile(tile, xe, id)
- xe_range_fence_tree_init(&vm->rftree[id]);
+ xe_range_fence_tree_init(&vm->rftree);
vm->pt_ops = &xelp_pt_ops;
@@ -1701,8 +1680,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
}
err_no_resv:
mutex_destroy(&vm->snap_mutex);
- for_each_tile(tile, xe, id)
- xe_range_fence_tree_fini(&vm->rftree[id]);
+ xe_range_fence_tree_fini(&vm->rftree);
ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
if (vm->xef)
xe_file_put(vm->xef);
@@ -1758,10 +1736,8 @@ void xe_vm_close_and_put(struct xe_vm *vm)
{
LIST_HEAD(contested);
struct xe_device *xe = vm->xe;
- struct xe_tile *tile;
struct xe_vma *vma, *next_vma;
struct drm_gpuva *gpuva, *next;
- u8 id;
xe_assert(xe, !vm->preempt.num_exec_queues);
@@ -1851,8 +1827,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
}
up_write(&xe->usm.lock);
- for_each_tile(tile, xe, id)
- xe_range_fence_tree_fini(&vm->rftree[id]);
+ xe_range_fence_tree_fini(&vm->rftree);
xe_vm_put(vm);
}
@@ -3141,23 +3116,16 @@ static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
{
- struct xe_exec_queue *q = vops->q;
struct xe_tile *tile;
int number_tiles = 0;
u8 id;
- for_each_tile(tile, vm->xe, id) {
+ for_each_tile(tile, vm->xe, id)
if (vops->pt_update_ops[id].num_ops)
++number_tiles;
- if (vops->pt_update_ops[id].q)
- continue;
-
- if (q)
- vops->pt_update_ops[id].q = q;
- else
- vops->pt_update_ops[id].q = vm->q;
- }
+ if (!vops->q)
+ vops->q = vm->q;
return number_tiles;
}
@@ -3165,22 +3133,17 @@ static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
static struct dma_fence *ops_execute(struct xe_vm *vm,
struct xe_vma_ops *vops)
{
- struct xe_tile *tile;
+ struct xe_device *xe = vm->xe;
struct dma_fence *fence = NULL;
struct dma_fence **fences = NULL;
struct dma_fence_array *cf = NULL;
- int number_tiles = 0, current_fence = 0, n_fence = 0, err, i;
- u8 id;
+ int current_fence = 0, n_fence = 1, err, i;
- number_tiles = vm_ops_setup_tile_args(vm, vops);
- if (number_tiles == 0)
+ if (!vm_ops_setup_tile_args(vm, vops))
return ERR_PTR(-ENODATA);
- for_each_tile(tile, vm->xe, id)
- ++n_fence;
-
if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)) {
- for_each_tlb_inval(vops->pt_update_ops[0].q, i)
+ for_each_tlb_inval(vops->q, i)
++n_fence;
}
@@ -3196,71 +3159,40 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
goto err_out;
}
- for_each_tile(tile, vm->xe, id) {
- if (!vops->pt_update_ops[id].num_ops)
- continue;
-
- err = xe_pt_update_ops_prepare(tile, vops);
- if (err) {
- fence = ERR_PTR(err);
- goto err_out;
- }
+ err = xe_pt_update_ops_prepare(xe, vops);
+ if (err) {
+ fence = ERR_PTR(err);
+ goto err_out;
}
trace_xe_vm_ops_execute(vops);
- for_each_tile(tile, vm->xe, id) {
- struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q;
-
- fence = NULL;
- if (!vops->pt_update_ops[id].num_ops)
- goto collect_fences;
-
- fence = xe_pt_update_ops_run(tile, vops);
- if (IS_ERR(fence))
- goto err_out;
-
-collect_fences:
- fences[current_fence++] = fence ?: dma_fence_get_stub();
- if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)
- continue;
+ fence = xe_pt_update_ops_run(xe, vops);
+ if (IS_ERR(fence))
+ goto err_out;
- xe_migrate_job_lock(tile->migrate, q);
- for_each_tlb_inval(q, i) {
- if (i >= (tile->id + 1) * XE_MAX_GT_PER_TILE ||
- i < tile->id * XE_MAX_GT_PER_TILE)
- continue;
+ fences[current_fence++] = fence;
- fences[current_fence++] = fence ?
- xe_exec_queue_tlb_inval_last_fence_get(q, vm, i) :
- dma_fence_get_stub();
- }
- xe_migrate_job_unlock(tile->migrate, q);
+ if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)) {
+ xe_cpu_bind_job_lock(xe->cpu_bind, vops->q);
+ for_each_tlb_inval(vops->q, i)
+ fences[current_fence++] =
+ xe_exec_queue_tlb_inval_last_fence_get(vops->q,
+ vm, i);
+ xe_cpu_bind_job_unlock(xe->cpu_bind, vops->q);
}
- xe_assert(vm->xe, current_fence == n_fence);
+ xe_assert(xe, current_fence == n_fence);
dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
1, false);
fence = &cf->base;
- for_each_tile(tile, vm->xe, id) {
- if (!vops->pt_update_ops[id].num_ops)
- continue;
-
- xe_pt_update_ops_fini(tile, vops);
- }
+ xe_pt_update_ops_fini(xe, vops);
return fence;
err_out:
- for_each_tile(tile, vm->xe, id) {
- if (!vops->pt_update_ops[id].num_ops)
- continue;
-
- xe_pt_update_ops_abort(tile, vops);
- }
- while (current_fence)
- dma_fence_put(fences[--current_fence]);
+ xe_pt_update_ops_abort(xe, vops);
kfree(fences);
kfree(cf);
@@ -3553,6 +3485,8 @@ static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
vops->syncs = syncs;
vops->num_syncs = num_syncs;
vops->flags = 0;
+ vops->start = ~0x0ull;
+ vops->last = 0x0ull;
}
static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 2c173550346a..b4593bd3fe58 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -264,7 +264,7 @@ struct xe_vm {
* @rftree: range fence tree to track updates to page table structure.
* Used to implement conflict tracking between independent bind engines.
*/
- struct xe_range_fence_tree rftree[XE_MAX_TILES_PER_DEVICE];
+ struct xe_range_fence_tree rftree;
const struct xe_pt_ops *pt_ops;
@@ -492,12 +492,20 @@ struct xe_vma_ops {
u32 num_syncs;
/** @pt_update_ops: page table update operations */
struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE];
+ /** @start: start address of ops */
+ u64 start;
+ /** @last: last address of ops */
+ u64 last;
/** @flag: signify the properties within xe_vma_ops*/
-#define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0)
-#define XE_VMA_OPS_FLAG_MADVISE BIT(1)
-#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2)
-#define XE_VMA_OPS_FLAG_SKIP_TLB_WAIT BIT(3)
-#define XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP BIT(4)
+#define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0)
+#define XE_VMA_OPS_FLAG_MADVISE BIT(1)
+#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2)
+#define XE_VMA_OPS_FLAG_SKIP_TLB_WAIT BIT(3)
+#define XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP BIT(4)
+#define XE_VMA_OPS_FLAG_WAIT_VM_BOOKKEEP BIT(5)
+#define XE_VMA_OPS_FLAG_WAIT_VM_KERNEL BIT(6)
+#define XE_VMA_OPS_FLAG_NEEDS_INVALIDATION BIT(7)
+#define XE_VMA_OPS_FLAG_NEEDS_SVM_LOCK BIT(8)
u32 flags;
#ifdef TEST_VM_OPS_ERROR
/** @inject_error: inject error to test error handling */
--
2.34.1
next prev parent reply other threads:[~2026-02-28 1:35 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-28 1:34 [PATCH v3 00/25] CPU binds and ULLS on migration queue Matthew Brost
2026-02-28 1:34 ` [PATCH v3 01/25] drm/xe: Drop struct xe_migrate_pt_update argument from populate/clear vfuns Matthew Brost
2026-03-05 14:17 ` Francois Dugast
2026-02-28 1:34 ` [PATCH v3 02/25] drm/xe: Add xe_migrate_update_pgtables_cpu_execute helper Matthew Brost
2026-03-05 14:39 ` Francois Dugast
2026-02-28 1:34 ` [PATCH v3 03/25] drm/xe: Decouple exec queue idle check from LRC Matthew Brost
2026-03-02 20:50 ` Summers, Stuart
2026-03-02 21:02 ` Matthew Brost
2026-03-03 21:26 ` Summers, Stuart
2026-03-03 22:42 ` Matthew Brost
2026-03-03 22:54 ` Summers, Stuart
2026-02-28 1:34 ` [PATCH v3 04/25] drm/xe: Add job count to GuC exec queue snapshot Matthew Brost
2026-03-02 20:50 ` Summers, Stuart
2026-02-28 1:34 ` [PATCH v3 05/25] drm/xe: Update xe_bo_put_deferred arguments to include writeback flag Matthew Brost
2026-04-01 12:20 ` Francois Dugast
2026-04-01 22:39 ` Matthew Brost
2026-02-28 1:34 ` [PATCH v3 06/25] drm/xe: Add XE_BO_FLAG_PUT_VM_ASYNC Matthew Brost
2026-04-01 12:22 ` Francois Dugast
2026-04-01 22:38 ` Matthew Brost
2026-02-28 1:34 ` [PATCH v3 07/25] drm/xe: Update scheduler job layer to support PT jobs Matthew Brost
2026-03-03 22:50 ` Summers, Stuart
2026-03-03 23:00 ` Matthew Brost
2026-02-28 1:34 ` [PATCH v3 08/25] drm/xe: Add helpers to access PT ops Matthew Brost
2026-04-07 15:22 ` Francois Dugast
2026-02-28 1:34 ` [PATCH v3 09/25] drm/xe: Add struct xe_pt_job_ops Matthew Brost
2026-03-03 23:26 ` Summers, Stuart
2026-03-03 23:28 ` Matthew Brost
2026-02-28 1:34 ` [PATCH v3 10/25] drm/xe: Update GuC submission backend to run PT jobs Matthew Brost
2026-03-03 23:28 ` Summers, Stuart
2026-03-04 0:26 ` Matthew Brost
2026-03-04 20:43 ` Summers, Stuart
2026-03-04 21:53 ` Matthew Brost
2026-03-05 20:24 ` Summers, Stuart
2026-02-28 1:34 ` [PATCH v3 11/25] drm/xe: Store level in struct xe_vm_pgtable_update Matthew Brost
2026-03-03 23:44 ` Summers, Stuart
2026-02-28 1:34 ` [PATCH v3 12/25] drm/xe: Don't use migrate exec queue for page fault binds Matthew Brost
2026-02-28 1:34 ` [PATCH v3 13/25] drm/xe: Enable CPU binds for jobs Matthew Brost
2026-02-28 1:34 ` [PATCH v3 14/25] drm/xe: Remove unused arguments from xe_migrate_pt_update_ops Matthew Brost
2026-02-28 1:34 ` [PATCH v3 15/25] drm/xe: Make bind queues operate cross-tile Matthew Brost
2026-02-28 1:34 ` Matthew Brost [this message]
2026-02-28 1:34 ` [PATCH v3 17/25] drm/xe: Add device flag to enable PT mirroring across tiles Matthew Brost
2026-02-28 1:34 ` [PATCH v3 18/25] drm/xe: Add xe_hw_engine_write_ring_tail Matthew Brost
2026-02-28 1:34 ` [PATCH v3 19/25] drm/xe: Add ULLS support to LRC Matthew Brost
2026-03-05 20:21 ` Francois Dugast
2026-02-28 1:34 ` [PATCH v3 20/25] drm/xe: Add ULLS migration job support to migration layer Matthew Brost
2026-03-05 23:34 ` Summers, Stuart
2026-03-09 23:11 ` Matthew Brost
2026-02-28 1:34 ` [PATCH v3 21/25] drm/xe: Add MI_SEMAPHORE_WAIT instruction defs Matthew Brost
2026-02-28 1:34 ` [PATCH v3 22/25] drm/xe: Add ULLS migration job support to ring ops Matthew Brost
2026-02-28 1:34 ` [PATCH v3 23/25] drm/xe: Add ULLS migration job support to GuC submission Matthew Brost
2026-02-28 1:35 ` [PATCH v3 24/25] drm/xe: Enter ULLS for migration jobs upon page fault or SVM prefetch Matthew Brost
2026-02-28 1:35 ` [PATCH v3 25/25] drm/xe: Add modparam to enable / disable ULLS on migrate queue Matthew Brost
2026-03-05 22:59 ` Summers, Stuart
2026-04-01 22:44 ` Matthew Brost
2026-02-28 1:43 ` ✗ CI.checkpatch: warning for CPU binds and ULLS on migration queue (rev3) Patchwork
2026-02-28 1:44 ` ✓ CI.KUnit: success " Patchwork
2026-02-28 2:32 ` ✓ Xe.CI.BAT: " Patchwork
2026-02-28 13:59 ` ✗ Xe.CI.FULL: failure " Patchwork
2026-03-02 17:54 ` Summers, Stuart
2026-03-02 18:13 ` Matthew Brost
2026-03-05 22:56 ` [PATCH v3 00/25] CPU binds and ULLS on migration queue Summers, Stuart
2026-03-10 22:17 ` Matthew Brost
2026-03-20 15:31 ` Thomas Hellström
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260228013501.106680-17-matthew.brost@intel.com \
--to=matthew.brost@intel.com \
--cc=arvind.yadav@intel.com \
--cc=francois.dugast@intel.com \
--cc=himal.prasad.ghimiray@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=stuart.summers@intel.com \
--cc=thomas.hellstrom@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox