Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: intel-xe@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Cc: himal.prasad.ghimiray@intel.com, apopple@nvidia.com,
	airlied@gmail.com, thomas.hellstrom@linux.intel.com,
	simona.vetter@ffwll.ch, felix.kuehling@amd.com, dakr@kernel.org
Subject: [PATCH v5 20/32] drm/xe: Add migrate layer functions for SVM support
Date: Wed, 12 Feb 2025 18:11:00 -0800	[thread overview]
Message-ID: <20250213021112.1228481-21-matthew.brost@intel.com> (raw)
In-Reply-To: <20250213021112.1228481-1-matthew.brost@intel.com>

Add functions which migrate to / from VRAM, accepting a single device
physical address (DPA) for VRAM and an array of dma addresses for SRAM.
Used for SVM migrations.

v2:
 - Don't unlock job_mutex in error path of xe_migrate_vram
v3:
 - Kernel doc (Thomas)
 - Better commit message (Thomas)
 - s/dword/num_dword (Thomas)
 - Return error on too large of a migration (Thomas)

Signed-off-by: Oak Zeng <oak.zeng@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 175 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_migrate.h |  10 ++
 2 files changed, 185 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 278bc96cf593..df4282c71bf0 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1544,6 +1544,181 @@ void xe_migrate_wait(struct xe_migrate *m)
 		dma_fence_wait(m->fence, false);
 }
 
+static u32 pte_update_cmd_size(u64 size)
+{
+	u32 num_dword;
+	u64 entries = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+
+	XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
+	/*
+	 * Dwords needed to write one PTE per XE_PAGE_SIZE of @size using
+	 * MI_STORE_DATA_IMM. Each instruction updates at most 0x1ff PTEs;
+	 * to update n (n <= 0x1ff) PTEs, we need:
+	 * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc.)
+	 * 2 dwords for the page table's physical location
+	 * 2*n dwords for the PTE values to write (each PTE is 2 dwords)
+	 */
+	num_dword = (1 + 2) * DIV_ROUND_UP(entries, 0x1ff);
+	num_dword += entries * 2;
+
+	return num_dword;
+}
+
+static void build_pt_update_batch_sram(struct xe_migrate *m,
+				       struct xe_bb *bb, u32 pt_offset,
+				       dma_addr_t *sram_addr, u32 size)
+{
+	u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
+	u32 ptes;
+	int i = 0; /* sram_addr[] index, advanced once per emitted PTE */
+
+	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); /* NOTE: PAGE_SIZE == XE_PAGE_SIZE assumed? */
+	while (ptes) {
+		u32 chunk = min(0x1ffU, ptes); /* PTEs in this command */
+
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+		bb->cs[bb->len++] = pt_offset; /* PT location (2 dwords) */
+		bb->cs[bb->len++] = 0;
+
+		pt_offset += chunk * 8; /* 8 bytes (2 dwords) per PTE */
+		ptes -= chunk;
+
+		while (chunk--) {
+			u64 addr = sram_addr[i++] & PAGE_MASK;
+
+			xe_tile_assert(m->tile, addr); /* expect non-zero */
+			addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
+								 addr, pat_index,
+								 0, false, 0);
+			bb->cs[bb->len++] = lower_32_bits(addr);
+			bb->cs[bb->len++] = upper_32_bits(addr);
+		}
+	}
+}
+
+enum xe_migrate_copy_dir {
+	XE_MIGRATE_COPY_TO_VRAM, /* source: dma addresses, destination: VRAM */
+	XE_MIGRATE_COPY_TO_SRAM, /* source: VRAM, destination: dma addresses */
+};
+
+static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
+					 unsigned long npages,
+					 dma_addr_t *sram_addr, u64 vram_addr,
+					 const enum xe_migrate_copy_dir dir)
+{
+	struct xe_gt *gt = m->tile->primary_gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u32 batch_size = 2;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u64 round_update_size;
+	struct xe_sched_job *job;
+	struct xe_bb *bb;
+	u32 update_idx, pt_slot = 0;
+	int err;
+
+	if (npages * PAGE_SIZE > MAX_PREEMPTDISABLE_TRANSFER) /* too large */
+		return ERR_PTR(-EINVAL);
+
+	round_update_size = npages * PAGE_SIZE; /* bytes to copy */
+	batch_size += pte_update_cmd_size(round_update_size);
+	batch_size += EMIT_COPY_DW;
+
+	bb = xe_bb_new(gt, batch_size, true);
+	if (IS_ERR(bb)) {
+		err = PTR_ERR(bb);
+		return ERR_PTR(err);
+	}
+
+	build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
+				   sram_addr, round_update_size);
+
+	if (dir == XE_MIGRATE_COPY_TO_VRAM) {
+		src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0);
+		dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
+
+	} else {
+		src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
+		dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0);
+	}
+
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END; /* ends the PTE-update part */
+	update_idx = bb->len; /* copy commands are emitted from here */
+
+	emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, round_update_size,
+		  XE_PAGE_SIZE);
+
+	job = xe_bb_create_migration_job(m->q, bb,
+					 xe_migrate_batch_base(m, true),
+					 update_idx);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto err;
+	}
+
+	xe_sched_job_add_migrate_flush(job, 0);
+
+	mutex_lock(&m->job_mutex);
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished); /* returned to caller */
+	xe_sched_job_push(job);
+
+	dma_fence_put(m->fence); /* drop the previously tracked fence */
+	m->fence = dma_fence_get(fence); /* m->fence takes its own reference */
+	mutex_unlock(&m->job_mutex);
+
+	xe_bb_free(bb, fence);
+
+	return fence;
+
+err:
+	xe_bb_free(bb, NULL); /* job creation failed: no fence to pass */
+
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_migrate_to_vram() - Migrate to VRAM
+ * @m: The migration context.
+ * @npages: Number of pages to migrate.
+ * @src_addr: Array of dma addresses (source of migrate)
+ * @dst_addr: Device physical address of VRAM (destination of migrate)
+ *
+ * Copy from an array of dma addresses to a VRAM device physical address.
+ *
+ * Return: dma fence for migrate to signal completion on success, ERR_PTR on
+ * failure
+ */
+struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
+				     unsigned long npages,
+				     dma_addr_t *src_addr,
+				     u64 dst_addr)
+{
+	return xe_migrate_vram(m, npages, src_addr, dst_addr,
+			       XE_MIGRATE_COPY_TO_VRAM);
+}
+
+/**
+ * xe_migrate_from_vram() - Migrate from VRAM
+ * @m: The migration context.
+ * @npages: Number of pages to migrate.
+ * @src_addr: Device physical address of VRAM (source of migrate)
+ * @dst_addr: Array of dma addresses (destination of migrate)
+ *
+ * Copy from a VRAM device physical address to an array of dma addresses.
+ *
+ * Return: dma fence for migrate to signal completion on success, ERR_PTR on
+ * failure
+ */
+struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
+				       unsigned long npages,
+				       u64 src_addr,
+				       dma_addr_t *dst_addr)
+{
+	return xe_migrate_vram(m, npages, dst_addr, src_addr,
+			       XE_MIGRATE_COPY_TO_SRAM);
+}
+
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 #include "tests/xe_migrate.c"
 #endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 0109866e398a..6ff9a963425c 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -95,6 +95,16 @@ struct xe_migrate_pt_update {
 
 struct xe_migrate *xe_migrate_init(struct xe_tile *tile);
 
+struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
+				     unsigned long npages,
+				     dma_addr_t *src_addr,
+				     u64 dst_addr);
+
+struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
+				       unsigned long npages,
+				       u64 src_addr,
+				       dma_addr_t *dst_addr);
+
 struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 				  struct xe_bo *src_bo,
 				  struct xe_bo *dst_bo,
-- 
2.34.1


  parent reply	other threads:[~2025-02-13  2:10 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-13  2:10 [PATCH v5 00/32] Introduce GPU SVM and Xe SVM implementation Matthew Brost
2025-02-13  2:10 ` [PATCH v5 01/32] drm/xe: Retry BO allocation Matthew Brost
2025-02-13  2:10 ` [PATCH v5 02/32] mm/migrate: Add migrate_device_pfns Matthew Brost
2025-02-13  2:10 ` [PATCH v5 03/32] mm/migrate: Trylock device page in do_swap_page Matthew Brost
2025-02-19  5:36   ` Alistair Popple
2025-02-19  6:08     ` Matthew Brost
2025-02-19  6:25       ` Alistair Popple
2025-02-20 13:28   ` Gwan-gyeong Mun
2025-02-20 20:03     ` Matthew Brost
2025-02-13  2:10 ` [PATCH v5 04/32] drm/pagemap: Add DRM pagemap Matthew Brost
2025-02-20 13:53   ` Gwan-gyeong Mun
2025-02-13  2:10 ` [PATCH v5 05/32] drm/xe/bo: Introduce xe_bo_put_async Matthew Brost
2025-02-14  9:52   ` Ghimiray, Himal Prasad
2025-02-20 14:33   ` Gwan-gyeong Mun
2025-02-13  2:10 ` [PATCH v5 06/32] drm/gpusvm: Add support for GPU Shared Virtual Memory Matthew Brost
2025-02-19  8:59   ` Thomas Hellström
2025-02-13  2:10 ` [PATCH v5 07/32] drm/xe: Select DRM_GPUSVM Kconfig Matthew Brost
2025-02-13  2:10 ` [PATCH v5 08/32] drm/xe/uapi: Add DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR Matthew Brost
2025-02-13  2:10 ` [PATCH v5 09/32] drm/xe: Add SVM init / close / fini to faulting VMs Matthew Brost
2025-02-13  2:10 ` [PATCH v5 10/32] drm/xe: Add dma_addr res cursor Matthew Brost
2025-02-13  2:10 ` [PATCH v5 11/32] drm/xe: Nuke VM's mapping upon close Matthew Brost
2025-02-13  2:10 ` [PATCH v5 12/32] drm/xe: Add SVM range invalidation and page fault Matthew Brost
2025-02-13 10:05   ` Ghimiray, Himal Prasad
2025-02-13  2:10 ` [PATCH v5 13/32] drm/gpuvm: Add DRM_GPUVA_OP_DRIVER Matthew Brost
2025-02-13  2:10 ` [PATCH v5 14/32] drm/xe: Add (re)bind to SVM page fault handler Matthew Brost
2025-02-13  2:10 ` [PATCH v5 15/32] drm/xe: Add SVM garbage collector Matthew Brost
2025-02-13 10:07   ` Ghimiray, Himal Prasad
2025-02-13  2:10 ` [PATCH v5 16/32] drm/xe: Add unbind to " Matthew Brost
2025-02-19 15:05   ` Thomas Hellström
2025-02-13  2:10 ` [PATCH v5 17/32] drm/xe: Do not allow CPU address mirror VMA unbind if the GPU has bindings Matthew Brost
2025-02-13 11:28   ` Ghimiray, Himal Prasad
2025-02-13  2:10 ` [PATCH v5 18/32] drm/xe: Enable CPU address mirror uAPI Matthew Brost
2025-02-13 11:26   ` Ghimiray, Himal Prasad
2025-02-13  2:10 ` [PATCH v5 19/32] drm/xe/uapi: Add DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR Matthew Brost
2025-02-13  2:11 ` Matthew Brost [this message]
2025-02-13  2:11 ` [PATCH v5 21/32] drm/xe: Add SVM device memory mirroring Matthew Brost
2025-02-13 11:28   ` Ghimiray, Himal Prasad
2025-02-13  2:11 ` [PATCH v5 22/32] drm/xe: Add drm_gpusvm_devmem to xe_bo Matthew Brost
2025-02-13 11:29   ` Ghimiray, Himal Prasad
2025-02-13  2:11 ` [PATCH v5 23/32] drm/xe: Add drm_pagemap ops to SVM Matthew Brost
2025-02-13  2:11 ` [PATCH v5 24/32] drm/xe: Add GPUSVM device memory copy vfunc functions Matthew Brost
2025-02-13  2:11 ` [PATCH v5 25/32] drm/xe: Add Xe SVM populate_devmem_pfn GPU SVM vfunc Matthew Brost
2025-02-13  2:11 ` [PATCH v5 26/32] drm/xe: Add Xe SVM devmem_release " Matthew Brost
2025-02-13 18:29   ` Ghimiray, Himal Prasad
2025-02-13  2:11 ` [PATCH v5 27/32] drm/xe: Add SVM VRAM migration Matthew Brost
2025-02-13 18:28   ` Ghimiray, Himal Prasad
2025-02-18 21:54     ` Matthew Brost
2025-02-19  2:59       ` Ghimiray, Himal Prasad
2025-02-19  3:05         ` Matthew Brost
2025-02-19  3:40           ` Ghimiray, Himal Prasad
2025-02-19 10:30   ` Thomas Hellström
2025-02-19 17:38     ` Matthew Brost
2025-02-20 15:53   ` Matthew Auld
2025-02-20 15:59     ` Thomas Hellström
2025-02-20 19:55       ` Matthew Brost
2025-02-21 15:15         ` Matthew Auld
2025-02-21 15:22           ` Matthew Brost
2025-02-13  2:11 ` [PATCH v5 28/32] drm/xe: Basic SVM BO eviction Matthew Brost
2025-02-13  2:11 ` [PATCH v5 29/32] drm/xe: Add SVM debug Matthew Brost
2025-02-13 11:30   ` Ghimiray, Himal Prasad
2025-02-13  2:11 ` [PATCH v5 30/32] drm/xe: Add modparam for SVM notifier size Matthew Brost
2025-02-13 11:31   ` Ghimiray, Himal Prasad
2025-02-13  2:11 ` [PATCH v5 31/32] drm/xe: Add always_migrate_to_vram modparam Matthew Brost
2025-02-13 11:31   ` Ghimiray, Himal Prasad
2025-02-13  2:11 ` [PATCH v5 32/32] drm/doc: gpusvm: Add GPU SVM documentation Matthew Brost
2025-02-13  3:35 ` ✓ CI.Patch_applied: success for Introduce GPU SVM and Xe SVM implementation (rev5) Patchwork
2025-02-13  3:36 ` ✗ CI.checkpatch: warning " Patchwork
2025-02-13  3:37 ` ✗ CI.KUnit: failure " Patchwork
2025-02-13 21:23 ` [PATCH v5 00/32] Introduce GPU SVM and Xe SVM implementation Demi Marie Obenour
2025-02-14  8:47   ` Thomas Hellström
2025-02-14  9:07     ` Ghimiray, Himal Prasad
2025-02-14  9:10       ` Ghimiray, Himal Prasad
2025-02-14 16:14     ` Demi Marie Obenour
2025-02-14 16:26       ` Thomas Hellström
2025-02-14 18:36         ` Demi Marie Obenour

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250213021112.1228481-21-matthew.brost@intel.com \
    --to=matthew.brost@intel.com \
    --cc=airlied@gmail.com \
    --cc=apopple@nvidia.com \
    --cc=dakr@kernel.org \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=felix.kuehling@amd.com \
    --cc=himal.prasad.ghimiray@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=simona.vetter@ffwll.ch \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox