Intel-XE Archive on lore.kernel.org
From: Oak Zeng <oak.zeng@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: himal.prasad.ghimiray@intel.com, krishnaiah.bommu@intel.com,
	matthew.brost@intel.com, Thomas.Hellstrom@linux.intel.com,
	brian.welty@intel.com
Subject: [v2 21/31] drm/xe/svm: Introduce svm migration function
Date: Tue,  9 Apr 2024 16:17:32 -0400	[thread overview]
Message-ID: <20240409201742.3042626-22-oak.zeng@intel.com> (raw)
In-Reply-To: <20240409201742.3042626-1-oak.zeng@intel.com>

Introduce the xe_migrate_pa() function for data migration.
This function is similar to xe_migrate_copy(), but takes
different parameters: instead of BO and TTM resource
parameters, it takes the source and destination buffers'
physical addresses. It is intended for use by the SVM
subsystem, which has no BO or TTM concepts.
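
For illustration, here is a minimal calling sketch (a hypothetical
call site; the variable names and error handling are placeholders,
not part of this patch):

	struct dma_fence *fence;

	/* Copy 2M of physically contiguous, dma-mapped system
	 * memory to a vram device physical address.
	 */
	fence = xe_migrate_pa(m, sys_dma_addr, false, vram_dpa,
			      true, SZ_2M);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Wait for the blit before reusing the source pages */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);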

Signed-off-by: Oak Zeng <oak.zeng@intel.com>
Cc: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@intel.com>
Cc: Brian Welty <brian.welty@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 217 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_migrate.h |   7 ++
 2 files changed, 224 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 82b63bdb9c47..f1d53911253b 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -462,6 +462,37 @@ static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur)
 	return cur->size >= size;
 }
 
+/**
+ * pte_update_cmd_size() - calculate the batch buffer command size
+ * to update a flat page table.
+ *
+ * @size: The virtual address range size of the page table to update
+ *
+ * The page table to update is assumed to be a flat, one-level page
+ * table with all entries pointing to 4K pages.
+ *
+ * Return: the number of dwords of the update commands
+ */
+static u32 pte_update_cmd_size(u64 size)
+{
+	u32 dword;
+	u64 entries = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+
+	XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
+	/*
+	 * MI_STORE_DATA_IMM commands are used to update the page table.
+	 * Each instruction can update at most 0x1ff PTEs. To update
+	 * n (n <= 0x1ff) PTEs, we need:
+	 * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc.)
+	 * 2 dwords for the page table's physical location
+	 * 2*n dwords for the PTE values (each PTE is 2 dwords)
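+	 *
+	 * For example, updating 2M of address space requires 512 PTEs and
+	 * thus two MI_STORE_DATA_IMM commands (511 + 1 entries), for a
+	 * total of (1 + 2) * 2 + 512 * 2 = 1030 dwords.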
+	 */
+	dword = (1 + 2) * DIV_ROUND_UP(entries, 0x1ff);
+	dword += entries * 2;
+
+	return dword;
+}
+
 static u32 pte_update_size(struct xe_migrate *m,
 			   bool is_vram,
 			   struct ttm_resource *res,
@@ -562,6 +593,48 @@ static void emit_pte(struct xe_migrate *m,
 	}
 }
 
+/**
+ * build_pt_update_batch_sram() - build batch buffer commands to update
+ * the migration vm page table for system memory
+ *
+ * @m: The migration context
+ * @bb: The batch buffer which holds the page table update commands
+ * @pt_offset: The offset of the page table to update, in bytes
+ * @pa: dma-mapped host physical address the page table entries should
+ * point to
+ * @size: size of the virtual address space the page table should cover
+ */
+static void build_pt_update_batch_sram(struct xe_migrate *m,
+		     struct xe_bb *bb, u32 pt_offset,
+		     u64 pa, u32 size)
+{
+	u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
+	u32 ptes;
+
+	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+	while (ptes) {
+		u32 chunk = min(0x1ffU, ptes);
+
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+		bb->cs[bb->len++] = pt_offset;
+		bb->cs[bb->len++] = 0;
+
+		pt_offset += chunk * 8;
+		ptes -= chunk;
+
+		while (chunk--) {
+			u64 addr;
+
+			addr = pa & PAGE_MASK;
+			addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
+								 addr, pat_index,
+								 0, false, 0);
+			bb->cs[bb->len++] = lower_32_bits(addr);
+			bb->cs[bb->len++] = upper_32_bits(addr);
+			pa += XE_PAGE_SIZE;
+		}
+	}
+}
+
 #define EMIT_COPY_CCS_DW 5
 static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
 			  u64 dst_ofs, bool dst_is_indirect,
@@ -879,6 +952,150 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 	return fence;
 }
 
+/**
+ * xe_migrate_pa() - Migrate buffers with src and dst physical address
+ *
+ * @m: The migration context
+ * @src_pa: physical address of the source, from the GPU's point of view.
+ * This is a device physical address (dpa) when the source is in vram, or
+ * a dma-mapped host physical address when the source is in system memory
+ * @src_is_vram: True if the source buffer is in vram.
+ * @dst_pa: physical address of the destination, from the GPU's point of
+ * view. This is a device physical address (dpa) when the destination is
+ * in vram, or a dma-mapped host physical address when the destination is
+ * in system memory
+ * @dst_is_vram: True if destination buffer is in vram.
+ * @size: The size of data to copy.
+ *
+ * Copy @size bytes of data from @src_pa to @dst_pa. The functionality and
+ * behavior of this function are similar to xe_migrate_copy(), but the
+ * interface is different. This function is a helper intended for the SVM
+ * subsystem: since SVM has no buffer objects and no TTM, there are no
+ * src/dst BOs as input. Instead, the src/dst physical addresses are used
+ * directly.
+ *
+ * Since the backing store of any user malloc'ed or mmap'ed memory can be
+ * placed in system memory, it cannot be compressed. Thus this function
+ * does not need to copy CCS (compression control surface) metadata as
+ * xe_migrate_copy() does.
+ *
+ * This function assumes that both the source and the destination buffers
+ * are physically contiguous.
+ *
+ * The GPU blitter is used to copy the data. Source and destination are
+ * first mapped into the migration vm, which uses a flat one-level (L0)
+ * page table; the blitter then performs the copy.
+ *
+ * Return: Pointer to a dma_fence representing the last copy batch, or
+ * an error pointer on failure. If there is a failure, any copy operation
+ * started by the function call has been synced.
+ */
+struct dma_fence *xe_migrate_pa(struct xe_migrate *m,
+				  u64 src_pa,
+				  bool src_is_vram,
+				  u64 dst_pa,
+				  bool dst_is_vram,
+				  u64 size)
+{
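+	/*
+	 * Each 4K page of the migration vm's page table maps 2M of virtual
+	 * address space, so one blit chunk (at most
+	 * MAX_PREEMPTDISABLE_TRANSFER bytes) needs NUM_PT_PER_BLIT page
+	 * table pages for each of the source and the destination mapping.
+	 */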
+#define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M)
+	struct xe_gt *gt = m->tile->primary_gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u64 round_update_size;
+	/* A slot is one 4K page of page table; it covers 2M of virtual address */
+	u32 pt_slot;
+	int err;
+
+	while (size) {
+		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 update_idx;
+
+		/* Copy at most MAX_PREEMPTDISABLE_TRANSFER bytes per batch */
+		round_update_size = min_t(u64, size, MAX_PREEMPTDISABLE_TRANSFER);
+
+		/* src pte update*/
+		if (!src_is_vram)
+			batch_size += pte_update_cmd_size(round_update_size);
+		/* dst pte update*/
+		if (!dst_is_vram)
+			batch_size += pte_update_cmd_size(round_update_size);
+
+		/* Copy command size*/
+		batch_size += EMIT_COPY_DW;
+
+		bb = xe_bb_new(gt, batch_size, true);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		if (!src_is_vram) {
+			pt_slot = 0;
+			build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
+					src_pa, round_update_size);
+			src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0);
+		} else {
+			src_L0_ofs = xe_migrate_vram_ofs(xe, src_pa);
+		}
+
+		if (!dst_is_vram) {
+			pt_slot = NUM_PT_PER_BLIT;
+			build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
+					dst_pa, round_update_size);
+			dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0);
+		} else {
+			dst_L0_ofs = xe_migrate_vram_ofs(xe, dst_pa);
+		}
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, round_update_size,
+			  XE_PAGE_SIZE);
+
+		mutex_lock(&m->job_mutex);
+		job = xe_bb_create_migration_job(m->q, bb,
+						 xe_migrate_batch_base(m, true),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, 0);
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		size -= round_update_size;
+		src_pa += round_update_size;
+		dst_pa += round_update_size;
+		continue;
+
+err:
+		mutex_unlock(&m->job_mutex);
+		xe_bb_free(bb, NULL);
+
+err_sync:
+		/* Sync partial copy if any. FIXME: under job_mutex? */
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
 static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
 				 u32 size, u32 pitch)
 {
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 701bb27349b0..98b480244265 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -101,6 +101,13 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 				  struct ttm_resource *dst,
 				  bool copy_only_ccs);
 
+struct dma_fence *xe_migrate_pa(struct xe_migrate *m,
+				  u64 src_pa,
+				  bool src_is_vram,
+				  u64 dst_pa,
+				  bool dst_is_vram,
+				  u64 size);
+
 struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 				   struct xe_bo *bo,
 				   struct ttm_resource *dst);
-- 
2.26.3



Thread overview: 72+ messages
2024-04-09 20:17 [v2 00/31] Basic system allocator support in xe driver Oak Zeng
2024-04-09 20:17 ` [v2 01/31] drm/xe: Refactor vm_bind Oak Zeng
2024-04-09 20:17 ` [v2 02/31] drm/xe/svm: Add SVM document Oak Zeng
2024-04-09 20:17 ` [v2 03/31] drm/xe: Invalidate userptr VMA on page pin fault Oak Zeng
2024-04-09 20:17 ` [v2 04/31] drm/xe: Drop unused arguments from vm_bind_ioctl_ops_parse Oak Zeng
2024-04-09 20:17 ` [v2 05/31] drm/xe: Fix op->tile_mask for fault mode Oak Zeng
2024-04-09 20:17 ` [v2 06/31] drm/xe/uapi: Add DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR flag Oak Zeng
2024-04-09 20:17 ` [v2 07/31] drm/xe: Create userptr if page fault occurs on system_allocator VMA Oak Zeng
2024-04-09 20:17 ` [v2 08/31] drm/xe: Add faulted userptr VMA garbage collector Oak Zeng
2024-04-09 20:17 ` [v2 09/31] drm/xe: Introduce helper to populate userptr Oak Zeng
2024-04-09 20:17 ` [v2 10/31] drm/xe: Introduce a helper to free sg table Oak Zeng
2024-04-09 20:17 ` [v2 11/31] drm/xe: Use hmm_range_fault to populate user pages Oak Zeng
2024-04-09 20:17 ` [v2 12/31] drm/xe/svm: Remap and provide memmap backing for GPU vram Oak Zeng
2024-04-10 21:09   ` Matthew Brost
2024-04-16 19:01   ` Matthew Brost
2024-04-09 20:17 ` [v2 13/31] drm/xe/svm: Introduce DRM_XE_SVM kernel config Oak Zeng
2024-04-10 21:13   ` Matthew Brost
2024-06-04 18:57     ` Zeng, Oak
2024-04-09 20:17 ` [v2 14/31] drm/xe: Introduce helper to get tile from memory region Oak Zeng
2024-04-10 21:17   ` Matthew Brost
2024-04-09 20:17 ` [v2 15/31] drm/xe: Introduce a helper to get dpa from pfn Oak Zeng
2024-04-10 21:35   ` Matthew Brost
2024-04-09 20:17 ` [v2 16/31] drm/xe/svm: Get xe memory region from page Oak Zeng
2024-04-10 21:38   ` Matthew Brost
2024-04-09 20:17 ` [v2 17/31] drm/xe: Get xe_vma from xe_userptr Oak Zeng
2024-04-10 21:42   ` Matthew Brost
2024-04-09 20:17 ` [v2 18/31] drm/xe/svm: Build userptr sg table for device pages Oak Zeng
2024-04-10 21:52   ` Matthew Brost
2024-04-09 20:17 ` [v2 19/31] drm/xe/svm: Determine a vma is backed by device memory Oak Zeng
2024-04-10 21:56   ` Matthew Brost
2024-06-05  2:29     ` Zeng, Oak
2024-04-09 20:17 ` [v2 20/31] drm/xe: add xe lock document Oak Zeng
2024-04-09 20:17 ` Oak Zeng [this message]
2024-04-10 22:06   ` [v2 21/31] drm/xe/svm: Introduce svm migration function Matthew Brost
2024-04-09 20:17 ` [v2 22/31] drm/xe/svm: implement functions to allocate and free device memory Oak Zeng
2024-04-10 22:23   ` Matthew Brost
2024-04-15 20:13     ` Zeng, Oak
2024-04-15 21:19       ` Matthew Brost
2024-06-05 22:16     ` Zeng, Oak
2024-06-05 23:37       ` Matthew Brost
2024-06-06  3:30         ` Zeng, Oak
2024-06-06  4:44           ` Matthew Brost
2024-04-17 20:55   ` Matthew Brost
2024-04-09 20:17 ` [v2 23/31] drm/xe/svm: Trace buddy block allocation and free Oak Zeng
2024-04-09 20:17 ` [v2 24/31] drm/xe/svm: Create and destroy xe svm Oak Zeng
2024-04-10 22:25   ` Matthew Brost
2024-04-09 20:17 ` [v2 25/31] drm/xe/svm: Add vm to xe_svm process Oak Zeng
2024-04-09 20:17 ` [v2 26/31] drm/xe: Make function lookup_vma public Oak Zeng
2024-04-10 22:26   ` Matthew Brost
2024-04-09 20:17 ` [v2 27/31] drm/xe/svm: Handle CPU page fault Oak Zeng
2024-04-11  2:07   ` Matthew Brost
2024-04-12 17:24     ` Zeng, Oak
2024-04-12 18:10       ` Matthew Brost
2024-04-12 18:39         ` Zeng, Oak
2024-06-07  4:44         ` Zeng, Oak
2024-06-07  4:30     ` Zeng, Oak
2024-04-09 20:17 ` [v2 28/31] drm/xe/svm: Introduce helper to migrate vma to vram Oak Zeng
2024-04-11  2:49   ` Matthew Brost
2024-04-12 21:21     ` Zeng, Oak
2024-04-15 19:40       ` Matthew Brost
2024-06-07 17:12         ` Zeng, Oak
2024-06-07 17:56           ` Matthew Brost
2024-06-07 18:10             ` Matthew Brost
2024-04-09 20:17 ` [v2 29/31] drm/xe/svm: trace svm migration Oak Zeng
2024-04-09 20:17 ` [v2 30/31] drm/xe/svm: Add a helper to determine a vma is fault userptr Oak Zeng
2024-04-11  2:50   ` Matthew Brost
2024-04-09 20:17 ` [v2 31/31] drm/xe/svm: Migration from sram to vram for system allocator Oak Zeng
2024-04-11  2:55   ` Matthew Brost
2024-06-07 17:22     ` Zeng, Oak
2024-06-07 18:18       ` Matthew Brost
2024-06-07 18:23         ` Matthew Brost
2024-04-09 20:52 ` ✗ CI.Patch_applied: failure for Basic system allocator support in xe driver Patchwork
