Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Lukasz Laguna <lukasz.laguna@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: michal.winiarski@intel.com, michal.wajdeczko@intel.com,
	lukasz.laguna@intel.com
Subject: [PATCH v1 1/3] drm/xe/migrate: Add function for raw copy of VRAM and CCS
Date: Wed, 16 Oct 2024 11:57:43 +0200	[thread overview]
Message-ID: <20241016095745.7477-2-lukasz.laguna@intel.com> (raw)
In-Reply-To: <20241016095745.7477-1-lukasz.laguna@intel.com>

Add support for copying chunks of data between VRAM and sysmem objects.
Additionally, allow copying of the corresponding CCS metadata from or to a
dedicated buffer object.

Signed-off-by: Lukasz Laguna <lukasz.laguna@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 208 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_migrate.h |   5 +
 2 files changed, 213 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index cfd31ae49cc1..e4770f44582d 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -936,6 +936,214 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 	return fence;
 }
 
+/**
+ * xe_migrate_raw_vram_copy() - Raw copy of VRAM object and corresponding CCS.
+ * @vram_bo: The VRAM buffer object.
+ * @vram_offset: Byte offset into @vram_bo at which the copy starts.
+ * @sysmem_bo: The sysmem buffer object. If copying only CCS metadata set this
+ * to NULL.
+ * @sysmem_offset: Byte offset into @sysmem_bo at which the copy starts.
+ * @ccs_bo: The CCS buffer object located in sysmem. If copying of CCS metadata
+ * is not needed set this to NULL.
+ * @ccs_offset: Byte offset into @ccs_bo at which the CCS copy starts.
+ * @size: Size in bytes of the VRAM chunk to copy; must be PAGE_SIZE aligned.
+ * @to_sysmem: True to copy from VRAM to sysmem, false for opposite direction.
+ *
+ * Copies the content of buffer object from or to VRAM. If supported and
+ * needed, it also copies corresponding CCS metadata.
+ *
+ * Return: Pointer to a dma_fence representing the last copy batch, or
+ * an error pointer on failure. If there is a failure, any copy operation
+ * started by the function call has been synced.
+ */
+struct dma_fence *xe_migrate_raw_vram_copy(struct xe_bo *vram_bo, u64 vram_offset,
+					   struct xe_bo *sysmem_bo, u64 sysmem_offset,
+					   struct xe_bo *ccs_bo, u64 ccs_offset,
+					   u64 size, bool to_sysmem)
+{
+	struct xe_device *xe = xe_bo_device(vram_bo);
+	struct xe_tile *tile = vram_bo->tile;
+	struct xe_gt *gt = tile->primary_gt;
+	struct xe_migrate *m = tile->migrate;
+	struct dma_fence *fence = NULL;
+	struct ttm_resource *vram = vram_bo->ttm.resource, *sysmem, *ccs;
+	struct xe_res_cursor vram_it, sysmem_it, ccs_it;
+	u64 vram_L0_ofs, sysmem_L0_ofs;
+	u32 vram_L0_pt, sysmem_L0_pt;
+	u64 vram_L0, sysmem_L0;
+	bool copy_content = sysmem_bo ? true : false;
+	bool copy_ccs = ccs_bo ? true : false;
+	int pass = 0;
+	int err;
+
+	if (!copy_content && !copy_ccs)
+		return ERR_PTR(-EINVAL);
+
+	if (!IS_ALIGNED(vram_offset | sysmem_offset | ccs_offset | size, PAGE_SIZE))
+		return ERR_PTR(-EINVAL);
+
+	if (!xe_bo_is_vram(vram_bo))
+		return ERR_PTR(-EINVAL);
+
+	if (range_overflows(vram_offset, size, (u64)vram_bo->ttm.base.size))
+		return ERR_PTR(-EOVERFLOW);
+
+	if (copy_content) {
+		if (xe_bo_is_vram(sysmem_bo))
+			return ERR_PTR(-EINVAL);
+		if (range_overflows(sysmem_offset, size, (u64)sysmem_bo->ttm.base.size))
+			return ERR_PTR(-EOVERFLOW);
+	}
+
+	if (copy_ccs) {
+		if (xe_bo_is_vram(ccs_bo))
+			return ERR_PTR(-EINVAL);
+		if (!xe_device_has_flat_ccs(xe))
+			return ERR_PTR(-EOPNOTSUPP);
+		if (ccs_bo->ttm.base.size < xe_device_ccs_bytes(xe, size))
+			return ERR_PTR(-EINVAL);
+		if (range_overflows(ccs_offset, (u64)xe_device_ccs_bytes(xe, size),
+				    (u64)ccs_bo->ttm.base.size))
+			return ERR_PTR(-EOVERFLOW);
+	}
+
+	xe_res_first(vram, vram_offset, size, &vram_it);
+
+	if (copy_content) {
+		sysmem = sysmem_bo->ttm.resource;
+		xe_res_first_sg(xe_bo_sg(sysmem_bo), sysmem_offset, size, &sysmem_it);
+	}
+
+	if (copy_ccs) {
+		ccs = ccs_bo->ttm.resource;
+		xe_res_first_sg(xe_bo_sg(ccs_bo), ccs_offset, xe_device_ccs_bytes(xe, size),
+				&ccs_it);
+	}
+
+	while (size) {
+		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 flush_flags = 0;
+		u32 update_idx;
+		u64 ccs_ofs, ccs_size;
+		u32 ccs_pt;
+
+		bool usm = xe->info.has_usm;
+		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+		vram_L0 = xe_migrate_res_sizes(m, &vram_it);
+
+		if (copy_content) {
+			sysmem_L0 = xe_migrate_res_sizes(m, &sysmem_it);
+			vram_L0 = min(vram_L0, sysmem_L0);
+		}
+
+		drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, vram_L0);
+
+		batch_size += pte_update_size(m, PTE_UPDATE_FLAG_IS_VRAM, vram, &vram_it, &vram_L0,
+					      &vram_L0_ofs, &vram_L0_pt, 0, 0, avail_pts);
+		if (copy_content) {
+			batch_size += pte_update_size(m, 0, sysmem, &sysmem_it, &vram_L0,
+						      &sysmem_L0_ofs, &sysmem_L0_pt, 0, avail_pts,
+						      avail_pts);
+		}
+
+		if (copy_ccs) {
+			ccs_size = xe_device_ccs_bytes(xe, vram_L0);
+			batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs,
+						      &ccs_pt, 0, copy_content ? 2 * avail_pts :
+						      avail_pts, avail_pts);
+			xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
+		}
+
+		batch_size += copy_content ? EMIT_COPY_DW : 0;
+		batch_size += copy_ccs ? EMIT_COPY_CCS_DW : 0;
+
+		bb = xe_bb_new(gt, batch_size, usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		if (xe_migrate_allow_identity(vram_L0, &vram_it))
+			xe_res_next(&vram_it, vram_L0);
+		else
+			emit_pte(m, bb, vram_L0_pt, true, false, &vram_it, vram_L0, vram);
+
+		if (copy_content)
+			emit_pte(m, bb, sysmem_L0_pt, false, false, &sysmem_it, vram_L0, sysmem);
+
+		if (copy_ccs)
+			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, ccs);
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		if (copy_content)
+			emit_copy(gt, bb, to_sysmem ? vram_L0_ofs : sysmem_L0_ofs, to_sysmem ?
+				  sysmem_L0_ofs : vram_L0_ofs, vram_L0, XE_PAGE_SIZE);
+
+		if (copy_ccs) {
+			emit_copy_ccs(gt, bb, to_sysmem ? ccs_ofs : vram_L0_ofs, !to_sysmem,
+				      to_sysmem ? vram_L0_ofs : ccs_ofs, to_sysmem, vram_L0);
+			flush_flags = to_sysmem ? 0 : MI_FLUSH_DW_CCS;
+		}
+
+		job = xe_bb_create_migration_job(m->q, bb, xe_migrate_batch_base(m, usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+		if (!fence) {
+			err = xe_sched_job_add_deps(job, vram_bo->ttm.base.resv,
+						    DMA_RESV_USAGE_BOOKKEEP);
+			if (!err && copy_content)
+				err = xe_sched_job_add_deps(job, sysmem_bo->ttm.base.resv,
+							    DMA_RESV_USAGE_BOOKKEEP);
+			if (!err && copy_ccs)
+				err = xe_sched_job_add_deps(job, ccs_bo->ttm.base.resv,
+							    DMA_RESV_USAGE_BOOKKEEP);
+			if (err)
+				goto err_job;
+		}
+
+		mutex_lock(&m->job_mutex);
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		size -= vram_L0;
+		continue;
+
+err_job:
+		xe_sched_job_put(job);
+err:
+		xe_bb_free(bb, NULL);
+
+err_sync:
+		/* Sync any partial copy before erroring out. FIXME: hold job_mutex here? */
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
 static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
 				 u32 size, u32 pitch)
 {
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 0109866e398a..5cdb26ba6493 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -102,6 +102,11 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 				  struct ttm_resource *dst,
 				  bool copy_only_ccs);
 
+struct dma_fence *xe_migrate_raw_vram_copy(struct xe_bo *vram_bo, u64 vram_offset,
+					   struct xe_bo *sysmem_bo, u64 sysmem_offset,
+					   struct xe_bo *ccs_bo, u64 ccs_offset,
+					   u64 size, bool to_sysmem);
+
 #define XE_MIGRATE_CLEAR_FLAG_BO_DATA		BIT(0)
 #define XE_MIGRATE_CLEAR_FLAG_CCS_DATA		BIT(1)
 #define XE_MIGRATE_CLEAR_FLAG_FULL	(XE_MIGRATE_CLEAR_FLAG_BO_DATA | \
-- 
2.40.0


  reply	other threads:[~2024-10-16  9:58 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-16  9:57 [PATCH v1 0/3] PF: Add functions to save and restore VF LMEM and CCS data Lukasz Laguna
2024-10-16  9:57 ` Lukasz Laguna [this message]
2024-10-16 12:54   ` [PATCH v1 1/3] drm/xe/migrate: Add function for raw copy of VRAM and CCS Nirmoy Das
2024-10-30 10:38     ` Laguna, Lukasz
2024-10-16  9:57 ` [PATCH v1 2/3] drm/xe/bo: Add trylock helper for buffer objects Lukasz Laguna
2024-10-16  9:57 ` [PATCH v1 3/3] drm/xe/pf: Add functions to save and restore VF LMEM and CCS data Lukasz Laguna
2024-10-17 11:12   ` Michal Wajdeczko
2024-10-30 10:36     ` Laguna, Lukasz
2024-10-16 12:43 ` ✓ CI.Patch_applied: success for PF: " Patchwork
2024-10-16 12:43 ` ✗ CI.checkpatch: warning " Patchwork
2024-10-16 12:44 ` ✓ CI.KUnit: success " Patchwork
2024-10-16 12:55 ` ✓ CI.Build: " Patchwork
2024-10-16 12:58 ` ✗ CI.Hooks: failure " Patchwork
2024-10-16 12:59 ` ✓ CI.checksparse: success " Patchwork
2024-10-16 13:16 ` ✓ CI.BAT: " Patchwork
2024-10-17  4:42 ` ✗ CI.FULL: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241016095745.7477-2-lukasz.laguna@intel.com \
    --to=lukasz.laguna@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=michal.wajdeczko@intel.com \
    --cc=michal.winiarski@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox