Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality
       [not found] <cover.1720677099.git.akshata.jahagirdar@intel.com>
@ 2024-07-11  5:55 ` Akshata Jahagirdar
  0 siblings, 0 replies; 22+ messages in thread
From: Akshata Jahagirdar @ 2024-07-11  5:55 UTC (permalink / raw)
  To: intel-xe; +Cc: akshatajahagirdar6, Akshata Jahagirdar

This test verifies that the main and CCS data are cleared during bo creation.
The motivation for using KUnit instead of IGT is that, although we can verify
that the data is zero following bo creation, we cannot confirm whether that
zero value is the result of our clear function or simply because the data
initially present there was already zero.

Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c      | 271 +++++++++++++++++++++
 drivers/gpu/drm/xe/tests/xe_migrate_test.c |   1 +
 drivers/gpu/drm/xe/tests/xe_migrate_test.h |   1 +
 3 files changed, 273 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 962f6438e219..8e31270f11d1 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -359,3 +359,274 @@ void xe_migrate_sanity_kunit(struct kunit *test)
 	xe_call_for_each_device(migrate_test_run_device);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
+
+static struct dma_fence *blt_copy(struct xe_tile *tile,
+				struct xe_bo *src_bo, struct xe_bo *dst_bo,
+				bool copy_only_ccs, const char *str, struct kunit *test)
+{
+	struct xe_gt *gt = tile->primary_gt;
+	struct xe_migrate *m = tile->migrate;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u64 size = src_bo->size;
+	struct xe_res_cursor src_it, dst_it;
+	struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u32 src_L0_pt, dst_L0_pt;
+	u64 src_L0, dst_L0;
+	int err;
+	bool src_is_vram = mem_type_is_vram(src->mem_type);
+	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
+
+	if (!src_is_vram)
+		xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
+	else
+		xe_res_first(src, 0, size, &src_it);
+
+	if (!dst_is_vram)
+		xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
+	else
+		xe_res_first(dst, 0, size, &dst_it);
+
+	while (size) {
+		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 flush_flags = 0;
+		u32 update_idx;
+		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+		src_L0 = xe_migrate_res_sizes(m, &src_it);
+		dst_L0 = xe_migrate_res_sizes(m, &dst_it);
+
+		src_L0 = min(src_L0, dst_L0);
+
+		batch_size += pte_update_size(m, src_is_vram, src_is_vram, src, &src_it, &src_L0,
+					      &src_L0_ofs, &src_L0_pt, 0, 0,
+					      avail_pts);
+
+		batch_size += pte_update_size(m, dst_is_vram, dst_is_vram, dst, &dst_it, &src_L0,
+					      &dst_L0_ofs, &dst_L0_pt, 0,
+					      avail_pts, avail_pts);
+
+		/* Add copy commands size here */
+		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+			((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? EMIT_COPY_CCS_DW : 0);
+
+		bb = xe_bb_new(gt, batch_size, xe->info.has_usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		if (src_is_vram)
+			xe_res_next(&src_it, src_L0);
+		else
+			emit_pte(m, bb, src_L0_pt, src_is_vram, false,
+				 &src_it, src_L0, src);
+
+		if (dst_is_vram)
+			xe_res_next(&dst_it, src_L0);
+		else
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, false,
+				 &dst_it, src_L0, dst);
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+		if (!copy_only_ccs)
+			emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
+
+		if (copy_only_ccs)
+			flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+					src_is_vram, dst_L0_ofs,
+					dst_is_vram, src_L0, dst_L0_ofs, copy_only_ccs);
+
+		mutex_lock(&m->job_mutex);
+
+		job = xe_bb_create_migration_job(m->q, bb,
+						 xe_migrate_batch_base(m, xe->info.has_usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		size -= src_L0;
+		continue;
+
+err:
+		mutex_unlock(&m->job_mutex);
+		xe_bb_free(bb, NULL);
+
+err_sync:
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
+static void test_clear(struct xe_device *xe, struct xe_tile *tile,
+			struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct kunit *test)
+{
+	struct dma_fence *fence;
+	u64 expected, retval;
+
+	expected = 0xd0d0d0d0d0d0d0d0;
+	xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
+
+	fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
+	if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
+		retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
+		if (retval == expected)
+			KUNIT_FAIL(test, "Sanity check failed: VRAM must have compressed value\n");
+	}
+	dma_fence_put(fence);
+
+	fence = blt_copy(tile, vram_bo, sys_bo, false, "Blit copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Decompressed value must be equal to initial value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Decompressed value must be equal to initial value", test);
+	}
+	dma_fence_put(fence);
+
+	kunit_info(test, "Clear vram buffer object\n");
+	expected = 0x0000000000000000;
+	fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource);
+
+	fence = blt_copy(tile, vram_bo, sys_bo,
+				false, "Blit copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Clear main buffer first value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Clear main buffer last value", test);
+	}
+	dma_fence_put(fence);
+
+	fence = blt_copy(tile, vram_bo, sys_bo,
+			true, "Blit surf copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Clear ccs data first value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Clear ccs data last value", test);
+	}
+	dma_fence_put(fence);
+}
+
+static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
+{
+	struct xe_bo *sys_bo, *vram_bo;
+	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
+	int ret;
+
+	sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
+			ttm_bo_type_device, XE_BO_CREATE_SYSTEM_BIT | XE_BO_NEEDS_CPU_ACCESS);
+
+	if (IS_ERR(sys_bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(sys_bo));
+		return;
+	}
+
+	xe_bo_lock(sys_bo, false);
+	ret = xe_bo_validate(sys_bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
+		goto out_unlock;
+	}
+
+	ret = xe_bo_vmap(sys_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret);
+		goto out_unlock;
+	}
+	xe_bo_unlock(sys_bo);
+
+	vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device, bo_flags | XE_BO_NEEDS_CPU_ACCESS);
+	if (IS_ERR(vram_bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(vram_bo));
+		return;
+	}
+
+	xe_bo_lock(vram_bo, false);
+	ret = xe_bo_validate(vram_bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
+		goto out_unlock;
+	}
+
+	ret = xe_bo_vmap(vram_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
+		goto out_unlock;
+	}
+
+	test_clear(xe, tile, sys_bo, vram_bo, test);
+	xe_bo_unlock(vram_bo);
+
+	xe_bo_lock(vram_bo, false);
+	xe_bo_vunmap(vram_bo);
+	xe_bo_unlock(vram_bo);
+
+	xe_bo_lock(sys_bo, false);
+	xe_bo_vunmap(sys_bo);
+	xe_bo_unlock(sys_bo);
+out_unlock:
+	xe_bo_put(vram_bo);
+	xe_bo_put(sys_bo);
+}
+
+static int validate_ccs_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_tile *tile;
+	int id;
+
+	if (!xe_device_has_flat_ccs(xe)) {
+		kunit_info(test, "Skipping non-flat-ccs device.\n");
+		return 0;
+	}
+
+	if (!(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))) {
+		kunit_info(test, "Skipping non-xe2 discrete device %s.\n",
+			   dev_name(xe->drm.dev));
+		return 0;
+	}
+
+	xe_device_mem_access_get(xe);
+
+	for_each_tile(tile, xe, id)
+		validate_ccs_test_run_tile(xe, tile, test);
+
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+void xe_validate_ccs_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(validate_ccs_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_validate_ccs_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
index eb0d8963419c..49b9ef060ad3 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -9,6 +9,7 @@
 
 static struct kunit_case xe_migrate_tests[] = {
 	KUNIT_CASE(xe_migrate_sanity_kunit),
+	KUNIT_CASE(xe_validate_ccs_kunit),
 	{}
 };
 
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
index 7c645c66824f..dc0ecfdfad39 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
@@ -9,5 +9,6 @@
 struct kunit;
 
 void xe_migrate_sanity_kunit(struct kunit *test);
+void xe_validate_ccs_kunit(struct kunit *test);
 
 #endif
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality
       [not found] <cover.1720689220.git.akshata.jahagirdar@intel.com>
@ 2024-07-11  9:18 ` Akshata Jahagirdar
  2024-07-11  9:19   ` Akshata Jahagirdar
  2024-07-11 14:27   ` Matthew Auld
  0 siblings, 2 replies; 22+ messages in thread
From: Akshata Jahagirdar @ 2024-07-11  9:18 UTC (permalink / raw)
  To: intel-xe
  Cc: matthew.d.roper, matthew.auld, himal.prasad.ghimiray,
	lucas.demarchi, Akshata Jahagirdar

This test verifies that the main and CCS data are cleared during bo creation.
The motivation for using KUnit instead of IGT is that, although we can verify
that the data is zero following bo creation, we cannot confirm whether that
zero value is the result of our clear function or simply because the data
initially present there was already zero.

Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c      | 271 +++++++++++++++++++++
 drivers/gpu/drm/xe/tests/xe_migrate_test.c |   1 +
 drivers/gpu/drm/xe/tests/xe_migrate_test.h |   1 +
 3 files changed, 273 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 962f6438e219..6538c0e6b57e 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -359,3 +359,274 @@ void xe_migrate_sanity_kunit(struct kunit *test)
 	xe_call_for_each_device(migrate_test_run_device);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
+
+static struct dma_fence *blt_copy(struct xe_tile *tile,
+				struct xe_bo *src_bo, struct xe_bo *dst_bo,
+				bool copy_only_ccs, const char *str, struct kunit *test)
+{
+	struct xe_gt *gt = tile->primary_gt;
+	struct xe_migrate *m = tile->migrate;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u64 size = src_bo->size;
+	struct xe_res_cursor src_it, dst_it;
+	struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u32 src_L0_pt, dst_L0_pt;
+	u64 src_L0, dst_L0;
+	int err;
+	bool src_is_vram = mem_type_is_vram(src->mem_type);
+	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
+
+	if (!src_is_vram)
+		xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
+	else
+		xe_res_first(src, 0, size, &src_it);
+
+	if (!dst_is_vram)
+		xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
+	else
+		xe_res_first(dst, 0, size, &dst_it);
+
+	while (size) {
+		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 flush_flags = 0;
+		u32 update_idx;
+		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+		src_L0 = xe_migrate_res_sizes(m, &src_it);
+		dst_L0 = xe_migrate_res_sizes(m, &dst_it);
+
+		src_L0 = min(src_L0, dst_L0);
+
+		batch_size += pte_update_size(m, src_is_vram, src_is_vram, src, &src_it, &src_L0,
+					      &src_L0_ofs, &src_L0_pt, 0, 0,
+					      avail_pts);
+
+		batch_size += pte_update_size(m, dst_is_vram, dst_is_vram, dst, &dst_it, &src_L0,
+					      &dst_L0_ofs, &dst_L0_pt, 0,
+					      avail_pts, avail_pts);
+
+		/* Add copy commands size here */
+		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+			((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? EMIT_COPY_CCS_DW : 0);
+
+		bb = xe_bb_new(gt, batch_size, xe->info.has_usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		if (src_is_vram)
+			xe_res_next(&src_it, src_L0);
+		else
+			emit_pte(m, bb, src_L0_pt, src_is_vram, false,
+				 &src_it, src_L0, src);
+
+		if (dst_is_vram)
+			xe_res_next(&dst_it, src_L0);
+		else
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, false,
+				 &dst_it, src_L0, dst);
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+		if (!copy_only_ccs)
+			emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
+
+		if (copy_only_ccs)
+			flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+					src_is_vram, dst_L0_ofs,
+					dst_is_vram, src_L0, dst_L0_ofs, copy_only_ccs);
+
+		mutex_lock(&m->job_mutex);
+
+		job = xe_bb_create_migration_job(m->q, bb,
+						 xe_migrate_batch_base(m, xe->info.has_usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		size -= src_L0;
+		continue;
+
+err:
+		mutex_unlock(&m->job_mutex);
+		xe_bb_free(bb, NULL);
+
+err_sync:
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
+static void test_clear(struct xe_device *xe, struct xe_tile *tile,
+			struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct kunit *test)
+{
+	struct dma_fence *fence;
+	u64 expected, retval;
+
+	expected = 0xd0d0d0d0d0d0d0d0;
+	xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
+
+	fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
+	if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
+		retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
+		if (retval == expected)
+			KUNIT_FAIL(test, "Sanity check failed: VRAM must have compressed value\n");
+	}
+	dma_fence_put(fence);
+
+	fence = blt_copy(tile, vram_bo, sys_bo, false, "Blit copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Decompressed value must be equal to initial value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Decompressed value must be equal to initial value", test);
+	}
+	dma_fence_put(fence);
+
+	kunit_info(test, "Clear vram buffer object\n");
+	expected = 0x0000000000000000;
+	fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource);
+
+	fence = blt_copy(tile, vram_bo, sys_bo,
+				false, "Blit copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Clear main buffer first value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Clear main buffer last value", test);
+	}
+	dma_fence_put(fence);
+
+	fence = blt_copy(tile, vram_bo, sys_bo,
+			true, "Blit surf copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Clear ccs data first value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Clear ccs data last value", test);
+	}
+	dma_fence_put(fence);
+}
+
+static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
+{
+	struct xe_bo *sys_bo, *vram_bo;
+	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+	long ret;
+
+	sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
+			ttm_bo_type_device, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+
+	if (IS_ERR(sys_bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(sys_bo));
+		return;
+	}
+
+	xe_bo_lock(sys_bo, false);
+	ret = xe_bo_validate(sys_bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
+		goto out_unlock;
+	}
+
+	ret = xe_bo_vmap(sys_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret);
+		goto out_unlock;
+	}
+	xe_bo_unlock(sys_bo);
+
+	vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+	if (IS_ERR(vram_bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(vram_bo));
+		return;
+	}
+
+	xe_bo_lock(vram_bo, false);
+	ret = xe_bo_validate(vram_bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
+		goto out_unlock;
+	}
+
+	ret = xe_bo_vmap(vram_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
+		goto out_unlock;
+	}
+
+	test_clear(xe, tile, sys_bo, vram_bo, test);
+	xe_bo_unlock(vram_bo);
+
+	xe_bo_lock(vram_bo, false);
+	xe_bo_vunmap(vram_bo);
+	xe_bo_unlock(vram_bo);
+
+	xe_bo_lock(sys_bo, false);
+	xe_bo_vunmap(sys_bo);
+	xe_bo_unlock(sys_bo);
+out_unlock:
+	xe_bo_put(vram_bo);
+	xe_bo_put(sys_bo);
+}
+
+static int validate_ccs_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_tile *tile;
+	int id;
+
+	if (!xe_device_has_flat_ccs(xe)) {
+		kunit_info(test, "Skipping non-flat-ccs device.\n");
+		return 0;
+	}
+
+	if (!(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))) {
+		kunit_info(test, "Skipping non-xe2 discrete device %s.\n",
+			   dev_name(xe->drm.dev));
+		return 0;
+	}
+
+	xe_pm_runtime_get(xe);
+
+	for_each_tile(tile, xe, id)
+		validate_ccs_test_run_tile(xe, tile, test);
+
+	xe_pm_runtime_put(xe);
+
+	return 0;
+}
+
+void xe_validate_ccs_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(validate_ccs_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_validate_ccs_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
index eb0d8963419c..49b9ef060ad3 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -9,6 +9,7 @@
 
 static struct kunit_case xe_migrate_tests[] = {
 	KUNIT_CASE(xe_migrate_sanity_kunit),
+	KUNIT_CASE(xe_validate_ccs_kunit),
 	{}
 };
 
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
index 7c645c66824f..dc0ecfdfad39 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
@@ -9,5 +9,6 @@
 struct kunit;
 
 void xe_migrate_sanity_kunit(struct kunit *test);
+void xe_validate_ccs_kunit(struct kunit *test);
 
 #endif
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality
  2024-07-11  9:18 ` Akshata Jahagirdar
@ 2024-07-11  9:19   ` Akshata Jahagirdar
  2024-07-11 14:27   ` Matthew Auld
  1 sibling, 0 replies; 22+ messages in thread
From: Akshata Jahagirdar @ 2024-07-11  9:19 UTC (permalink / raw)
  To: intel-xe; +Cc: akshatajahagirdar6, Akshata Jahagirdar

This test verifies that the main and CCS data are cleared during bo creation.
The motivation for using KUnit instead of IGT is that, although we can verify
that the data is zero following bo creation, we cannot confirm whether that
zero value is the result of our clear function or simply because the data
initially present there was already zero.

Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c      | 271 +++++++++++++++++++++
 drivers/gpu/drm/xe/tests/xe_migrate_test.c |   1 +
 drivers/gpu/drm/xe/tests/xe_migrate_test.h |   1 +
 3 files changed, 273 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 962f6438e219..6538c0e6b57e 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -359,3 +359,274 @@ void xe_migrate_sanity_kunit(struct kunit *test)
 	xe_call_for_each_device(migrate_test_run_device);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
+
+static struct dma_fence *blt_copy(struct xe_tile *tile,
+				struct xe_bo *src_bo, struct xe_bo *dst_bo,
+				bool copy_only_ccs, const char *str, struct kunit *test)
+{
+	struct xe_gt *gt = tile->primary_gt;
+	struct xe_migrate *m = tile->migrate;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u64 size = src_bo->size;
+	struct xe_res_cursor src_it, dst_it;
+	struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u32 src_L0_pt, dst_L0_pt;
+	u64 src_L0, dst_L0;
+	int err;
+	bool src_is_vram = mem_type_is_vram(src->mem_type);
+	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
+
+	if (!src_is_vram)
+		xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
+	else
+		xe_res_first(src, 0, size, &src_it);
+
+	if (!dst_is_vram)
+		xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
+	else
+		xe_res_first(dst, 0, size, &dst_it);
+
+	while (size) {
+		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 flush_flags = 0;
+		u32 update_idx;
+		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+		src_L0 = xe_migrate_res_sizes(m, &src_it);
+		dst_L0 = xe_migrate_res_sizes(m, &dst_it);
+
+		src_L0 = min(src_L0, dst_L0);
+
+		batch_size += pte_update_size(m, src_is_vram, src_is_vram, src, &src_it, &src_L0,
+					      &src_L0_ofs, &src_L0_pt, 0, 0,
+					      avail_pts);
+
+		batch_size += pte_update_size(m, dst_is_vram, dst_is_vram, dst, &dst_it, &src_L0,
+					      &dst_L0_ofs, &dst_L0_pt, 0,
+					      avail_pts, avail_pts);
+
+		/* Add copy commands size here */
+		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+			((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? EMIT_COPY_CCS_DW : 0);
+
+		bb = xe_bb_new(gt, batch_size, xe->info.has_usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		if (src_is_vram)
+			xe_res_next(&src_it, src_L0);
+		else
+			emit_pte(m, bb, src_L0_pt, src_is_vram, false,
+				 &src_it, src_L0, src);
+
+		if (dst_is_vram)
+			xe_res_next(&dst_it, src_L0);
+		else
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, false,
+				 &dst_it, src_L0, dst);
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+		if (!copy_only_ccs)
+			emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
+
+		if (copy_only_ccs)
+			flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+					src_is_vram, dst_L0_ofs,
+					dst_is_vram, src_L0, dst_L0_ofs, copy_only_ccs);
+
+		mutex_lock(&m->job_mutex);
+
+		job = xe_bb_create_migration_job(m->q, bb,
+						 xe_migrate_batch_base(m, xe->info.has_usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		size -= src_L0;
+		continue;
+
+err:
+		mutex_unlock(&m->job_mutex);
+		xe_bb_free(bb, NULL);
+
+err_sync:
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
+static void test_clear(struct xe_device *xe, struct xe_tile *tile,
+			struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct kunit *test)
+{
+	struct dma_fence *fence;
+	u64 expected, retval;
+
+	expected = 0xd0d0d0d0d0d0d0d0;
+	xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
+
+	fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
+	if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
+		retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
+		if (retval == expected)
+			KUNIT_FAIL(test, "Sanity check failed: VRAM must have compressed value\n");
+	}
+	dma_fence_put(fence);
+
+	fence = blt_copy(tile, vram_bo, sys_bo, false, "Blit copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Decompressed value must be equal to initial value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Decompressed value must be equal to initial value", test);
+	}
+	dma_fence_put(fence);
+
+	kunit_info(test, "Clear vram buffer object\n");
+	expected = 0x0000000000000000;
+	fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource);
+
+	fence = blt_copy(tile, vram_bo, sys_bo,
+				false, "Blit copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Clear main buffer first value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Clear main buffer last value", test);
+	}
+	dma_fence_put(fence);
+
+	fence = blt_copy(tile, vram_bo, sys_bo,
+			true, "Blit surf copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Clear ccs data first value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Clear ccs data last value", test);
+	}
+	dma_fence_put(fence);
+}
+
+static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
+{
+	struct xe_bo *sys_bo, *vram_bo;
+	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+	long ret;
+
+	sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
+			ttm_bo_type_device, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+
+	if (IS_ERR(sys_bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(sys_bo));
+		return;
+	}
+
+	xe_bo_lock(sys_bo, false);
+	ret = xe_bo_validate(sys_bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
+		goto out_unlock;
+	}
+
+	ret = xe_bo_vmap(sys_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret);
+		goto out_unlock;
+	}
+	xe_bo_unlock(sys_bo);
+
+	vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+	if (IS_ERR(vram_bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(vram_bo));
+		return;
+	}
+
+	xe_bo_lock(vram_bo, false);
+	ret = xe_bo_validate(vram_bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
+		goto out_unlock;
+	}
+
+	ret = xe_bo_vmap(vram_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
+		goto out_unlock;
+	}
+
+	test_clear(xe, tile, sys_bo, vram_bo, test);
+	xe_bo_unlock(vram_bo);
+
+	xe_bo_lock(vram_bo, false);
+	xe_bo_vunmap(vram_bo);
+	xe_bo_unlock(vram_bo);
+
+	xe_bo_lock(sys_bo, false);
+	xe_bo_vunmap(sys_bo);
+	xe_bo_unlock(sys_bo);
+out_unlock:
+	xe_bo_put(vram_bo);
+	xe_bo_put(sys_bo);
+}
+
+static int validate_ccs_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_tile *tile;
+	int id;
+
+	if (!xe_device_has_flat_ccs(xe)) {
+		kunit_info(test, "Skipping non-flat-ccs device.\n");
+		return 0;
+	}
+
+	if (!(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))) {
+		kunit_info(test, "Skipping non-xe2 discrete device %s.\n",
+			   dev_name(xe->drm.dev));
+		return 0;
+	}
+
+	xe_pm_runtime_get(xe);
+
+	for_each_tile(tile, xe, id)
+		validate_ccs_test_run_tile(xe, tile, test);
+
+	xe_pm_runtime_put(xe);
+
+	return 0;
+}
+
+void xe_validate_ccs_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(validate_ccs_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_validate_ccs_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
index eb0d8963419c..49b9ef060ad3 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -9,6 +9,7 @@
 
 static struct kunit_case xe_migrate_tests[] = {
 	KUNIT_CASE(xe_migrate_sanity_kunit),
+	KUNIT_CASE(xe_validate_ccs_kunit),
 	{}
 };
 
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
index 7c645c66824f..dc0ecfdfad39 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
@@ -9,5 +9,6 @@
 struct kunit;
 
 void xe_migrate_sanity_kunit(struct kunit *test);
+void xe_validate_ccs_kunit(struct kunit *test);
 
 #endif
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality
  2024-07-11  9:18 ` Akshata Jahagirdar
  2024-07-11  9:19   ` Akshata Jahagirdar
@ 2024-07-11 14:27   ` Matthew Auld
  2024-07-12  4:15     ` Jahagirdar, Akshata
  1 sibling, 1 reply; 22+ messages in thread
From: Matthew Auld @ 2024-07-11 14:27 UTC (permalink / raw)
  To: Akshata Jahagirdar, intel-xe
  Cc: matthew.d.roper, himal.prasad.ghimiray, lucas.demarchi

On 11/07/2024 10:18, Akshata Jahagirdar wrote:
> This test verifies if the main and ccs data are cleared during bo creation.
> The motivation to use Kunit instead of IGT is that, although we can verify
> whether the data is zero following bo creation,
> we cannot confirm whether the zero value after bo creation is the result of
> our clear function or simply because the initial data present there was zero.

One idea might be to pre-fill and release some amount of vram at the 
start of the test. Then maybe spawn a number threads each allocating a 
bit of vram (also various sizes) in a loop, each time dirtying the pages 
and ccs before releasing it. There should be some amount of page re-use 
to catch issues, plus is a lot more nasty with multiple threads and 
varying sizes. Such a test can also be useful for lnl, just that we use 
system memory instead of vram. Or maybe we already have something like that?

> 
> Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
> ---
>   drivers/gpu/drm/xe/tests/xe_migrate.c      | 271 +++++++++++++++++++++
>   drivers/gpu/drm/xe/tests/xe_migrate_test.c |   1 +
>   drivers/gpu/drm/xe/tests/xe_migrate_test.h |   1 +
>   3 files changed, 273 insertions(+)
> 
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
> index 962f6438e219..6538c0e6b57e 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
> @@ -359,3 +359,274 @@ void xe_migrate_sanity_kunit(struct kunit *test)
>   	xe_call_for_each_device(migrate_test_run_device);
>   }
>   EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
> +
> +static struct dma_fence *blt_copy(struct xe_tile *tile,
> +				struct xe_bo *src_bo, struct xe_bo *dst_bo,
> +				bool copy_only_ccs, const char *str, struct kunit *test)
> +{
> +	struct xe_gt *gt = tile->primary_gt;
> +	struct xe_migrate *m = tile->migrate;
> +	struct xe_device *xe = gt_to_xe(gt);
> +	struct dma_fence *fence = NULL;
> +	u64 size = src_bo->size;
> +	struct xe_res_cursor src_it, dst_it;
> +	struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource;
> +	u64 src_L0_ofs, dst_L0_ofs;
> +	u32 src_L0_pt, dst_L0_pt;
> +	u64 src_L0, dst_L0;
> +	int err;
> +	bool src_is_vram = mem_type_is_vram(src->mem_type);
> +	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
> +
> +	if (!src_is_vram)
> +		xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
> +	else
> +		xe_res_first(src, 0, size, &src_it);
> +
> +	if (!dst_is_vram)
> +		xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
> +	else
> +		xe_res_first(dst, 0, size, &dst_it);
> +
> +	while (size) {
> +		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
> +		struct xe_sched_job *job;
> +		struct xe_bb *bb;
> +		u32 flush_flags = 0;
> +		u32 update_idx;
> +		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
> +
> +		src_L0 = xe_migrate_res_sizes(m, &src_it);
> +		dst_L0 = xe_migrate_res_sizes(m, &dst_it);
> +
> +		src_L0 = min(src_L0, dst_L0);
> +
> +		batch_size += pte_update_size(m, src_is_vram, src_is_vram, src, &src_it, &src_L0,
> +					      &src_L0_ofs, &src_L0_pt, 0, 0,
> +					      avail_pts);
> +
> +		batch_size += pte_update_size(m, dst_is_vram, dst_is_vram, dst, &dst_it, &src_L0,
> +					      &dst_L0_ofs, &dst_L0_pt, 0,
> +					      avail_pts, avail_pts);
> +
> +		/* Add copy commands size here */
> +		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
> +			((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? EMIT_COPY_CCS_DW : 0);
> +
> +		bb = xe_bb_new(gt, batch_size, xe->info.has_usm);
> +		if (IS_ERR(bb)) {
> +			err = PTR_ERR(bb);
> +			goto err_sync;
> +		}
> +
> +		if (src_is_vram)
> +			xe_res_next(&src_it, src_L0);
> +		else
> +			emit_pte(m, bb, src_L0_pt, src_is_vram, false,
> +				 &src_it, src_L0, src);
> +
> +		if (dst_is_vram)
> +			xe_res_next(&dst_it, src_L0);
> +		else
> +			emit_pte(m, bb, dst_L0_pt, dst_is_vram, false,
> +				 &dst_it, src_L0, dst);
> +
> +		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
> +		update_idx = bb->len;
> +		if (!copy_only_ccs)
> +			emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
> +
> +		if (copy_only_ccs)
> +			flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
> +					src_is_vram, dst_L0_ofs,
> +					dst_is_vram, src_L0, dst_L0_ofs, copy_only_ccs);
> +
> +		mutex_lock(&m->job_mutex);

This is not allowed anymore, it seems. CI is also complaining about this. 
See: 50e52592fbe791d96ec2cb431d158cc6bc495be5

Not sure what else has changed.

> +
> +		job = xe_bb_create_migration_job(m->q, bb,
> +						 xe_migrate_batch_base(m, xe->info.has_usm),
> +						 update_idx);
> +		if (IS_ERR(job)) {
> +			err = PTR_ERR(job);
> +			goto err;
> +		}
> +
> +		xe_sched_job_add_migrate_flush(job, flush_flags);
> +
> +		xe_sched_job_arm(job);
> +		dma_fence_put(fence);
> +		fence = dma_fence_get(&job->drm.s_fence->finished);
> +		xe_sched_job_push(job);
> +
> +		dma_fence_put(m->fence);
> +		m->fence = dma_fence_get(fence);
> +
> +		mutex_unlock(&m->job_mutex);
> +
> +		xe_bb_free(bb, fence);
> +		size -= src_L0;
> +		continue;
> +
> +err:
> +		mutex_unlock(&m->job_mutex);
> +		xe_bb_free(bb, NULL);
> +
> +err_sync:
> +		if (fence) {
> +			dma_fence_wait(fence, false);
> +			dma_fence_put(fence);
> +		}
> +		return ERR_PTR(err);
> +	}
> +
> +	return fence;
> +}
> +
> +static void test_clear(struct xe_device *xe, struct xe_tile *tile,
> +			struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct kunit *test)
> +{
> +	struct dma_fence *fence;
> +	u64 expected, retval;
> +
> +	expected = 0xd0d0d0d0d0d0d0d0;
> +	xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
> +
> +	fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
> +	if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
> +		retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
> +		if (retval == expected)
> +			KUNIT_FAIL(test, "Sanity check failed: VRAM must have compressed value\n");
> +	}
> +	dma_fence_put(fence);
> +
> +	fence = blt_copy(tile, vram_bo, sys_bo, false, "Blit copy from vram to sysmem", test);
> +	if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) {
> +		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
> +		check(retval, expected, "Decompressed value must be equal to initial value", test);
> +		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
> +		check(retval, expected, "Decompressed value must be equal to initial value", test);
> +	}
> +	dma_fence_put(fence);
> +
> +	kunit_info(test, "Clear vram buffer object\n");
> +	expected = 0x0000000000000000;
> +	fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource);
> +
> +	fence = blt_copy(tile, vram_bo, sys_bo,
> +				false, "Blit copy from vram to sysmem", test);
> +	if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) {
> +		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
> +		check(retval, expected, "Clear main buffer first value", test);
> +		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
> +		check(retval, expected, "Clear main buffer last value", test);
> +	}
> +	dma_fence_put(fence);
> +
> +	fence = blt_copy(tile, vram_bo, sys_bo,
> +			true, "Blit surf copy from vram to sysmem", test);
> +	if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
> +		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
> +		check(retval, expected, "Clear ccs data first value", test);
> +		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
> +		check(retval, expected, "Clear ccs data last value", test);
> +	}
> +	dma_fence_put(fence);
> +}
> +
> +static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
> +{
> +	struct xe_bo *sys_bo, *vram_bo;
> +	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
> +	long ret;
> +
> +	sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
> +			ttm_bo_type_device, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS);
> +
> +	if (IS_ERR(sys_bo)) {
> +		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
> +			   PTR_ERR(sys_bo));
> +		return;
> +	}
> +
> +	xe_bo_lock(sys_bo, false);
> +	ret = xe_bo_validate(sys_bo, NULL, false);
> +	if (ret) {
> +		KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
> +		goto out_unlock;

vram_bo is not initialized here.

> +	}
> +
> +	ret = xe_bo_vmap(sys_bo);
> +	if (ret) {
> +		KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret);
> +		goto out_unlock;
> +	}
> +	xe_bo_unlock(sys_bo);
> +
> +	vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
> +			       ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
> +	if (IS_ERR(vram_bo)) {
> +		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
> +			   PTR_ERR(vram_bo));
> +		return;

Missing bo_put?

> +	}
> +
> +	xe_bo_lock(vram_bo, false);
> +	ret = xe_bo_validate(vram_bo, NULL, false);
> +	if (ret) {
> +		KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
> +		goto out_unlock;
> +	}
> +
> +	ret = xe_bo_vmap(vram_bo);
> +	if (ret) {
> +		KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
> +		goto out_unlock;
> +	}
> +
> +	test_clear(xe, tile, sys_bo, vram_bo, test);
> +	xe_bo_unlock(vram_bo);
> +
> +	xe_bo_lock(vram_bo, false);
> +	xe_bo_vunmap(vram_bo);
> +	xe_bo_unlock(vram_bo);
> +
> +	xe_bo_lock(sys_bo, false);
> +	xe_bo_vunmap(sys_bo);
> +	xe_bo_unlock(sys_bo);
> +out_unlock:

There is no unlock here.

> +	xe_bo_put(vram_bo);
> +	xe_bo_put(sys_bo);
> +}
> +
> +static int validate_ccs_test_run_device(struct xe_device *xe)
> +{
> +	struct kunit *test = xe_cur_kunit();
> +	struct xe_tile *tile;
> +	int id;
> +
> +	if (!xe_device_has_flat_ccs(xe)) {
> +		kunit_info(test, "Skipping non-flat-ccs device.\n");
> +		return 0;
> +	}
> +
> +	if (!(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))) {
> +		kunit_info(test, "Skipping non-xe2 discrete device %s.\n",
> +			   dev_name(xe->drm.dev));
> +		return 0;
> +	}

So is there something else for xe2 igpu and ccs?

> +
> +	xe_pm_runtime_get(xe);
> +
> +	for_each_tile(tile, xe, id)
> +		validate_ccs_test_run_tile(xe, tile, test);
> +
> +	xe_pm_runtime_put(xe);
> +
> +	return 0;
> +}
> +
> +void xe_validate_ccs_kunit(struct kunit *test)
> +{
> +	xe_call_for_each_device(validate_ccs_test_run_device);
> +}
> +EXPORT_SYMBOL_IF_KUNIT(xe_validate_ccs_kunit);
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
> index eb0d8963419c..49b9ef060ad3 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
> @@ -9,6 +9,7 @@
>   
>   static struct kunit_case xe_migrate_tests[] = {
>   	KUNIT_CASE(xe_migrate_sanity_kunit),
> +	KUNIT_CASE(xe_validate_ccs_kunit),
>   	{}
>   };
>   
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
> index 7c645c66824f..dc0ecfdfad39 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
> @@ -9,5 +9,6 @@
>   struct kunit;
>   
>   void xe_migrate_sanity_kunit(struct kunit *test);
> +void xe_validate_ccs_kunit(struct kunit *test);
>   
>   #endif

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality
  2024-07-11 14:27   ` Matthew Auld
@ 2024-07-12  4:15     ` Jahagirdar, Akshata
  2024-07-16  8:51       ` Matthew Auld
  0 siblings, 1 reply; 22+ messages in thread
From: Jahagirdar, Akshata @ 2024-07-12  4:15 UTC (permalink / raw)
  To: Matthew Auld, intel-xe
  Cc: matthew.d.roper, himal.prasad.ghimiray, lucas.demarchi


On 7/11/2024 7:27 AM, Matthew Auld wrote:
> On 11/07/2024 10:18, Akshata Jahagirdar wrote:
>> This test verifies if the main and ccs data are cleared during bo 
>> creation.
>> The motivation to use Kunit instead of IGT is that, although we can 
>> verify
>> whether the data is zero following bo creation,
>> we cannot confirm whether the zero value after bo creation is the 
>> result of
>> our clear function or simply because the initial data present there 
>> was zero.
>
> One idea might be to pre-fill and release some amount of vram at the 
> start of the test. Then maybe spawn a number threads each allocating a 
> bit of vram (also various sizes) in a loop, each time dirtying the 
> pages and ccs before releasing it. There should be some amount of page 
> re-use to catch issues, plus is a lot more nasty with multiple threads 
> and varying sizes. Such a test can also be useful for lnl, just that 
> we use system memory instead of vram. Or maybe we already have 
> something like that?
>
If I am not wrong, I think there is an igt testcase xe_evict_ccs which 
exercises similar scenarios.
>>
>> Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
>> ---
>>   drivers/gpu/drm/xe/tests/xe_migrate.c      | 271 +++++++++++++++++++++
>>   drivers/gpu/drm/xe/tests/xe_migrate_test.c |   1 +
>>   drivers/gpu/drm/xe/tests/xe_migrate_test.h |   1 +
>>   3 files changed, 273 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c 
>> b/drivers/gpu/drm/xe/tests/xe_migrate.c
>> index 962f6438e219..6538c0e6b57e 100644
>> --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
>> +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
>> @@ -359,3 +359,274 @@ void xe_migrate_sanity_kunit(struct kunit *test)
>>       xe_call_for_each_device(migrate_test_run_device);
>>   }
>>   EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
>> +
>> +static struct dma_fence *blt_copy(struct xe_tile *tile,
>> +                struct xe_bo *src_bo, struct xe_bo *dst_bo,
>> +                bool copy_only_ccs, const char *str, struct kunit 
>> *test)
>> +{
>> +    struct xe_gt *gt = tile->primary_gt;
>> +    struct xe_migrate *m = tile->migrate;
>> +    struct xe_device *xe = gt_to_xe(gt);
>> +    struct dma_fence *fence = NULL;
>> +    u64 size = src_bo->size;
>> +    struct xe_res_cursor src_it, dst_it;
>> +    struct ttm_resource *src = src_bo->ttm.resource, *dst = 
>> dst_bo->ttm.resource;
>> +    u64 src_L0_ofs, dst_L0_ofs;
>> +    u32 src_L0_pt, dst_L0_pt;
>> +    u64 src_L0, dst_L0;
>> +    int err;
>> +    bool src_is_vram = mem_type_is_vram(src->mem_type);
>> +    bool dst_is_vram = mem_type_is_vram(dst->mem_type);
>> +
>> +    if (!src_is_vram)
>> +        xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
>> +    else
>> +        xe_res_first(src, 0, size, &src_it);
>> +
>> +    if (!dst_is_vram)
>> +        xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
>> +    else
>> +        xe_res_first(dst, 0, size, &dst_it);
>> +
>> +    while (size) {
>> +        u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
>> +        struct xe_sched_job *job;
>> +        struct xe_bb *bb;
>> +        u32 flush_flags = 0;
>> +        u32 update_idx;
>> +        u32 avail_pts = max_mem_transfer_per_pass(xe) / 
>> LEVEL0_PAGE_TABLE_ENCODE_SIZE;
>> +
>> +        src_L0 = xe_migrate_res_sizes(m, &src_it);
>> +        dst_L0 = xe_migrate_res_sizes(m, &dst_it);
>> +
>> +        src_L0 = min(src_L0, dst_L0);
>> +
>> +        batch_size += pte_update_size(m, src_is_vram, src_is_vram, 
>> src, &src_it, &src_L0,
>> +                          &src_L0_ofs, &src_L0_pt, 0, 0,
>> +                          avail_pts);
>> +
>> +        batch_size += pte_update_size(m, dst_is_vram, dst_is_vram, 
>> dst, &dst_it, &src_L0,
>> +                          &dst_L0_ofs, &dst_L0_pt, 0,
>> +                          avail_pts, avail_pts);
>> +
>> +        /* Add copy commands size here */
>> +        batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
>> +            ((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? 
>> EMIT_COPY_CCS_DW : 0);
>> +
>> +        bb = xe_bb_new(gt, batch_size, xe->info.has_usm);
>> +        if (IS_ERR(bb)) {
>> +            err = PTR_ERR(bb);
>> +            goto err_sync;
>> +        }
>> +
>> +        if (src_is_vram)
>> +            xe_res_next(&src_it, src_L0);
>> +        else
>> +            emit_pte(m, bb, src_L0_pt, src_is_vram, false,
>> +                 &src_it, src_L0, src);
>> +
>> +        if (dst_is_vram)
>> +            xe_res_next(&dst_it, src_L0);
>> +        else
>> +            emit_pte(m, bb, dst_L0_pt, dst_is_vram, false,
>> +                 &dst_it, src_L0, dst);
>> +
>> +        bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
>> +        update_idx = bb->len;
>> +        if (!copy_only_ccs)
>> +            emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, 
>> XE_PAGE_SIZE);
>> +
>> +        if (copy_only_ccs)
>> +            flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
>> +                    src_is_vram, dst_L0_ofs,
>> +                    dst_is_vram, src_L0, dst_L0_ofs, copy_only_ccs);
>> +
>> +        mutex_lock(&m->job_mutex);
>
> This is not allowed anymore it seems. CI is also complaing about this. 
> See: 50e52592fbe791d96ec2cb431d158cc6bc495be5
>
> Not sure what else has changed.
>
I see. I missed this. Thank you! Will fix this!
>> +
>> +        job = xe_bb_create_migration_job(m->q, bb,
>> +                         xe_migrate_batch_base(m, xe->info.has_usm),
>> +                         update_idx);
>> +        if (IS_ERR(job)) {
>> +            err = PTR_ERR(job);
>> +            goto err;
>> +        }
>> +
>> +        xe_sched_job_add_migrate_flush(job, flush_flags);
>> +
>> +        xe_sched_job_arm(job);
>> +        dma_fence_put(fence);
>> +        fence = dma_fence_get(&job->drm.s_fence->finished);
>> +        xe_sched_job_push(job);
>> +
>> +        dma_fence_put(m->fence);
>> +        m->fence = dma_fence_get(fence);
>> +
>> +        mutex_unlock(&m->job_mutex);
>> +
>> +        xe_bb_free(bb, fence);
>> +        size -= src_L0;
>> +        continue;
>> +
>> +err:
>> +        mutex_unlock(&m->job_mutex);
>> +        xe_bb_free(bb, NULL);
>> +
>> +err_sync:
>> +        if (fence) {
>> +            dma_fence_wait(fence, false);
>> +            dma_fence_put(fence);
>> +        }
>> +        return ERR_PTR(err);
>> +    }
>> +
>> +    return fence;
>> +}
>> +
>> +static void test_clear(struct xe_device *xe, struct xe_tile *tile,
>> +            struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct 
>> kunit *test)
>> +{
>> +    struct dma_fence *fence;
>> +    u64 expected, retval;
>> +
>> +    expected = 0xd0d0d0d0d0d0d0d0;
>> +    xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
>> +
>> +    fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from 
>> sysmem to vram", test);
>> +    if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to 
>> vram", test)) {
>> +        retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
>> +        if (retval == expected)
>> +            KUNIT_FAIL(test, "Sanity check failed: VRAM must have 
>> compressed value\n");
>> +    }
>> +    dma_fence_put(fence);
>> +
>> +    fence = blt_copy(tile, vram_bo, sys_bo, false, "Blit copy from 
>> vram to sysmem", test);
>> +    if (!sanity_fence_failed(xe, fence, "Blit copy from vram to 
>> sysmem", test)) {
>> +        retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
>> +        check(retval, expected, "Decompressed value must be equal to 
>> initial value", test);
>> +        retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
>> +        check(retval, expected, "Decompressed value must be equal to 
>> initial value", test);
>> +    }
>> +    dma_fence_put(fence);
>> +
>> +    kunit_info(test, "Clear vram buffer object\n");
>> +    expected = 0x0000000000000000;
>> +    fence = xe_migrate_clear(tile->migrate, vram_bo, 
>> vram_bo->ttm.resource);
>> +
>> +    fence = blt_copy(tile, vram_bo, sys_bo,
>> +                false, "Blit copy from vram to sysmem", test);
>> +    if (!sanity_fence_failed(xe, fence, "Clear main buffer data", 
>> test)) {
>> +        retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
>> +        check(retval, expected, "Clear main buffer first value", test);
>> +        retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
>> +        check(retval, expected, "Clear main buffer last value", test);
>> +    }
>> +    dma_fence_put(fence);
>> +
>> +    fence = blt_copy(tile, vram_bo, sys_bo,
>> +            true, "Blit surf copy from vram to sysmem", test);
>> +    if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", 
>> test)) {
>> +        retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
>> +        check(retval, expected, "Clear ccs data first value", test);
>> +        retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
>> +        check(retval, expected, "Clear ccs data last value", test);
>> +    }
>> +    dma_fence_put(fence);
>> +}
>> +
>> +static void validate_ccs_test_run_tile(struct xe_device *xe, struct 
>> xe_tile *tile, struct kunit *test)
>> +{
>> +    struct xe_bo *sys_bo, *vram_bo;
>> +    unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
>> +    long ret;
>> +
>> +    sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 
>> DRM_XE_GEM_CPU_CACHING_WC,
>> +            ttm_bo_type_device, XE_BO_FLAG_SYSTEM | 
>> XE_BO_FLAG_NEEDS_CPU_ACCESS);
>> +
>> +    if (IS_ERR(sys_bo)) {
>> +        KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
>> +               PTR_ERR(sys_bo));
>> +        return;
>> +    }
>> +
>> +    xe_bo_lock(sys_bo, false);
>> +    ret = xe_bo_validate(sys_bo, NULL, false);
>> +    if (ret) {
>> +        KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", 
>> ret);
>> +        goto out_unlock;
>
> vram_bo is not initialized here.
>
>> +    }
>> +
>> +    ret = xe_bo_vmap(sys_bo);
>> +    if (ret) {
>> +        KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret);
>> +        goto out_unlock;
>> +    }
>> +    xe_bo_unlock(sys_bo);
>> +
>> +    vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 
>> DRM_XE_GEM_CPU_CACHING_WC,
>> +                   ttm_bo_type_device, bo_flags | 
>> XE_BO_FLAG_NEEDS_CPU_ACCESS);
>> +    if (IS_ERR(vram_bo)) {
>> +        KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
>> +               PTR_ERR(vram_bo));
>> +        return;
>
> Missing bo_put?
>
Since bo creation failed, we don't call bo_put() again.
This is taken care of by the xe_bo_create_user() function.
>
>> +    }
>> +
>> +    xe_bo_lock(vram_bo, false);
>> +    ret = xe_bo_validate(vram_bo, NULL, false);
>> +    if (ret) {
>> +        KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
>> +        goto out_unlock;
>> +    }
>> +
>> +    ret = xe_bo_vmap(vram_bo);
>> +    if (ret) {
>> +        KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
>> +        goto out_unlock;
>> +    }
>> +
>> +    test_clear(xe, tile, sys_bo, vram_bo, test);
>> +    xe_bo_unlock(vram_bo);
>> +
>> +    xe_bo_lock(vram_bo, false);
>> +    xe_bo_vunmap(vram_bo);
>> +    xe_bo_unlock(vram_bo);
>> +
>> +    xe_bo_lock(sys_bo, false);
>> +    xe_bo_vunmap(sys_bo);
>> +    xe_bo_unlock(sys_bo);
>> +out_unlock:
>
> There is no unlock here.
Will change it to out_put here.
>> +    xe_bo_put(vram_bo);
>> +    xe_bo_put(sys_bo);
>> +}
>> +
>> +static int validate_ccs_test_run_device(struct xe_device *xe)
>> +{
>> +    struct kunit *test = xe_cur_kunit();
>> +    struct xe_tile *tile;
>> +    int id;
>> +
>> +    if (!xe_device_has_flat_ccs(xe)) {
>> +        kunit_info(test, "Skipping non-flat-ccs device.\n");
>> +        return 0;
>> +    }
>> +
>> +    if (!(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))) {
>> +        kunit_info(test, "Skipping non-xe2 discrete device %s.\n",
>> +               dev_name(xe->drm.dev));
>> +        return 0;
>> +    }
>
> So is there something else for xe2 igpu and ccs?
Yes, so there is another kunit test xe_ccs_migrate_kunit for xe2 igpu
that does data validation for ccs as well.
>> +
>> +    xe_pm_runtime_get(xe);
>> +
>> +    for_each_tile(tile, xe, id)
>> +        validate_ccs_test_run_tile(xe, tile, test);
>> +
>> +    xe_pm_runtime_put(xe);
>> +
>> +    return 0;
>> +}
>> +
>> +void xe_validate_ccs_kunit(struct kunit *test)
>> +{
>> +    xe_call_for_each_device(validate_ccs_test_run_device);
>> +}
>> +EXPORT_SYMBOL_IF_KUNIT(xe_validate_ccs_kunit);
>> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c 
>> b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
>> index eb0d8963419c..49b9ef060ad3 100644
>> --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
>> +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
>> @@ -9,6 +9,7 @@
>>     static struct kunit_case xe_migrate_tests[] = {
>>       KUNIT_CASE(xe_migrate_sanity_kunit),
>> +    KUNIT_CASE(xe_validate_ccs_kunit),
>>       {}
>>   };
>>   diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h 
>> b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
>> index 7c645c66824f..dc0ecfdfad39 100644
>> --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h
>> +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
>> @@ -9,5 +9,6 @@
>>   struct kunit;
>>     void xe_migrate_sanity_kunit(struct kunit *test);
>> +void xe_validate_ccs_kunit(struct kunit *test);
>>     #endif

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 0/6] Implement compression support on BMG
@ 2024-07-12  6:39 Akshata Jahagirdar
  2024-07-12  6:39 ` [PATCH 1/6] drm/xe/migrate: Handle clear ccs logic for xe2 dgfx Akshata Jahagirdar
                   ` (9 more replies)
  0 siblings, 10 replies; 22+ messages in thread
From: Akshata Jahagirdar @ 2024-07-12  6:39 UTC (permalink / raw)
  To: intel-xe; +Cc: akshatajahagirdar6, Akshata Jahagirdar

Xe2+ has unified compression (exactly one compression mode/format),
where compression is now controlled via PAT at PTE level.
This simplifies KMD operations, as it can now decompress freely
without concern for the buffer's original compression format—unlike DG2,
which had multiple compression formats and thus required copying the
raw CCS state during VRAM eviction. In addition mixed VRAM and system
memory buffers were not supported with compression enabled.

On Xe2 dGPU compression is still only supported with VRAM, however we
can now support compression with VRAM and system memory buffers,
with GPU access being seamless underneath. So long as when doing
VRAM -> system memory the KMD uses compressed -> uncompressed,
to decompress it. This also allows CPU access to such buffers,
assuming that userspace first decompress the corresponding
pages being accessed.
If the pages are already in system memory then KMD would have already
decompressed them. When restoring such buffers with sysmem -> VRAM
the KMD can't easily know which pages were originally compressed,
so we always use uncompressed -> uncompressed here.
With this it also means we can drop all the raw CCS handling on such
platforms (including needing to allocate extra CCS storage).

In order to support this we now need to have two different identity
mappings for compressed and uncompressed VRAM.
In this patch, we set up the additional identity map for the VRAM with
compressed pat_index. We then select the appropriate mapping during
migration/clear. During eviction (vram->sysmem), we use the mapping
from compressed -> uncompressed. During restore (sysmem->vram), we need
the mapping from uncompressed -> uncompressed.
Therefore, we need to have two different mappings for compressed and
uncompressed vram. We set up an additional identity map for the vram
with compressed pat_index.
We then select the appropriate mapping during migration/clear.

Akshata Jahagirdar (6):
  drm/xe/xe2: Introduce identity map for compressed pat for vram
  drm/xe/migrate: Handle clear ccs logic for xe2 dgfx
  drm/xe/migrate: Add kunit to test clear functionality
  drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx
  drm/xe/migrate: Add kunit to test migration functionality for BMG
  drm/xe/xe2: Do not run xe_bo_test for xe2+ dgfx

 drivers/gpu/drm/xe/tests/xe_bo.c           |   6 +
 drivers/gpu/drm/xe/tests/xe_migrate.c      | 389 +++++++++++++++++++++
 drivers/gpu/drm/xe/tests/xe_migrate_test.c |   1 +
 drivers/gpu/drm/xe/tests/xe_migrate_test.h |   1 +
 drivers/gpu/drm/xe/xe_migrate.c            |  91 ++++-
 5 files changed, 470 insertions(+), 18 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/6] drm/xe/migrate: Handle clear ccs logic for xe2 dgfx
  2024-07-12  6:39 [PATCH 0/6] Implement compression support on BMG Akshata Jahagirdar
@ 2024-07-12  6:39 ` Akshata Jahagirdar
  2024-07-12  6:39 ` [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality Akshata Jahagirdar
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 22+ messages in thread
From: Akshata Jahagirdar @ 2024-07-12  6:39 UTC (permalink / raw)
  To: intel-xe
  Cc: akshatajahagirdar6, Jahagirdar, Akshata, Matthew Auld,
	Himal Prasad Ghimiray

From: "Jahagirdar, Akshata" <akshata.jahagirdar@intel.com>

For Xe2 dGPU, we clear the bo by modifying the VRAM using an
uncompressed pat index which then indirectly updates the
compression status as uncompressed i.e zeroed CCS.
So xe_migrate_clear() should be updated for BMG to not
emit CCS surf copy commands.

Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index fa23a7e7ec43..85eec95c9bc2 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -347,6 +347,11 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
 	return logical_mask;
 }
 
+static bool xe_migrate_needs_ccs_emit(struct xe_device *xe)
+{
+	return xe_device_has_flat_ccs(xe) && !(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe));
+}
+
 /**
  * xe_migrate_init() - Initialize a migrate context
  * @tile: Back-pointer to the tile we're initializing for.
@@ -420,7 +425,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 		return ERR_PTR(err);
 
 	if (IS_DGFX(xe)) {
-		if (xe_device_has_flat_ccs(xe))
+		if (xe_migrate_needs_ccs_emit(xe))
 			/* min chunk size corresponds to 4K of CCS Metadata */
 			m->min_chunk_size = SZ_4K * SZ_64K /
 				xe_device_ccs_bytes(xe, SZ_64K);
@@ -1034,7 +1039,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 					clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
 					avail_pts);
 
-		if (xe_device_has_flat_ccs(xe))
+		if (xe_migrate_needs_ccs_emit(xe))
 			batch_size += EMIT_COPY_CCS_DW;
 
 		/* Clear commands */
@@ -1062,7 +1067,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		if (!clear_system_ccs)
 			emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram);
 
-		if (xe_device_has_flat_ccs(xe)) {
+		if (xe_migrate_needs_ccs_emit(xe)) {
 			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
 				      m->cleared_mem_ofs, false, clear_L0);
 			flush_flags = MI_FLUSH_DW_CCS;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality
  2024-07-12  6:39 [PATCH 0/6] Implement compression support on BMG Akshata Jahagirdar
  2024-07-12  6:39 ` [PATCH 1/6] drm/xe/migrate: Handle clear ccs logic for xe2 dgfx Akshata Jahagirdar
@ 2024-07-12  6:39 ` Akshata Jahagirdar
  2024-07-12 12:56   ` Nirmoy Das
  2024-07-12  6:39 ` [PATCH 3/6] drm/xe/xe2: Introduce identity map for compressed pat for vram Akshata Jahagirdar
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Akshata Jahagirdar @ 2024-07-12  6:39 UTC (permalink / raw)
  To: intel-xe; +Cc: akshatajahagirdar6, Jahagirdar, Akshata, Himal Prasad Ghimiray

From: "Jahagirdar, Akshata" <akshata.jahagirdar@intel.com>

This test verifies that the main and ccs data are cleared during bo creation.
The motivation for using KUnit instead of IGT is that, although we can verify
whether the data is zero following bo creation,
we cannot confirm whether the zero value after bo creation is the result of
our clear function or simply because the initial data present was zero.

Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c      | 272 +++++++++++++++++++++
 drivers/gpu/drm/xe/tests/xe_migrate_test.c |   1 +
 drivers/gpu/drm/xe/tests/xe_migrate_test.h |   1 +
 3 files changed, 274 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 962f6438e219..0c9a30dac46d 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -359,3 +359,275 @@ void xe_migrate_sanity_kunit(struct kunit *test)
 	xe_call_for_each_device(migrate_test_run_device);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
+
+static struct dma_fence *blt_copy(struct xe_tile *tile,
+				  struct xe_bo *src_bo, struct xe_bo *dst_bo,
+				  bool copy_only_ccs, const char *str, struct kunit *test)
+{
+	struct xe_gt *gt = tile->primary_gt;
+	struct xe_migrate *m = tile->migrate;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u64 size = src_bo->size;
+	struct xe_res_cursor src_it, dst_it;
+	struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u32 src_L0_pt, dst_L0_pt;
+	u64 src_L0, dst_L0;
+	int err;
+	bool src_is_vram = mem_type_is_vram(src->mem_type);
+	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
+
+	if (!src_is_vram)
+		xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
+	else
+		xe_res_first(src, 0, size, &src_it);
+
+	if (!dst_is_vram)
+		xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
+	else
+		xe_res_first(dst, 0, size, &dst_it);
+
+	while (size) {
+		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 flush_flags = 0;
+		u32 update_idx;
+		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+		src_L0 = xe_migrate_res_sizes(m, &src_it);
+		dst_L0 = xe_migrate_res_sizes(m, &dst_it);
+
+		src_L0 = min(src_L0, dst_L0);
+
+		batch_size += pte_update_size(m, src_is_vram, src_is_vram, src, &src_it, &src_L0,
+					      &src_L0_ofs, &src_L0_pt, 0, 0,
+					      avail_pts);
+
+		batch_size += pte_update_size(m, dst_is_vram, dst_is_vram, dst, &dst_it, &src_L0,
+					      &dst_L0_ofs, &dst_L0_pt, 0,
+					      avail_pts, avail_pts);
+
+		/* Add copy commands size here */
+		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+			((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? EMIT_COPY_CCS_DW : 0);
+
+		bb = xe_bb_new(gt, batch_size, xe->info.has_usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		if (src_is_vram)
+			xe_res_next(&src_it, src_L0);
+		else
+			emit_pte(m, bb, src_L0_pt, src_is_vram, false,
+				 &src_it, src_L0, src);
+
+		if (dst_is_vram)
+			xe_res_next(&dst_it, src_L0);
+		else
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, false,
+				 &dst_it, src_L0, dst);
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+		if (!copy_only_ccs)
+			emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
+
+		if (copy_only_ccs)
+			flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+							  src_is_vram, dst_L0_ofs,
+							  dst_is_vram, src_L0, dst_L0_ofs,
+							  copy_only_ccs);
+
+		job = xe_bb_create_migration_job(m->q, bb,
+						 xe_migrate_batch_base(m, xe->info.has_usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+
+		mutex_lock(&m->job_mutex);
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		size -= src_L0;
+		continue;
+
+err:
+		xe_bb_free(bb, NULL);
+
+err_sync:
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
+static void test_clear(struct xe_device *xe, struct xe_tile *tile,
+		       struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct kunit *test)
+{
+	struct dma_fence *fence;
+	u64 expected, retval;
+
+	expected = 0xd0d0d0d0d0d0d0d0;
+	xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
+
+	fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
+	if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
+		retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
+		if (retval == expected)
+			KUNIT_FAIL(test, "Sanity check failed: VRAM must have compressed value\n");
+	}
+	dma_fence_put(fence);
+
+	fence = blt_copy(tile, vram_bo, sys_bo, false, "Blit copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Decompressed value must be equal to initial value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Decompressed value must be equal to initial value", test);
+	}
+	dma_fence_put(fence);
+
+	kunit_info(test, "Clear vram buffer object\n");
+	expected = 0x0000000000000000;
+	fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource);
+
+	fence = blt_copy(tile, vram_bo, sys_bo,
+			 false, "Blit copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Clear main buffer first value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Clear main buffer last value", test);
+	}
+	dma_fence_put(fence);
+
+	fence = blt_copy(tile, vram_bo, sys_bo,
+			 true, "Blit surf copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Clear ccs data first value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Clear ccs data last value", test);
+	}
+	dma_fence_put(fence);
+}
+
+static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
+				       struct kunit *test)
+{
+	struct xe_bo *sys_bo, *vram_bo;
+	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+	long ret;
+
+	sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+				   DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device,
+				   XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+
+	if (IS_ERR(sys_bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(sys_bo));
+		return;
+	}
+
+	xe_bo_lock(sys_bo, false);
+	ret = xe_bo_validate(sys_bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
+		goto out_put;
+	}
+
+	ret = xe_bo_vmap(sys_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret);
+		goto out_put;
+	}
+	xe_bo_unlock(sys_bo);
+
+	vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
+				    ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+	if (IS_ERR(vram_bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(vram_bo));
+		return;
+	}
+
+	xe_bo_lock(vram_bo, false);
+	ret = xe_bo_validate(vram_bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
+		goto out_put;
+	}
+
+	ret = xe_bo_vmap(vram_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
+		goto out_put;
+	}
+
+	test_clear(xe, tile, sys_bo, vram_bo, test);
+	xe_bo_unlock(vram_bo);
+
+	xe_bo_lock(vram_bo, false);
+	xe_bo_vunmap(vram_bo);
+	xe_bo_unlock(vram_bo);
+
+	xe_bo_lock(sys_bo, false);
+	xe_bo_vunmap(sys_bo);
+	xe_bo_unlock(sys_bo);
+out_put:
+	xe_bo_put(vram_bo);
+	xe_bo_put(sys_bo);
+}
+
+static int validate_ccs_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_tile *tile;
+	int id;
+
+	if (!xe_device_has_flat_ccs(xe)) {
+		kunit_info(test, "Skipping non-flat-ccs device.\n");
+		return 0;
+	}
+
+	if (!(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))) {
+		kunit_info(test, "Skipping non-xe2 discrete device %s.\n",
+			   dev_name(xe->drm.dev));
+		return 0;
+	}
+
+	xe_pm_runtime_get(xe);
+
+	for_each_tile(tile, xe, id)
+		validate_ccs_test_run_tile(xe, tile, test);
+
+	xe_pm_runtime_put(xe);
+
+	return 0;
+}
+
+void xe_validate_ccs_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(validate_ccs_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_validate_ccs_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
index eb0d8963419c..49b9ef060ad3 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -9,6 +9,7 @@
 
 static struct kunit_case xe_migrate_tests[] = {
 	KUNIT_CASE(xe_migrate_sanity_kunit),
+	KUNIT_CASE(xe_validate_ccs_kunit),
 	{}
 };
 
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
index 7c645c66824f..dc0ecfdfad39 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
@@ -9,5 +9,6 @@
 struct kunit;
 
 void xe_migrate_sanity_kunit(struct kunit *test);
+void xe_validate_ccs_kunit(struct kunit *test);
 
 #endif
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/6] drm/xe/xe2: Introduce identity map for compressed pat for vram
  2024-07-12  6:39 [PATCH 0/6] Implement compression support on BMG Akshata Jahagirdar
  2024-07-12  6:39 ` [PATCH 1/6] drm/xe/migrate: Handle clear ccs logic for xe2 dgfx Akshata Jahagirdar
  2024-07-12  6:39 ` [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality Akshata Jahagirdar
@ 2024-07-12  6:39 ` Akshata Jahagirdar
  2024-07-12  6:39 ` [PATCH 4/6] drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx Akshata Jahagirdar
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 22+ messages in thread
From: Akshata Jahagirdar @ 2024-07-12  6:39 UTC (permalink / raw)
  To: intel-xe; +Cc: akshatajahagirdar6, Akshata Jahagirdar, Himal Prasad Ghimiray

Xe2+ has unified compression (exactly one compression mode/format),
where compression is now controlled via PAT at PTE level.
This simplifies KMD operations, as it can now decompress freely
without concern for the buffer's original compression format—unlike DG2,
which had multiple compression formats and thus required copying the
raw CCS state during VRAM eviction. In addition mixed VRAM and system
memory buffers were not supported with compression enabled.

On Xe2 dGPU compression is still only supported with VRAM; however, we
can now support compression with VRAM and system memory buffers,
with GPU access being seamless underneath, so long as, when doing
VRAM -> system memory, the KMD uses compressed -> uncompressed
to decompress it. This also allows CPU access to such buffers,
assuming that userspace first decompresses the corresponding
pages being accessed.
If the pages are already in system memory then KMD would have already
decompressed them. When restoring such buffers with sysmem -> VRAM
the KMD can't easily know which pages were originally compressed,
so we always use uncompressed -> uncompressed here.
With this it also means we can drop all the raw CCS handling on such
platforms (including needing to allocate extra CCS storage).

In order to support this we now need to have two different identity
mappings for compressed and uncompressed VRAM.
In this patch, we set up the additional identity map for the VRAM with
compressed pat_index. We then select the appropriate mapping during
migration/clear. During eviction (vram->sysmem), we use the mapping
from compressed -> uncompressed. During restore (sysmem->vram), we need
the mapping from uncompressed -> uncompressed.
Therefore, we need to have two different mappings for compressed and
uncompressed vram. We set up an additional identity map for the vram
with compressed pat_index.
We then select the appropriate mapping during migration/clear.

Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 75 +++++++++++++++++++++++++++------
 1 file changed, 62 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 85eec95c9bc2..3b8a334fe08f 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -120,14 +120,20 @@ static u64 xe_migrate_vm_addr(u64 slot, u32 level)
 	return (slot + 1ULL) << xe_pt_shift(level + 1);
 }
 
-static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr)
+static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr, bool is_comp_pte)
 {
 	/*
 	 * Remove the DPA to get a correct offset into identity table for the
 	 * migrate offset
 	 */
+	u64 identity_offset = 256ULL;
+
+	if (GRAPHICS_VER(xe) >= 20 && is_comp_pte)
+		identity_offset = 256ULL +
+				  DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G);
+
 	addr -= xe->mem.vram.dpa_base;
-	return addr + (256ULL << xe_pt_shift(2));
+	return addr + (identity_offset << xe_pt_shift(2));
 }
 
 static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
@@ -214,12 +220,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	} else {
 		u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
 
-		m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
+		m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr, false);
 
 		if (xe->info.has_usm) {
 			batch = tile->primary_gt->usm.bb_pool->bo;
 			batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
-			m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
+			m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr, false);
 		}
 	}
 
@@ -251,7 +257,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		  | XE_PTE_NULL);
 	m->cleared_mem_ofs = (255ULL << xe_pt_shift(level));
 
-	/* Identity map the entire vram at 256GiB offset */
+	/* Identity map the entire vram for uncompressed pat_index at 256GiB offset */
 	if (IS_DGFX(xe)) {
 		u64 pos, ofs, flags;
 		/* XXX: Unclear if this should be usable_size? */
@@ -294,6 +300,49 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		}
 
 		xe_assert(xe, pos == vram_limit);
+
+		/*
+		 * Identity map the entire vram for compressed pat_index for xe2+
+		 * if flat ccs is enabled.
+		 */
+		if (GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe)) {
+			u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION];
+			u64 vram_offset = 256 +
+				DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G);
+
+			level = 2;
+			ofs = map_ofs + XE_PAGE_SIZE * level + vram_offset * 8;
+			flags = vm->pt_ops->pte_encode_addr(xe, 0, comp_pat_index, level,
+								true, 0);
+
+			/*
+			 * Use 1GB pages when possible, last chunk always use 2M
+			 * pages as mixing reserved memory (stolen, WOCPM) with a single
+			 * mapping is not allowed on certain platforms.
+			 */
+			for (pos = xe->mem.vram.dpa_base; pos < vram_limit;
+			pos += SZ_1G, ofs += 8) {
+				if (pos + SZ_1G >= vram_limit) {
+					u64 pt31_ofs = bo->size - XE_PAGE_SIZE;
+
+					entry = vm->pt_ops->pde_encode_bo(bo, pt31_ofs,
+									comp_pat_index);
+					xe_map_wr(xe, &bo->vmap, ofs, u64, entry);
+
+					flags = vm->pt_ops->pte_encode_addr(xe, 0,
+									comp_pat_index,
+									level - 1,
+									true, 0);
+
+					for (ofs = pt31_ofs; pos < vram_limit;
+					pos += SZ_2M, ofs += 8)
+						xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+					break;	/* Ensure pos == vram_limit assert correct */
+				}
+
+				xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+			}
+		}
 	}
 
 	/*
@@ -480,7 +529,7 @@ static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur)
 }
 
 static u32 pte_update_size(struct xe_migrate *m,
-			   bool is_vram,
+			   bool is_vram, bool is_comp_pte,
 			   struct ttm_resource *res,
 			   struct xe_res_cursor *cur,
 			   u64 *L0, u64 *L0_ofs, u32 *L0_pt,
@@ -492,7 +541,8 @@ static u32 pte_update_size(struct xe_migrate *m,
 	if (is_vram && xe_migrate_allow_identity(*L0, cur)) {
 		/* Offset into identity map. */
 		*L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile),
-					      cur->start + vram_region_gpu_offset(res));
+					      cur->start + vram_region_gpu_offset(res),
+					      is_comp_pte);
 		cmds += cmd_size;
 	} else {
 		/* Clip L0 to available size */
@@ -783,17 +833,17 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 
 		src_L0 = min(src_L0, dst_L0);
 
-		batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0,
+		batch_size += pte_update_size(m, src_is_vram, false, src, &src_it, &src_L0,
 					      &src_L0_ofs, &src_L0_pt, 0, 0,
 					      avail_pts);
 
-		batch_size += pte_update_size(m, dst_is_vram, dst, &dst_it, &src_L0,
+		batch_size += pte_update_size(m, dst_is_vram, false, dst, &dst_it, &src_L0,
 					      &dst_L0_ofs, &dst_L0_pt, 0,
 					      avail_pts, avail_pts);
 
 		if (copy_system_ccs) {
 			ccs_size = xe_device_ccs_bytes(xe, src_L0);
-			batch_size += pte_update_size(m, false, NULL, &ccs_it, &ccs_size,
+			batch_size += pte_update_size(m, false, false, NULL, &ccs_it, &ccs_size,
 						      &ccs_ofs, &ccs_pt, 0,
 						      2 * avail_pts,
 						      avail_pts);
@@ -1034,14 +1084,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 
 		/* Calculate final sizes and batch size.. */
 		batch_size = 2 +
-			pte_update_size(m, clear_vram, src, &src_it,
+			pte_update_size(m, clear_vram, false, src, &src_it,
 					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
 					clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
 					avail_pts);
 
 		if (xe_migrate_needs_ccs_emit(xe))
 			batch_size += EMIT_COPY_CCS_DW;
-
 		/* Clear commands */
 
 		if (WARN_ON_ONCE(!clear_L0))
@@ -1151,7 +1200,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
 	if (!ppgtt_ofs)
 		ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile),
 						xe_bo_addr(update->pt_bo, 0,
-							   XE_PAGE_SIZE));
+							   XE_PAGE_SIZE), false);
 
 	do {
 		u64 addr = ppgtt_ofs + ofs * 8;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 4/6] drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx
  2024-07-12  6:39 [PATCH 0/6] Implement compression support on BMG Akshata Jahagirdar
                   ` (2 preceding siblings ...)
  2024-07-12  6:39 ` [PATCH 3/6] drm/xe/xe2: Introduce identity map for compressed pat for vram Akshata Jahagirdar
@ 2024-07-12  6:39 ` Akshata Jahagirdar
  2024-07-12 12:55   ` Nirmoy Das
  2024-07-12  6:39 ` [PATCH 5/6] drm/xe/migrate: Add kunit to test migration functionality for BMG Akshata Jahagirdar
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Akshata Jahagirdar @ 2024-07-12  6:39 UTC (permalink / raw)
  To: intel-xe
  Cc: akshatajahagirdar6, Jahagirdar, Akshata, Matthew Auld,
	Himal Prasad Ghimiray

From: "Jahagirdar, Akshata" <akshata.jahagirdar@intel.com>

During eviction (vram->sysmem), we use the mapping from compressed -> uncompressed.
During restore (sysmem->vram), we need the mapping from uncompressed -> uncompressed.
Handle the logic for selecting the compressed identity map for eviction,
and selecting the uncompressed map for restore operations.

Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 3b8a334fe08f..1eee6be24423 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -715,7 +715,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 	struct xe_gt *gt = m->tile->primary_gt;
 	u32 flush_flags = 0;
 
-	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
+	if (xe_device_needs_ccs_emit(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
 		/*
 		 * If the src is already in vram, then it should already
 		 * have been cleared by us, or has been populated by the
@@ -791,6 +791,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 	bool copy_ccs = xe_device_has_flat_ccs(xe) &&
 		xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo);
 	bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
+	bool use_comp_pat = GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe) && src_is_vram && !dst_is_vram;
 
 	/* Copying CCS between two different BOs is not supported yet. */
 	if (XE_WARN_ON(copy_ccs && src_bo != dst_bo))
@@ -833,7 +834,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 
 		src_L0 = min(src_L0, dst_L0);
 
-		batch_size += pte_update_size(m, src_is_vram, false, src, &src_it, &src_L0,
+		batch_size += pte_update_size(m, src_is_vram, use_comp_pat, src, &src_it, &src_L0,
 					      &src_L0_ofs, &src_L0_pt, 0, 0,
 					      avail_pts);
 
@@ -852,7 +853,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 
 		/* Add copy commands size here */
 		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
-			((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0));
+			((xe_device_needs_ccs_emit(xe) ? EMIT_COPY_CCS_DW : 0));
 
 		bb = xe_bb_new(gt, batch_size, usm);
 		if (IS_ERR(bb)) {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 5/6] drm/xe/migrate: Add kunit to test migration functionality for BMG
  2024-07-12  6:39 [PATCH 0/6] Implement compression support on BMG Akshata Jahagirdar
                   ` (3 preceding siblings ...)
  2024-07-12  6:39 ` [PATCH 4/6] drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx Akshata Jahagirdar
@ 2024-07-12  6:39 ` Akshata Jahagirdar
  2024-07-12  6:39 ` [PATCH 6/6] drm/xe/xe2: Do not run xe_bo_test for xe2+ dgfx Akshata Jahagirdar
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 22+ messages in thread
From: Akshata Jahagirdar @ 2024-07-12  6:39 UTC (permalink / raw)
  To: intel-xe; +Cc: akshatajahagirdar6, Jahagirdar, Akshata, Himal Prasad Ghimiray

From: "Jahagirdar, Akshata" <akshata.jahagirdar@intel.com>

This part of the kunit test verifies that:
- main data is decompressed and ccs data is cleared after bo eviction.
- main data is raw copied and ccs data is cleared after bo restore.

Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 119 +++++++++++++++++++++++++-
 1 file changed, 118 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 0c9a30dac46d..e810d64b9e31 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -481,6 +481,94 @@ static struct dma_fence *blt_copy(struct xe_tile *tile,
 	return fence;
 }
 
+static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
+			 struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct xe_bo *ccs_bo,
+			 struct kunit *test)
+{
+	struct dma_fence *fence;
+	u64 expected, retval;
+	long timeout;
+	long ret;
+
+	expected = 0xd0d0d0d0d0d0d0d0;
+	xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
+
+	fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
+	if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
+		retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
+		if (retval == expected)
+			KUNIT_FAIL(test, "Sanity check failed: VRAM must have compressed value\n");
+	}
+	dma_fence_put(fence);
+
+	kunit_info(test, "Evict vram buffer object\n");
+	ret = xe_bo_evict(vram_bo, true);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to evict bo.\n");
+		return;
+	}
+
+	ret = xe_bo_vmap(vram_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
+		return;
+	}
+
+	retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
+	check(retval, expected, "Clear evicted vram data first value", test);
+	retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64);
+	check(retval, expected, "Clear evicted vram data last value", test);
+
+	fence = blt_copy(tile, vram_bo, ccs_bo,
+			 true, "Blit surf copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
+		retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64);
+		check(retval, 0, "Clear ccs data first value", test);
+
+		retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64);
+		check(retval, 0, "Clear ccs data last value", test);
+	}
+	dma_fence_put(fence);
+
+	kunit_info(test, "Restore vram buffer object\n");
+	ret = xe_bo_validate(vram_bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
+		return;
+	}
+
+	/* Sync all migration blits */
+	timeout = dma_resv_wait_timeout(vram_bo->ttm.base.resv,
+					DMA_RESV_USAGE_KERNEL,
+					true,
+					5 * HZ);
+	if (timeout <= 0) {
+		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
+		return;
+	}
+
+	ret = xe_bo_vmap(vram_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
+		return;
+	}
+
+	retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
+	check(retval, expected, "Restored value must be equal to initial value", test);
+	retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64);
+	check(retval, expected, "Restored value must be equal to initial value", test);
+
+	fence = blt_copy(tile, vram_bo, ccs_bo,
+			 true, "Blit surf copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
+		retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64);
+		check(retval, 0, "Clear ccs data first value", test);
+		retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64);
+		check(retval, 0, "Clear ccs data last value", test);
+	}
+	dma_fence_put(fence);
+}
+
 static void test_clear(struct xe_device *xe, struct xe_tile *tile,
 		       struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct kunit *test)
 {
@@ -535,7 +623,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile,
 static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
 				       struct kunit *test)
 {
-	struct xe_bo *sys_bo, *vram_bo;
+	struct xe_bo *sys_bo, *vram_bo = NULL, *ccs_bo = NULL;
 	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
 	long ret;
 
@@ -563,6 +651,29 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
 	}
 	xe_bo_unlock(sys_bo);
 
+	ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
+				   ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+
+	if (IS_ERR(ccs_bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(ccs_bo));
+		return;
+	}
+
+	xe_bo_lock(ccs_bo, false);
+	ret = xe_bo_validate(ccs_bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
+		goto out_unlock;
+	}
+
+	ret = xe_bo_vmap(ccs_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret);
+		goto out_unlock;
+	}
+	xe_bo_unlock(ccs_bo);
+
 	vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
 				    ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
 	if (IS_ERR(vram_bo)) {
@@ -585,17 +696,23 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
 	}
 
 	test_clear(xe, tile, sys_bo, vram_bo, test);
+	test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, test);
 	xe_bo_unlock(vram_bo);
 
 	xe_bo_lock(vram_bo, false);
 	xe_bo_vunmap(vram_bo);
 	xe_bo_unlock(vram_bo);
 
+	xe_bo_lock(ccs_bo, false);
+	xe_bo_vunmap(ccs_bo);
+	xe_bo_unlock(ccs_bo);
+
 	xe_bo_lock(sys_bo, false);
 	xe_bo_vunmap(sys_bo);
 	xe_bo_unlock(sys_bo);
 out_put:
 	xe_bo_put(vram_bo);
+	xe_bo_put(ccs_bo);
 	xe_bo_put(sys_bo);
 }
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 6/6] drm/xe/xe2: Do not run xe_bo_test for xe2+ dgfx
  2024-07-12  6:39 [PATCH 0/6] Implement compression support on BMG Akshata Jahagirdar
                   ` (4 preceding siblings ...)
  2024-07-12  6:39 ` [PATCH 5/6] drm/xe/migrate: Add kunit to test migration functionality for BMG Akshata Jahagirdar
@ 2024-07-12  6:39 ` Akshata Jahagirdar
  2024-07-12  6:44 ` ✓ CI.Patch_applied: success for Implement compression support on BMG Patchwork
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 22+ messages in thread
From: Akshata Jahagirdar @ 2024-07-12  6:39 UTC (permalink / raw)
  To: intel-xe; +Cc: akshatajahagirdar6, Jahagirdar, Akshata, Himal Prasad Ghimiray

From: "Jahagirdar, Akshata" <akshata.jahagirdar@intel.com>

In xe2+ dgfx, we don't need to handle the copying of ccs
metadata during migration. This test validates the ccs data after
the clear and copy during evict/restore operations. Thus, we can
skip this test on xe2+ dgfx.

Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 9f3c02826464..f7ef91b9a5d2 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -163,6 +163,12 @@ static int ccs_test_run_device(struct xe_device *xe)
 		return 0;
 	}
 
+	/* For xe2+ dgfx, we don't handle ccs metadata */
+	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)) {
+		kunit_info(test, "Skipping on xe2+ dgfx device.\n");
+		return 0;
+	}
+
 	xe_pm_runtime_get(xe);
 
 	for_each_tile(tile, xe, id) {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* ✓ CI.Patch_applied: success for Implement compression support on BMG
  2024-07-12  6:39 [PATCH 0/6] Implement compression support on BMG Akshata Jahagirdar
                   ` (5 preceding siblings ...)
  2024-07-12  6:39 ` [PATCH 6/6] drm/xe/xe2: Do not run xe_bo_test for xe2+ dgfx Akshata Jahagirdar
@ 2024-07-12  6:44 ` Patchwork
  2024-07-12  6:45 ` ✗ CI.checkpatch: warning " Patchwork
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2024-07-12  6:44 UTC (permalink / raw)
  To: Akshata Jahagirdar; +Cc: intel-xe

== Series Details ==

Series: Implement compression support on BMG
URL   : https://patchwork.freedesktop.org/series/136023/
State : success

== Summary ==

=== Applying kernel patches on branch 'drm-tip' with base: ===
Base commit: 0b9174f23743 drm-tip: 2024y-07m-12d-04h-04m-44s UTC integration manifest
=== git am output follows ===
Applying: drm/xe/migrate: Handle clear ccs logic for xe2 dgfx
Applying: drm/xe/migrate: Add kunit to test clear functionality
Applying: drm/xe/xe2: Introduce identity map for compressed pat for vram
Applying: drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx
Applying: drm/xe/migrate: Add kunit to test migration functionality for BMG
Applying: drm/xe/xe2: Do not run xe_bo_test for xe2+ dgfx



^ permalink raw reply	[flat|nested] 22+ messages in thread

* ✗ CI.checkpatch: warning for Implement compression support on BMG
  2024-07-12  6:39 [PATCH 0/6] Implement compression support on BMG Akshata Jahagirdar
                   ` (6 preceding siblings ...)
  2024-07-12  6:44 ` ✓ CI.Patch_applied: success for Implement compression support on BMG Patchwork
@ 2024-07-12  6:45 ` Patchwork
  2024-07-12  6:45 ` ✗ CI.KUnit: failure " Patchwork
  2024-07-12  7:24 ` [PATCH 0/6] " Akshata Jahagirdar
  9 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2024-07-12  6:45 UTC (permalink / raw)
  To: Akshata Jahagirdar; +Cc: intel-xe

== Series Details ==

Series: Implement compression support on BMG
URL   : https://patchwork.freedesktop.org/series/136023/
State : warning

== Summary ==

+ KERNEL=/kernel
+ git clone https://gitlab.freedesktop.org/drm/maintainer-tools mt
Cloning into 'mt'...
warning: redirecting to https://gitlab.freedesktop.org/drm/maintainer-tools.git/
+ git -C mt rev-list -n1 origin/master
51ce9f6cd981d42d7467409d7dbc559a450abc1e
+ cd /kernel
+ git config --global --add safe.directory /kernel
+ git log -n1
commit 10b21c37e888b840444931e81111a41ab13cf420
Author: Jahagirdar, Akshata <akshata.jahagirdar@intel.com>
Date:   Fri Jul 12 06:39:25 2024 +0000

    drm/xe/xe2: Do not run xe_bo_test for xe2+ dgfx
    
    In xe2+ dgfx, we don't need to handle the copying of ccs
    metadata during migration. This test validates the ccs data post
    clear and copy during evict/restore operation. Thus, we can skip
    this test on xe2+ dgfx.
    
    Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
    Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
+ /mt/dim checkpatch 0b9174f237436b60b1c6fd9d5fbacbfcde660e3e drm-intel
eeb7861be028 drm/xe/migrate: Handle clear ccs logic for xe2 dgfx
-:58: WARNING:FROM_SIGN_OFF_MISMATCH: From:/Signed-off-by: email name mismatch: 'From: "Jahagirdar, Akshata" <akshata.jahagirdar@intel.com>' != 'Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>'

total: 0 errors, 1 warnings, 0 checks, 35 lines checked
288027bbf066 drm/xe/migrate: Add kunit to test clear functionality
-:317: WARNING:FROM_SIGN_OFF_MISMATCH: From:/Signed-off-by: email name mismatch: 'From: "Jahagirdar, Akshata" <akshata.jahagirdar@intel.com>' != 'Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>'

total: 0 errors, 1 warnings, 0 checks, 288 lines checked
0909884d6cb7 drm/xe/xe2: Introduce identity map for compressed pat for vram
85980848175d drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx
-:6: WARNING:COMMIT_LOG_LONG_LINE: Prefer a maximum 75 chars per line (possible unwrapped commit description?)
#6: 
During eviction (vram->sysmem), we use the mapping from compressed -> uncompressed.

-:53: WARNING:FROM_SIGN_OFF_MISMATCH: From:/Signed-off-by: email name mismatch: 'From: "Jahagirdar, Akshata" <akshata.jahagirdar@intel.com>' != 'Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>'

total: 0 errors, 2 warnings, 0 checks, 31 lines checked
efabe00da866 drm/xe/migrate: Add kunit to test migration functionality for BMG
-:175: WARNING:FROM_SIGN_OFF_MISMATCH: From:/Signed-off-by: email name mismatch: 'From: "Jahagirdar, Akshata" <akshata.jahagirdar@intel.com>' != 'Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>'

total: 0 errors, 1 warnings, 0 checks, 154 lines checked
10b21c37e888 drm/xe/xe2: Do not run xe_bo_test for xe2+ dgfx
-:30: WARNING:FROM_SIGN_OFF_MISMATCH: From:/Signed-off-by: email name mismatch: 'From: "Jahagirdar, Akshata" <akshata.jahagirdar@intel.com>' != 'Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>'

total: 0 errors, 1 warnings, 0 checks, 12 lines checked



^ permalink raw reply	[flat|nested] 22+ messages in thread

* ✗ CI.KUnit: failure for Implement compression support on BMG
  2024-07-12  6:39 [PATCH 0/6] Implement compression support on BMG Akshata Jahagirdar
                   ` (7 preceding siblings ...)
  2024-07-12  6:45 ` ✗ CI.checkpatch: warning " Patchwork
@ 2024-07-12  6:45 ` Patchwork
  2024-07-12  7:24 ` [PATCH 0/6] " Akshata Jahagirdar
  9 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2024-07-12  6:45 UTC (permalink / raw)
  To: Akshata Jahagirdar; +Cc: intel-xe

== Series Details ==

Series: Implement compression support on BMG
URL   : https://patchwork.freedesktop.org/series/136023/
State : failure

== Summary ==

+ trap cleanup EXIT
+ /kernel/tools/testing/kunit/kunit.py run --kunitconfig /kernel/drivers/gpu/drm/xe/.kunitconfig
ERROR:root:../drivers/gpu/drm/xe/xe_migrate.c: In function ‘xe_migrate_ccs_copy’:
../drivers/gpu/drm/xe/xe_migrate.c:718:6: error: implicit declaration of function ‘xe_device_needs_ccs_emit’; did you mean ‘xe_migrate_needs_ccs_emit’? [-Werror=implicit-function-declaration]
  718 |  if (xe_device_needs_ccs_emit(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
      |      ^~~~~~~~~~~~~~~~~~~~~~~~
      |      xe_migrate_needs_ccs_emit
In file included from ../drivers/gpu/drm/xe/xe_migrate.c:1536:
../drivers/gpu/drm/xe/tests/xe_migrate.c: In function ‘validate_ccs_test_run_tile’:
../drivers/gpu/drm/xe/tests/xe_migrate.c:673:3: error: label ‘out_unlock’ used but not defined
  673 |   goto out_unlock;
      |   ^~~~
cc1: some warnings being treated as errors
make[7]: *** [../scripts/Makefile.build:244: drivers/gpu/drm/xe/xe_migrate.o] Error 1
make[7]: *** Waiting for unfinished jobs....
../lib/iomap.c:156:5: warning: no previous prototype for ‘ioread64_lo_hi’ [-Wmissing-prototypes]
  156 | u64 ioread64_lo_hi(const void __iomem *addr)
      |     ^~~~~~~~~~~~~~
../lib/iomap.c:163:5: warning: no previous prototype for ‘ioread64_hi_lo’ [-Wmissing-prototypes]
  163 | u64 ioread64_hi_lo(const void __iomem *addr)
      |     ^~~~~~~~~~~~~~
../lib/iomap.c:170:5: warning: no previous prototype for ‘ioread64be_lo_hi’ [-Wmissing-prototypes]
  170 | u64 ioread64be_lo_hi(const void __iomem *addr)
      |     ^~~~~~~~~~~~~~~~
../lib/iomap.c:178:5: warning: no previous prototype for ‘ioread64be_hi_lo’ [-Wmissing-prototypes]
  178 | u64 ioread64be_hi_lo(const void __iomem *addr)
      |     ^~~~~~~~~~~~~~~~
../lib/iomap.c:264:6: warning: no previous prototype for ‘iowrite64_lo_hi’ [-Wmissing-prototypes]
  264 | void iowrite64_lo_hi(u64 val, void __iomem *addr)
      |      ^~~~~~~~~~~~~~~
../lib/iomap.c:272:6: warning: no previous prototype for ‘iowrite64_hi_lo’ [-Wmissing-prototypes]
  272 | void iowrite64_hi_lo(u64 val, void __iomem *addr)
      |      ^~~~~~~~~~~~~~~
../lib/iomap.c:280:6: warning: no previous prototype for ‘iowrite64be_lo_hi’ [-Wmissing-prototypes]
  280 | void iowrite64be_lo_hi(u64 val, void __iomem *addr)
      |      ^~~~~~~~~~~~~~~~~
../lib/iomap.c:288:6: warning: no previous prototype for ‘iowrite64be_hi_lo’ [-Wmissing-prototypes]
  288 | void iowrite64be_hi_lo(u64 val, void __iomem *addr)
      |      ^~~~~~~~~~~~~~~~~
make[6]: *** [../scripts/Makefile.build:485: drivers/gpu/drm/xe] Error 2
make[5]: *** [../scripts/Makefile.build:485: drivers/gpu/drm] Error 2
make[4]: *** [../scripts/Makefile.build:485: drivers/gpu] Error 2
make[3]: *** [../scripts/Makefile.build:485: drivers] Error 2
make[3]: *** Waiting for unfinished jobs....
make[2]: *** [/kernel/Makefile:1934: .] Error 2
make[1]: *** [/kernel/Makefile:240: __sub-make] Error 2
make: *** [Makefile:240: __sub-make] Error 2

[06:45:13] Configuring KUnit Kernel ...
Generating .config ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
[06:45:17] Building KUnit Kernel ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
Building with:
$ make ARCH=um O=.kunit --jobs=48
+ cleanup
++ stat -c %u:%g /kernel
+ chown -R 1003:1003 /kernel



^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 0/6] Implement compression support on BMG
  2024-07-12  6:39 [PATCH 0/6] Implement compression support on BMG Akshata Jahagirdar
                   ` (8 preceding siblings ...)
  2024-07-12  6:45 ` ✗ CI.KUnit: failure " Patchwork
@ 2024-07-12  7:24 ` Akshata Jahagirdar
  9 siblings, 0 replies; 22+ messages in thread
From: Akshata Jahagirdar @ 2024-07-12  7:24 UTC (permalink / raw)
  To: intel-xe; +Cc: akshatajahagirdar6, Akshata Jahagirdar

Xe2+ has unified compression (exactly one compression mode/format),
where compression is now controlled via PAT at PTE level.
This simplifies KMD operations, as it can now decompress freely
without concern for the buffer's original compression format—unlike DG2,
which had multiple compression formats and thus required copying the
raw CCS state during VRAM eviction. In addition, mixed VRAM and system
memory buffers were not supported with compression enabled.

On Xe2 dGPU compression is still only supported with VRAM, however we
can now support compression with VRAM and system memory buffers,
with GPU access being seamless underneath. So long as when doing
VRAM -> system memory the KMD uses compressed -> uncompressed,
to decompress it. This also allows CPU access to such buffers,
assuming that userspace first decompresses the corresponding
pages being accessed.
If the pages are already in system memory then KMD would have already
decompressed them. When restoring such buffers with sysmem -> VRAM
the KMD can't easily know which pages were originally compressed,
so we always use uncompressed -> uncompressed here.
With this it also means we can drop all the raw CCS handling on such
platforms (including needing to allocate extra CCS storage).

In order to support this we now need to have two different identity
mappings for compressed and uncompressed VRAM.
In this patch, we set up the additional identity map for the VRAM with
compressed pat_index. We then select the appropriate mapping during
migration/clear. During eviction (vram->sysmem), we use the mapping
from compressed -> uncompressed. During restore (sysmem->vram), we need
the mapping from uncompressed -> uncompressed.
Therefore, we need to have two different mappings for compressed and
uncompressed vram. We set up an additional identity map for the vram
with compressed pat_index.
We then select the appropriate mapping during migration/clear.

Akshata Jahagirdar (6):
  drm/xe/xe2: Introduce identity map for compressed pat for vram
  drm/xe/migrate: Handle clear ccs logic for xe2 dgfx
  drm/xe/migrate: Add kunit to test clear functionality
  drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx
  drm/xe/migrate: Add kunit to test migration functionality for BMG
  drm/xe/xe2: Do not run xe_bo_test for xe2+ dgfx

 drivers/gpu/drm/xe/tests/xe_bo.c           |   6 +
 drivers/gpu/drm/xe/tests/xe_migrate.c      | 389 +++++++++++++++++++++
 drivers/gpu/drm/xe/tests/xe_migrate_test.c |   1 +
 drivers/gpu/drm/xe/tests/xe_migrate_test.h |   1 +
 drivers/gpu/drm/xe/xe_migrate.c            |  91 ++++-
 5 files changed, 470 insertions(+), 18 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality
       [not found] <cover.1720768378.git.akshata.jahagirdar@intel.com>
@ 2024-07-12  7:24 ` Akshata Jahagirdar
  0 siblings, 0 replies; 22+ messages in thread
From: Akshata Jahagirdar @ 2024-07-12  7:24 UTC (permalink / raw)
  To: intel-xe; +Cc: akshatajahagirdar6, Akshata Jahagirdar, Himal Prasad Ghimiray

This test verifies if the main and ccs data are cleared during bo creation.
The motivation to use Kunit instead of IGT is that, although we can verify
whether the data is zero following bo creation,
we cannot confirm whether the zero value after bo creation is the result of
our clear function or simply because the initial data present was zero.

Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c      | 272 +++++++++++++++++++++
 drivers/gpu/drm/xe/tests/xe_migrate_test.c |   1 +
 drivers/gpu/drm/xe/tests/xe_migrate_test.h |   1 +
 3 files changed, 274 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 962f6438e219..0c9a30dac46d 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -359,3 +359,275 @@ void xe_migrate_sanity_kunit(struct kunit *test)
 	xe_call_for_each_device(migrate_test_run_device);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
+
+static struct dma_fence *blt_copy(struct xe_tile *tile,
+				  struct xe_bo *src_bo, struct xe_bo *dst_bo,
+				  bool copy_only_ccs, const char *str, struct kunit *test)
+{
+	struct xe_gt *gt = tile->primary_gt;
+	struct xe_migrate *m = tile->migrate;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u64 size = src_bo->size;
+	struct xe_res_cursor src_it, dst_it;
+	struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u32 src_L0_pt, dst_L0_pt;
+	u64 src_L0, dst_L0;
+	int err;
+	bool src_is_vram = mem_type_is_vram(src->mem_type);
+	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
+
+	if (!src_is_vram)
+		xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
+	else
+		xe_res_first(src, 0, size, &src_it);
+
+	if (!dst_is_vram)
+		xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
+	else
+		xe_res_first(dst, 0, size, &dst_it);
+
+	while (size) {
+		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 flush_flags = 0;
+		u32 update_idx;
+		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+		src_L0 = xe_migrate_res_sizes(m, &src_it);
+		dst_L0 = xe_migrate_res_sizes(m, &dst_it);
+
+		src_L0 = min(src_L0, dst_L0);
+
+		batch_size += pte_update_size(m, src_is_vram, src_is_vram, src, &src_it, &src_L0,
+					      &src_L0_ofs, &src_L0_pt, 0, 0,
+					      avail_pts);
+
+		batch_size += pte_update_size(m, dst_is_vram, dst_is_vram, dst, &dst_it, &src_L0,
+					      &dst_L0_ofs, &dst_L0_pt, 0,
+					      avail_pts, avail_pts);
+
+		/* Add copy commands size here */
+		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+			((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? EMIT_COPY_CCS_DW : 0);
+
+		bb = xe_bb_new(gt, batch_size, xe->info.has_usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		if (src_is_vram)
+			xe_res_next(&src_it, src_L0);
+		else
+			emit_pte(m, bb, src_L0_pt, src_is_vram, false,
+				 &src_it, src_L0, src);
+
+		if (dst_is_vram)
+			xe_res_next(&dst_it, src_L0);
+		else
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, false,
+				 &dst_it, src_L0, dst);
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+		if (!copy_only_ccs)
+			emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
+
+		if (copy_only_ccs)
+			flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+							  src_is_vram, dst_L0_ofs,
+							  dst_is_vram, src_L0, dst_L0_ofs,
+							  copy_only_ccs);
+
+		job = xe_bb_create_migration_job(m->q, bb,
+						 xe_migrate_batch_base(m, xe->info.has_usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+
+		mutex_lock(&m->job_mutex);
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		size -= src_L0;
+		continue;
+
+err:
+		xe_bb_free(bb, NULL);
+
+err_sync:
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
+static void test_clear(struct xe_device *xe, struct xe_tile *tile,
+		       struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct kunit *test)
+{
+	struct dma_fence *fence;
+	u64 expected, retval;
+
+	expected = 0xd0d0d0d0d0d0d0d0;
+	xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
+
+	fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
+	if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
+		retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
+		if (retval == expected)
+			KUNIT_FAIL(test, "Sanity check failed: VRAM must have compressed value\n");
+	}
+	dma_fence_put(fence);
+
+	fence = blt_copy(tile, vram_bo, sys_bo, false, "Blit copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Decompressed value must be equal to initial value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Decompressed value must be equal to initial value", test);
+	}
+	dma_fence_put(fence);
+
+	kunit_info(test, "Clear vram buffer object\n");
+	expected = 0x0000000000000000;
+	fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource);
+
+	fence = blt_copy(tile, vram_bo, sys_bo,
+			 false, "Blit copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Clear main buffer first value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Clear main buffer last value", test);
+	}
+	dma_fence_put(fence);
+
+	fence = blt_copy(tile, vram_bo, sys_bo,
+			 true, "Blit surf copy from vram to sysmem", test);
+	if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
+		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+		check(retval, expected, "Clear ccs data first value", test);
+		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+		check(retval, expected, "Clear ccs data last value", test);
+	}
+	dma_fence_put(fence);
+}
+
+static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
+				       struct kunit *test)
+{
+	struct xe_bo *sys_bo, *vram_bo;
+	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+	long ret;
+
+	sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+				   DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device,
+				   XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+
+	if (IS_ERR(sys_bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(sys_bo));
+		return;
+	}
+
+	xe_bo_lock(sys_bo, false);
+	ret = xe_bo_validate(sys_bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
+		goto out_put;
+	}
+
+	ret = xe_bo_vmap(sys_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret);
+		goto out_put;
+	}
+	xe_bo_unlock(sys_bo);
+
+	vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
+				    ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+	if (IS_ERR(vram_bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(vram_bo));
+		return;
+	}
+
+	xe_bo_lock(vram_bo, false);
+	ret = xe_bo_validate(vram_bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
+		goto out_put;
+	}
+
+	ret = xe_bo_vmap(vram_bo);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
+		goto out_put;
+	}
+
+	test_clear(xe, tile, sys_bo, vram_bo, test);
+	xe_bo_unlock(vram_bo);
+
+	xe_bo_lock(vram_bo, false);
+	xe_bo_vunmap(vram_bo);
+	xe_bo_unlock(vram_bo);
+
+	xe_bo_lock(sys_bo, false);
+	xe_bo_vunmap(sys_bo);
+	xe_bo_unlock(sys_bo);
+out_put:
+	xe_bo_put(vram_bo);
+	xe_bo_put(sys_bo);
+}
+
+static int validate_ccs_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_tile *tile;
+	int id;
+
+	if (!xe_device_has_flat_ccs(xe)) {
+		kunit_info(test, "Skipping non-flat-ccs device.\n");
+		return 0;
+	}
+
+	if (!(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))) {
+		kunit_info(test, "Skipping non-xe2 discrete device %s.\n",
+			   dev_name(xe->drm.dev));
+		return 0;
+	}
+
+	xe_pm_runtime_get(xe);
+
+	for_each_tile(tile, xe, id)
+		validate_ccs_test_run_tile(xe, tile, test);
+
+	xe_pm_runtime_put(xe);
+
+	return 0;
+}
+
+void xe_validate_ccs_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(validate_ccs_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_validate_ccs_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
index eb0d8963419c..49b9ef060ad3 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -9,6 +9,7 @@
 
 static struct kunit_case xe_migrate_tests[] = {
 	KUNIT_CASE(xe_migrate_sanity_kunit),
+	KUNIT_CASE(xe_validate_ccs_kunit),
 	{}
 };
 
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
index 7c645c66824f..dc0ecfdfad39 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
@@ -9,5 +9,6 @@
 struct kunit;
 
 void xe_migrate_sanity_kunit(struct kunit *test);
+void xe_validate_ccs_kunit(struct kunit *test);
 
 #endif
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/6] drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx
  2024-07-12  6:39 ` [PATCH 4/6] drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx Akshata Jahagirdar
@ 2024-07-12 12:55   ` Nirmoy Das
  2024-07-15 20:56     ` Jahagirdar, Akshata
  0 siblings, 1 reply; 22+ messages in thread
From: Nirmoy Das @ 2024-07-12 12:55 UTC (permalink / raw)
  To: Akshata Jahagirdar, intel-xe
  Cc: akshatajahagirdar6, Matthew Auld, Himal Prasad Ghimiray

[-- Attachment #1: Type: text/plain, Size: 2874 bytes --]

Hi Akshata,

On 7/12/2024 8:39 AM, Akshata Jahagirdar wrote:
> From: "Jahagirdar, Akshata"<akshata.jahagirdar@intel.com>
>
> During eviction (vram->sysmem), we use the mapping from compressed -> uncompressed.
> During restore (sysmem->vram), we need the mapping from uncompressed -> uncompressed.
> Handle logic for selecting the compressed identity map for eviction,
> and selecting uncompressed map for restore operations.
>
> Signed-off-by: Akshata Jahagirdar<akshata.jahagirdar@intel.com>
> Reviewed-by: Matthew Auld<matthew.auld@intel.com>
> Reviewed-by: Himal Prasad Ghimiray<himal.prasad.ghimiray@intel.com>
> ---
>   drivers/gpu/drm/xe/xe_migrate.c | 7 ++++---
>   1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 3b8a334fe08f..1eee6be24423 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -715,7 +715,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
>   	struct xe_gt *gt = m->tile->primary_gt;
>   	u32 flush_flags = 0;
>   
> -	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
> +	if (xe_device_needs_ccs_emit(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {


Do we need to call xe_migrate_ccs_copy() when xe_device_needs_ccs_emit() 
is true ? If not then move the check for xe_device_needs_ccs_emit() to 
xe_migrate_copy()

to ensure that xe_migrate_ccs_copy() is not even called on such platforms.

Regards,
Nirmoy

>   		/*
>   		 * If the src is already in vram, then it should already
>   		 * have been cleared by us, or has been populated by the
> @@ -791,6 +791,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>   	bool copy_ccs = xe_device_has_flat_ccs(xe) &&
>   		xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo);
>   	bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
> +	bool use_comp_pat = GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe) && src_is_vram && !dst_is_vram;
>   
>   	/* Copying CCS between two different BOs is not supported yet. */
>   	if (XE_WARN_ON(copy_ccs && src_bo != dst_bo))
> @@ -833,7 +834,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>   
>   		src_L0 = min(src_L0, dst_L0);
>   
> -		batch_size += pte_update_size(m, src_is_vram, false, src, &src_it, &src_L0,
> +		batch_size += pte_update_size(m, src_is_vram, use_comp_pat, src, &src_it, &src_L0,
>   					      &src_L0_ofs, &src_L0_pt, 0, 0,
>   					      avail_pts);
>   
> @@ -852,7 +853,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>   
>   		/* Add copy commands size here */
>   		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
> -			((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0));
> +			((xe_device_needs_ccs_emit(xe) ? EMIT_COPY_CCS_DW : 0));
>   
>   		bb = xe_bb_new(gt, batch_size, usm);
>   		if (IS_ERR(bb)) {

[-- Attachment #2: Type: text/html, Size: 3970 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality
  2024-07-12  6:39 ` [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality Akshata Jahagirdar
@ 2024-07-12 12:56   ` Nirmoy Das
  0 siblings, 0 replies; 22+ messages in thread
From: Nirmoy Das @ 2024-07-12 12:56 UTC (permalink / raw)
  To: Akshata Jahagirdar, intel-xe; +Cc: akshatajahagirdar6, Himal Prasad Ghimiray

[-- Attachment #1: Type: text/plain, Size: 10639 bytes --]


On 7/12/2024 8:39 AM, Akshata Jahagirdar wrote:
> From: "Jahagirdar, Akshata"<akshata.jahagirdar@intel.com>
>
> This test verifies if the main and ccs data are cleared during bo creation.
> The motivation to use Kunit instead of IGT is that, although we can verify
> whether the data is zero following bo creation,
> we cannot confirm whether the zero value after bo creation is the result of
> our clear function or simply because the initial data present was zero.
>
> Signed-off-by: Akshata Jahagirdar<akshata.jahagirdar@intel.com>
> Reviewed-by: Himal Prasad Ghimiray<himal.prasad.ghimiray@intel.com>
> ---
>   drivers/gpu/drm/xe/tests/xe_migrate.c      | 272 +++++++++++++++++++++
>   drivers/gpu/drm/xe/tests/xe_migrate_test.c |   1 +
>   drivers/gpu/drm/xe/tests/xe_migrate_test.h |   1 +
>   3 files changed, 274 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
> index 962f6438e219..0c9a30dac46d 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
> @@ -359,3 +359,275 @@ void xe_migrate_sanity_kunit(struct kunit *test)
>   	xe_call_for_each_device(migrate_test_run_device);
>   }
>   EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
> +
> +static struct dma_fence *blt_copy(struct xe_tile *tile,
> +				  struct xe_bo *src_bo, struct xe_bo *dst_bo,
> +				  bool copy_only_ccs, const char *str, struct kunit *test)
> +{
> +	struct xe_gt *gt = tile->primary_gt;
> +	struct xe_migrate *m = tile->migrate;
> +	struct xe_device *xe = gt_to_xe(gt);
> +	struct dma_fence *fence = NULL;
> +	u64 size = src_bo->size;
> +	struct xe_res_cursor src_it, dst_it;
> +	struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource;
> +	u64 src_L0_ofs, dst_L0_ofs;
> +	u32 src_L0_pt, dst_L0_pt;
> +	u64 src_L0, dst_L0;
> +	int err;
> +	bool src_is_vram = mem_type_is_vram(src->mem_type);
> +	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
> +
> +	if (!src_is_vram)
> +		xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
> +	else
> +		xe_res_first(src, 0, size, &src_it);
> +
> +	if (!dst_is_vram)
> +		xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
> +	else
> +		xe_res_first(dst, 0, size, &dst_it);
> +
> +	while (size) {
> +		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
> +		struct xe_sched_job *job;
> +		struct xe_bb *bb;
> +		u32 flush_flags = 0;
> +		u32 update_idx;
> +		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
> +
> +		src_L0 = xe_migrate_res_sizes(m, &src_it);
> +		dst_L0 = xe_migrate_res_sizes(m, &dst_it);
> +
> +		src_L0 = min(src_L0, dst_L0);
> +
> +		batch_size += pte_update_size(m, src_is_vram, src_is_vram, src, &src_it, &src_L0,
> +					      &src_L0_ofs, &src_L0_pt, 0, 0,
> +					      avail_pts);
> +
> +		batch_size += pte_update_size(m, dst_is_vram, dst_is_vram, dst, &dst_it, &src_L0,
> +					      &dst_L0_ofs, &dst_L0_pt, 0,
> +					      avail_pts, avail_pts);
> +
> +		/* Add copy commands size here */
> +		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
> +			((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? EMIT_COPY_CCS_DW : 0);
> +
> +		bb = xe_bb_new(gt, batch_size, xe->info.has_usm);
> +		if (IS_ERR(bb)) {
> +			err = PTR_ERR(bb);
> +			goto err_sync;
> +		}
> +
> +		if (src_is_vram)
> +			xe_res_next(&src_it, src_L0);
> +		else
> +			emit_pte(m, bb, src_L0_pt, src_is_vram, false,
> +				 &src_it, src_L0, src);
> +
> +		if (dst_is_vram)
> +			xe_res_next(&dst_it, src_L0);
> +		else
> +			emit_pte(m, bb, dst_L0_pt, dst_is_vram, false,
> +				 &dst_it, src_L0, dst);
> +
> +		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
> +		update_idx = bb->len;
> +		if (!copy_only_ccs)
> +			emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
> +
> +		if (copy_only_ccs)
> +			flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
> +							  src_is_vram, dst_L0_ofs,
> +							  dst_is_vram, src_L0, dst_L0_ofs,
> +							  copy_only_ccs);
> +
> +		job = xe_bb_create_migration_job(m->q, bb,
> +						 xe_migrate_batch_base(m, xe->info.has_usm),
> +						 update_idx);
> +		if (IS_ERR(job)) {
> +			err = PTR_ERR(job);
> +			goto err;
> +		}
> +
> +		xe_sched_job_add_migrate_flush(job, flush_flags);
> +
> +		mutex_lock(&m->job_mutex);
> +		xe_sched_job_arm(job);
> +		dma_fence_put(fence);
> +		fence = dma_fence_get(&job->drm.s_fence->finished);
> +		xe_sched_job_push(job);
> +
> +		dma_fence_put(m->fence);
> +		m->fence = dma_fence_get(fence);
> +
> +		mutex_unlock(&m->job_mutex);
> +
> +		xe_bb_free(bb, fence);
> +		size -= src_L0;
> +		continue;
> +
> +err:
> +		xe_bb_free(bb, NULL);
> +
> +err_sync:
> +		if (fence) {
> +			dma_fence_wait(fence, false);
> +			dma_fence_put(fence);
> +		}
> +		return ERR_PTR(err);
> +	}
> +
> +	return fence;
> +}
> +
> +static void test_clear(struct xe_device *xe, struct xe_tile *tile,
> +		       struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct kunit *test)
> +{
> +	struct dma_fence *fence;
> +	u64 expected, retval;
> +
> +	expected = 0xd0d0d0d0d0d0d0d0;
> +	xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
> +
> +	fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
> +	if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
> +		retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
> +		if (retval == expected)
> +			KUNIT_FAIL(test, "Sanity check failed: VRAM must have compressed value\n");
> +	}
> +	dma_fence_put(fence);
> +
> +	fence = blt_copy(tile, vram_bo, sys_bo, false, "Blit copy from vram to sysmem", test);
> +	if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) {
> +		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
> +		check(retval, expected, "Decompressed value must be equal to initial value", test);
> +		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
> +		check(retval, expected, "Decompressed value must be equal to initial value", test);
> +	}
> +	dma_fence_put(fence);
> +
> +	kunit_info(test, "Clear vram buffer object\n");
> +	expected = 0x0000000000000000;
> +	fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource);


Missing dma_fence_put(). With that

Acked-by: Nirmoy Das <nirmoy.das@intel.com>

> +
> +	fence = blt_copy(tile, vram_bo, sys_bo,
> +			 false, "Blit copy from vram to sysmem", test);
> +	if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) {
> +		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
> +		check(retval, expected, "Clear main buffer first value", test);
> +		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
> +		check(retval, expected, "Clear main buffer last value", test);
> +	}
> +	dma_fence_put(fence);
> +
> +	fence = blt_copy(tile, vram_bo, sys_bo,
> +			 true, "Blit surf copy from vram to sysmem", test);
> +	if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
> +		retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
> +		check(retval, expected, "Clear ccs data first value", test);
> +		retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
> +		check(retval, expected, "Clear ccs data last value", test);
> +	}
> +	dma_fence_put(fence);
> +}
> +
> +static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
> +				       struct kunit *test)
> +{
> +	struct xe_bo *sys_bo, *vram_bo;
> +	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
> +	long ret;
> +
> +	sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
> +				   DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device,
> +				   XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS);
> +
> +	if (IS_ERR(sys_bo)) {
> +		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
> +			   PTR_ERR(sys_bo));
> +		return;
> +	}
> +
> +	xe_bo_lock(sys_bo, false);
> +	ret = xe_bo_validate(sys_bo, NULL, false);
> +	if (ret) {
> +		KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
> +		goto out_put;
> +	}
> +
> +	ret = xe_bo_vmap(sys_bo);
> +	if (ret) {
> +		KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret);
> +		goto out_put;
> +	}
> +	xe_bo_unlock(sys_bo);
> +
> +	vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
> +				    ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
> +	if (IS_ERR(vram_bo)) {
> +		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
> +			   PTR_ERR(vram_bo));
> +		return;
> +	}
> +
> +	xe_bo_lock(vram_bo, false);
> +	ret = xe_bo_validate(vram_bo, NULL, false);
> +	if (ret) {
> +		KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
> +		goto out_put;
> +	}
> +
> +	ret = xe_bo_vmap(vram_bo);
> +	if (ret) {
> +		KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
> +		goto out_put;
> +	}
> +
> +	test_clear(xe, tile, sys_bo, vram_bo, test);
> +	xe_bo_unlock(vram_bo);
> +
> +	xe_bo_lock(vram_bo, false);
> +	xe_bo_vunmap(vram_bo);
> +	xe_bo_unlock(vram_bo);
> +
> +	xe_bo_lock(sys_bo, false);
> +	xe_bo_vunmap(sys_bo);
> +	xe_bo_unlock(sys_bo);
> +out_put:
> +	xe_bo_put(vram_bo);
> +	xe_bo_put(sys_bo);
> +}
> +
> +static int validate_ccs_test_run_device(struct xe_device *xe)
> +{
> +	struct kunit *test = xe_cur_kunit();
> +	struct xe_tile *tile;
> +	int id;
> +
> +	if (!xe_device_has_flat_ccs(xe)) {
> +		kunit_info(test, "Skipping non-flat-ccs device.\n");
> +		return 0;
> +	}
> +
> +	if (!(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))) {
> +		kunit_info(test, "Skipping non-xe2 discrete device %s.\n",
> +			   dev_name(xe->drm.dev));
> +		return 0;
> +	}
> +
> +	xe_pm_runtime_get(xe);
> +
> +	for_each_tile(tile, xe, id)
> +		validate_ccs_test_run_tile(xe, tile, test);
> +
> +	xe_pm_runtime_put(xe);
> +
> +	return 0;
> +}
> +
> +void xe_validate_ccs_kunit(struct kunit *test)
> +{
> +	xe_call_for_each_device(validate_ccs_test_run_device);
> +}
> +EXPORT_SYMBOL_IF_KUNIT(xe_validate_ccs_kunit);
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
> index eb0d8963419c..49b9ef060ad3 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
> @@ -9,6 +9,7 @@
>   
>   static struct kunit_case xe_migrate_tests[] = {
>   	KUNIT_CASE(xe_migrate_sanity_kunit),
> +	KUNIT_CASE(xe_validate_ccs_kunit),
>   	{}
>   };
>   
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
> index 7c645c66824f..dc0ecfdfad39 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
> @@ -9,5 +9,6 @@
>   struct kunit;
>   
>   void xe_migrate_sanity_kunit(struct kunit *test);
> +void xe_validate_ccs_kunit(struct kunit *test);
>   
>   #endif

[-- Attachment #2: Type: text/html, Size: 11291 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/6] drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx
  2024-07-12 12:55   ` Nirmoy Das
@ 2024-07-15 20:56     ` Jahagirdar, Akshata
  2024-07-16  8:12       ` Nirmoy Das
  0 siblings, 1 reply; 22+ messages in thread
From: Jahagirdar, Akshata @ 2024-07-15 20:56 UTC (permalink / raw)
  To: Nirmoy Das, intel-xe
  Cc: akshatajahagirdar6, Matthew Auld, Himal Prasad Ghimiray

[-- Attachment #1: Type: text/plain, Size: 3191 bytes --]


On 7/12/2024 5:55 AM, Nirmoy Das wrote:
>
> Hi Akshata,
>
> On 7/12/2024 8:39 AM, Akshata Jahagirdar wrote:
>> From: "Jahagirdar, Akshata"<akshata.jahagirdar@intel.com>
>>
>> During eviction (vram->sysmem), we use the mapping from compressed -> uncompressed.
>> During restore (sysmem->vram), we need the mapping from uncompressed -> uncompressed.
>> Handle logic for selecting the compressed identity map for eviction,
>> and selecting uncompressed map for restore operations.
>>
>> Signed-off-by: Akshata Jahagirdar<akshata.jahagirdar@intel.com>
>> Reviewed-by: Matthew Auld<matthew.auld@intel.com>
>> Reviewed-by: Himal Prasad Ghimiray<himal.prasad.ghimiray@intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_migrate.c | 7 ++++---
>>   1 file changed, 4 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
>> index 3b8a334fe08f..1eee6be24423 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.c
>> +++ b/drivers/gpu/drm/xe/xe_migrate.c
>> @@ -715,7 +715,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
>>   	struct xe_gt *gt = m->tile->primary_gt;
>>   	u32 flush_flags = 0;
>>   
>> -	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
>> +	if (xe_device_needs_ccs_emit(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
>
>
> Do we need to call xe_migrate_ccs_copy() when 
> xe_device_needs_ccs_emit() is true ? If not then move the check for 
> xe_device_needs_ccs_emit() to xe_migrate_copy()
>
> to ensure that xe_migrate_ccs_copy() is not even called on such platforms.
>
> Regards,
> Nirmoy

xe_device_needs_ccs_emit() is true for any non-BMG platform that has 
flat-ccs enabled.
In such cases, we do need to call xe_migrate_ccs_copy() while clearing 
any ccs metadata in vram.

BR,
Akshata

>>   		/*
>>   		 * If the src is already in vram, then it should already
>>   		 * have been cleared by us, or has been populated by the
>> @@ -791,6 +791,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>>   	bool copy_ccs = xe_device_has_flat_ccs(xe) &&
>>   		xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo);
>>   	bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
>> +	bool use_comp_pat = GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe) && src_is_vram && !dst_is_vram;
>>   
>>   	/* Copying CCS between two different BOs is not supported yet. */
>>   	if (XE_WARN_ON(copy_ccs && src_bo != dst_bo))
>> @@ -833,7 +834,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>>   
>>   		src_L0 = min(src_L0, dst_L0);
>>   
>> -		batch_size += pte_update_size(m, src_is_vram, false, src, &src_it, &src_L0,
>> +		batch_size += pte_update_size(m, src_is_vram, use_comp_pat, src, &src_it, &src_L0,
>>   					      &src_L0_ofs, &src_L0_pt, 0, 0,
>>   					      avail_pts);
>>   
>> @@ -852,7 +853,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>>   
>>   		/* Add copy commands size here */
>>   		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
>> -			((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0));
>> +			((xe_device_needs_ccs_emit(xe) ? EMIT_COPY_CCS_DW : 0));
>>   
>>   		bb = xe_bb_new(gt, batch_size, usm);
>>   		if (IS_ERR(bb)) {

[-- Attachment #2: Type: text/html, Size: 4709 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/6] drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx
  2024-07-15 20:56     ` Jahagirdar, Akshata
@ 2024-07-16  8:12       ` Nirmoy Das
  0 siblings, 0 replies; 22+ messages in thread
From: Nirmoy Das @ 2024-07-16  8:12 UTC (permalink / raw)
  To: Jahagirdar, Akshata, intel-xe
  Cc: akshatajahagirdar6, Matthew Auld, Himal Prasad Ghimiray

[-- Attachment #1: Type: text/plain, Size: 3896 bytes --]

Hi Akshata,

On 7/15/2024 10:56 PM, Jahagirdar, Akshata wrote:
>
>
> On 7/12/2024 5:55 AM, Nirmoy Das wrote:
>>
>> Hi Akshata,
>>
>> On 7/12/2024 8:39 AM, Akshata Jahagirdar wrote:
>>> From: "Jahagirdar, Akshata"<akshata.jahagirdar@intel.com>
>>>
>>> During eviction (vram->sysmem), we use the mapping from compressed -> uncompressed.
>>> During restore (sysmem->vram), we need the mapping from uncompressed -> uncompressed.
>>> Handle logic for selecting the compressed identity map for eviction,
>>> and selecting uncompressed map for restore operations.
>>>
>>> Signed-off-by: Akshata Jahagirdar<akshata.jahagirdar@intel.com>
>>> Reviewed-by: Matthew Auld<matthew.auld@intel.com>
>>> Reviewed-by: Himal Prasad Ghimiray<himal.prasad.ghimiray@intel.com>
>>> ---
>>>   drivers/gpu/drm/xe/xe_migrate.c | 7 ++++---
>>>   1 file changed, 4 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
>>> index 3b8a334fe08f..1eee6be24423 100644
>>> --- a/drivers/gpu/drm/xe/xe_migrate.c
>>> +++ b/drivers/gpu/drm/xe/xe_migrate.c
>>> @@ -715,7 +715,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
>>>   	struct xe_gt *gt = m->tile->primary_gt;
>>>   	u32 flush_flags = 0;
>>>   
>>> -	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
>>> +	if (xe_device_needs_ccs_emit(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
>>
>>
>> Do we need to call xe_migrate_ccs_copy() when 
>> xe_device_needs_ccs_emit() is true ? If not then move the check for 
>> xe_device_needs_ccs_emit() to xe_migrate_copy()
>>

I see that I've confused you here. I meant "Do we need to call 
xe_migrate_ccs_copy() when xe_device_needs_ccs_emit() is *not* true ?"

>> to ensure that xe_migrate_ccs_copy() is not even called on such 
>> platforms.
>>
>> Regards,
>> Nirmoy
>
> xe_device_needs_ccs_emit() is true for any non-BMG platform that has 
> flat-ccs enabled.
> In such cases, we do need to call xe_migrate_ccs_copy() while clearing 
> any ccs metadata in vram.
>

So let's use xe_device_needs_ccs_emit() in the upper layer to decide 
whether to call xe_migrate_ccs_copy(), instead of checking 
xe_device_needs_ccs_emit() within an if-else block of xe_migrate_ccs_copy()

Either:

if (xe_device_needs_ccs_emit())

xe_migrate_ccs_copy(...)


Or even:


xe_migrate_ccs_copy() {

     if ( !xe_device_needs_ccs_emit() )

     return;

..........

}



Regards,

Nirmoy

>
> BR,
> Akshata
>
>>>   		/*
>>>   		 * If the src is already in vram, then it should already
>>>   		 * have been cleared by us, or has been populated by the
>>> @@ -791,6 +791,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>>>   	bool copy_ccs = xe_device_has_flat_ccs(xe) &&
>>>   		xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo);
>>>   	bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
>>> +	bool use_comp_pat = GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe) && src_is_vram && !dst_is_vram;
>>>   
>>>   	/* Copying CCS between two different BOs is not supported yet. */
>>>   	if (XE_WARN_ON(copy_ccs && src_bo != dst_bo))
>>> @@ -833,7 +834,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>>>   
>>>   		src_L0 = min(src_L0, dst_L0);
>>>   
>>> -		batch_size += pte_update_size(m, src_is_vram, false, src, &src_it, &src_L0,
>>> +		batch_size += pte_update_size(m, src_is_vram, use_comp_pat, src, &src_it, &src_L0,
>>>   					      &src_L0_ofs, &src_L0_pt, 0, 0,
>>>   					      avail_pts);
>>>   
>>> @@ -852,7 +853,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>>>   
>>>   		/* Add copy commands size here */
>>>   		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
>>> -			((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0));
>>> +			((xe_device_needs_ccs_emit(xe) ? EMIT_COPY_CCS_DW : 0));
>>>   
>>>   		bb = xe_bb_new(gt, batch_size, usm);
>>>   		if (IS_ERR(bb)) {

[-- Attachment #2: Type: text/html, Size: 6563 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality
  2024-07-12  4:15     ` Jahagirdar, Akshata
@ 2024-07-16  8:51       ` Matthew Auld
  0 siblings, 0 replies; 22+ messages in thread
From: Matthew Auld @ 2024-07-16  8:51 UTC (permalink / raw)
  To: Jahagirdar, Akshata, intel-xe
  Cc: matthew.d.roper, himal.prasad.ghimiray, lucas.demarchi

On 12/07/2024 05:15, Jahagirdar, Akshata wrote:
> 
> On 7/11/2024 7:27 AM, Matthew Auld wrote:
>> On 11/07/2024 10:18, Akshata Jahagirdar wrote:
>>> This test verifies if the main and ccs data are cleared during bo 
>>> creation.
>>> The motivation to use Kunit instead of IGT is that, although we can 
>>> verify
>>> whether the data is zero following bo creation,
>>> we cannot confirm whether the zero value after bo creation is the 
>>> result of
>>> our clear function or simply because the initial data present there 
>>> was zero.
>>
>> One idea might be to pre-fill and release some amount of vram at the 
>> start of the test. Then maybe spawn a number threads each allocating a 
>> bit of vram (also various sizes) in a loop, each time dirtying the 
>> pages and ccs before releasing it. There should be some amount of page 
>> re-use to catch issues, plus is a lot more nasty with multiple threads 
>> and varying sizes. Such a test can also be useful for lnl, just that 
>> we use system memory instead of vram. Or maybe we already have 
>> something like that?
>>
> If I am not wrong, think there is an igt testcase xe_evict_ccs which 
> exercises similar testcases.
>>>
>>> Signed-off-by: Akshata Jahagirdar <akshata.jahagirdar@intel.com>
>>> ---
>>>   drivers/gpu/drm/xe/tests/xe_migrate.c      | 271 +++++++++++++++++++++
>>>   drivers/gpu/drm/xe/tests/xe_migrate_test.c |   1 +
>>>   drivers/gpu/drm/xe/tests/xe_migrate_test.h |   1 +
>>>   3 files changed, 273 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c 
>>> b/drivers/gpu/drm/xe/tests/xe_migrate.c
>>> index 962f6438e219..6538c0e6b57e 100644
>>> --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
>>> +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
>>> @@ -359,3 +359,274 @@ void xe_migrate_sanity_kunit(struct kunit *test)
>>>       xe_call_for_each_device(migrate_test_run_device);
>>>   }
>>>   EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
>>> +
>>> +static struct dma_fence *blt_copy(struct xe_tile *tile,
>>> +                struct xe_bo *src_bo, struct xe_bo *dst_bo,
>>> +                bool copy_only_ccs, const char *str, struct kunit 
>>> *test)
>>> +{
>>> +    struct xe_gt *gt = tile->primary_gt;
>>> +    struct xe_migrate *m = tile->migrate;
>>> +    struct xe_device *xe = gt_to_xe(gt);
>>> +    struct dma_fence *fence = NULL;
>>> +    u64 size = src_bo->size;
>>> +    struct xe_res_cursor src_it, dst_it;
>>> +    struct ttm_resource *src = src_bo->ttm.resource, *dst = 
>>> dst_bo->ttm.resource;
>>> +    u64 src_L0_ofs, dst_L0_ofs;
>>> +    u32 src_L0_pt, dst_L0_pt;
>>> +    u64 src_L0, dst_L0;
>>> +    int err;
>>> +    bool src_is_vram = mem_type_is_vram(src->mem_type);
>>> +    bool dst_is_vram = mem_type_is_vram(dst->mem_type);
>>> +
>>> +    if (!src_is_vram)
>>> +        xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
>>> +    else
>>> +        xe_res_first(src, 0, size, &src_it);
>>> +
>>> +    if (!dst_is_vram)
>>> +        xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
>>> +    else
>>> +        xe_res_first(dst, 0, size, &dst_it);
>>> +
>>> +    while (size) {
>>> +        u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
>>> +        struct xe_sched_job *job;
>>> +        struct xe_bb *bb;
>>> +        u32 flush_flags = 0;
>>> +        u32 update_idx;
>>> +        u32 avail_pts = max_mem_transfer_per_pass(xe) / 
>>> LEVEL0_PAGE_TABLE_ENCODE_SIZE;
>>> +
>>> +        src_L0 = xe_migrate_res_sizes(m, &src_it);
>>> +        dst_L0 = xe_migrate_res_sizes(m, &dst_it);
>>> +
>>> +        src_L0 = min(src_L0, dst_L0);
>>> +
>>> +        batch_size += pte_update_size(m, src_is_vram, src_is_vram, 
>>> src, &src_it, &src_L0,
>>> +                          &src_L0_ofs, &src_L0_pt, 0, 0,
>>> +                          avail_pts);
>>> +
>>> +        batch_size += pte_update_size(m, dst_is_vram, dst_is_vram, 
>>> dst, &dst_it, &src_L0,
>>> +                          &dst_L0_ofs, &dst_L0_pt, 0,
>>> +                          avail_pts, avail_pts);
>>> +
>>> +        /* Add copy commands size here */
>>> +        batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
>>> +            ((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? 
>>> EMIT_COPY_CCS_DW : 0);
>>> +
>>> +        bb = xe_bb_new(gt, batch_size, xe->info.has_usm);
>>> +        if (IS_ERR(bb)) {
>>> +            err = PTR_ERR(bb);
>>> +            goto err_sync;
>>> +        }
>>> +
>>> +        if (src_is_vram)
>>> +            xe_res_next(&src_it, src_L0);
>>> +        else
>>> +            emit_pte(m, bb, src_L0_pt, src_is_vram, false,
>>> +                 &src_it, src_L0, src);
>>> +
>>> +        if (dst_is_vram)
>>> +            xe_res_next(&dst_it, src_L0);
>>> +        else
>>> +            emit_pte(m, bb, dst_L0_pt, dst_is_vram, false,
>>> +                 &dst_it, src_L0, dst);
>>> +
>>> +        bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
>>> +        update_idx = bb->len;
>>> +        if (!copy_only_ccs)
>>> +            emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, 
>>> XE_PAGE_SIZE);
>>> +
>>> +        if (copy_only_ccs)
>>> +            flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
>>> +                    src_is_vram, dst_L0_ofs,
>>> +                    dst_is_vram, src_L0, dst_L0_ofs, copy_only_ccs);
>>> +
>>> +        mutex_lock(&m->job_mutex);
>>
>> This is not allowed anymore it seems. CI is also complaing about this. 
>> See: 50e52592fbe791d96ec2cb431d158cc6bc495be5
>>
>> Not sure what else has changed.
>>
> I see. I missed this. Thank you! Will fix this!
>>> +
>>> +        job = xe_bb_create_migration_job(m->q, bb,
>>> +                         xe_migrate_batch_base(m, xe->info.has_usm),
>>> +                         update_idx);
>>> +        if (IS_ERR(job)) {
>>> +            err = PTR_ERR(job);
>>> +            goto err;
>>> +        }
>>> +
>>> +        xe_sched_job_add_migrate_flush(job, flush_flags);
>>> +
>>> +        xe_sched_job_arm(job);
>>> +        dma_fence_put(fence);
>>> +        fence = dma_fence_get(&job->drm.s_fence->finished);
>>> +        xe_sched_job_push(job);
>>> +
>>> +        dma_fence_put(m->fence);
>>> +        m->fence = dma_fence_get(fence);
>>> +
>>> +        mutex_unlock(&m->job_mutex);
>>> +
>>> +        xe_bb_free(bb, fence);
>>> +        size -= src_L0;
>>> +        continue;
>>> +
>>> +err:
>>> +        mutex_unlock(&m->job_mutex);
>>> +        xe_bb_free(bb, NULL);
>>> +
>>> +err_sync:
>>> +        if (fence) {
>>> +            dma_fence_wait(fence, false);
>>> +            dma_fence_put(fence);
>>> +        }
>>> +        return ERR_PTR(err);
>>> +    }
>>> +
>>> +    return fence;
>>> +}
>>> +
>>> +static void test_clear(struct xe_device *xe, struct xe_tile *tile,
>>> +            struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct 
>>> kunit *test)
>>> +{
>>> +    struct dma_fence *fence;
>>> +    u64 expected, retval;
>>> +
>>> +    expected = 0xd0d0d0d0d0d0d0d0;
>>> +    xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
>>> +
>>> +    fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from 
>>> sysmem to vram", test);
>>> +    if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to 
>>> vram", test)) {
>>> +        retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
>>> +        if (retval == expected)
>>> +            KUNIT_FAIL(test, "Sanity check failed: VRAM must have 
>>> compressed value\n");
>>> +    }
>>> +    dma_fence_put(fence);
>>> +
>>> +    fence = blt_copy(tile, vram_bo, sys_bo, false, "Blit copy from 
>>> vram to sysmem", test);
>>> +    if (!sanity_fence_failed(xe, fence, "Blit copy from vram to 
>>> sysmem", test)) {
>>> +        retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
>>> +        check(retval, expected, "Decompressed value must be equal to 
>>> initial value", test);
>>> +        retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
>>> +        check(retval, expected, "Decompressed value must be equal to 
>>> initial value", test);
>>> +    }
>>> +    dma_fence_put(fence);
>>> +
>>> +    kunit_info(test, "Clear vram buffer object\n");
>>> +    expected = 0x0000000000000000;
>>> +    fence = xe_migrate_clear(tile->migrate, vram_bo, 
>>> vram_bo->ttm.resource);
>>> +
>>> +    fence = blt_copy(tile, vram_bo, sys_bo,
>>> +                false, "Blit copy from vram to sysmem", test);
>>> +    if (!sanity_fence_failed(xe, fence, "Clear main buffer data", 
>>> test)) {
>>> +        retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
>>> +        check(retval, expected, "Clear main buffer first value", test);
>>> +        retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
>>> +        check(retval, expected, "Clear main buffer last value", test);
>>> +    }
>>> +    dma_fence_put(fence);
>>> +
>>> +    fence = blt_copy(tile, vram_bo, sys_bo,
>>> +            true, "Blit surf copy from vram to sysmem", test);
>>> +    if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", 
>>> test)) {
>>> +        retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
>>> +        check(retval, expected, "Clear ccs data first value", test);
>>> +        retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
>>> +        check(retval, expected, "Clear ccs data last value", test);
>>> +    }
>>> +    dma_fence_put(fence);
>>> +}
>>> +
>>> +static void validate_ccs_test_run_tile(struct xe_device *xe, struct 
>>> xe_tile *tile, struct kunit *test)
>>> +{
>>> +    struct xe_bo *sys_bo, *vram_bo;
>>> +    unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
>>> +    long ret;
>>> +
>>> +    sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 
>>> DRM_XE_GEM_CPU_CACHING_WC,
>>> +            ttm_bo_type_device, XE_BO_FLAG_SYSTEM | 
>>> XE_BO_FLAG_NEEDS_CPU_ACCESS);
>>> +
>>> +    if (IS_ERR(sys_bo)) {
>>> +        KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
>>> +               PTR_ERR(sys_bo));
>>> +        return;
>>> +    }
>>> +
>>> +    xe_bo_lock(sys_bo, false);
>>> +    ret = xe_bo_validate(sys_bo, NULL, false);
>>> +    if (ret) {
>>> +        KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", 
>>> ret);
>>> +        goto out_unlock;
>>
>> vram_bo is not initialized here.

Also I guess needs an unlock. Below also.

>>
>>> +    }
>>> +
>>> +    ret = xe_bo_vmap(sys_bo);
>>> +    if (ret) {
>>> +        KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret);
>>> +        goto out_unlock;
>>> +    }
>>> +    xe_bo_unlock(sys_bo);
>>> +
>>> +    vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 
>>> DRM_XE_GEM_CPU_CACHING_WC,
>>> +                   ttm_bo_type_device, bo_flags | 
>>> XE_BO_FLAG_NEEDS_CPU_ACCESS);
>>> +    if (IS_ERR(vram_bo)) {
>>> +        KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
>>> +               PTR_ERR(vram_bo));
>>> +        return;
>>
>> Missing bo_put?
>>
> Since, bo creation failed, we don't call bo_put() again.
> This is taken care by the xe_bo_create_user() function.

The sys_bo needs bo_put() here, or maybe better jump to onion unwind.

>>
>>> +    }
>>> +
>>> +    xe_bo_lock(vram_bo, false);
>>> +    ret = xe_bo_validate(vram_bo, NULL, false);
>>> +    if (ret) {
>>> +        KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
>>> +        goto out_unlock;
>>> +    }
>>> +
>>> +    ret = xe_bo_vmap(vram_bo);
>>> +    if (ret) {
>>> +        KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
>>> +        goto out_unlock;
>>> +    }
>>> +
>>> +    test_clear(xe, tile, sys_bo, vram_bo, test);
>>> +    xe_bo_unlock(vram_bo);
>>> +
>>> +    xe_bo_lock(vram_bo, false);
>>> +    xe_bo_vunmap(vram_bo);
>>> +    xe_bo_unlock(vram_bo);
>>> +
>>> +    xe_bo_lock(sys_bo, false);
>>> +    xe_bo_vunmap(sys_bo);
>>> +    xe_bo_unlock(sys_bo);
>>> +out_unlock:
>>
>> There is no unlock here.
> Will change it to out_put here.
>>> +    xe_bo_put(vram_bo);
>>> +    xe_bo_put(sys_bo);
>>> +}
>>> +
>>> +static int validate_ccs_test_run_device(struct xe_device *xe)
>>> +{
>>> +    struct kunit *test = xe_cur_kunit();
>>> +    struct xe_tile *tile;
>>> +    int id;
>>> +
>>> +    if (!xe_device_has_flat_ccs(xe)) {
>>> +        kunit_info(test, "Skipping non-flat-ccs device.\n");
>>> +        return 0;
>>> +    }
>>> +
>>> +    if (!(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))) {
>>> +        kunit_info(test, "Skipping non-xe2 discrete device %s.\n",
>>> +               dev_name(xe->drm.dev));
>>> +        return 0;
>>> +    }
>>
>> So is there something else for xe2 igpu and ccs?
> Yes, so there is another kunit test xe_ccs_migrate_kunit for xe2 igpu
> that does data validation for ccs as well.
>>> +
>>> +    xe_pm_runtime_get(xe);
>>> +
>>> +    for_each_tile(tile, xe, id)
>>> +        validate_ccs_test_run_tile(xe, tile, test);
>>> +
>>> +    xe_pm_runtime_put(xe);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +void xe_validate_ccs_kunit(struct kunit *test)
>>> +{
>>> +    xe_call_for_each_device(validate_ccs_test_run_device);
>>> +}
>>> +EXPORT_SYMBOL_IF_KUNIT(xe_validate_ccs_kunit);
>>> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c 
>>> b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
>>> index eb0d8963419c..49b9ef060ad3 100644
>>> --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
>>> +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
>>> @@ -9,6 +9,7 @@
>>>     static struct kunit_case xe_migrate_tests[] = {
>>>       KUNIT_CASE(xe_migrate_sanity_kunit),
>>> +    KUNIT_CASE(xe_validate_ccs_kunit),
>>>       {}
>>>   };
>>>   diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h 
>>> b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
>>> index 7c645c66824f..dc0ecfdfad39 100644
>>> --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h
>>> +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
>>> @@ -9,5 +9,6 @@
>>>   struct kunit;
>>>     void xe_migrate_sanity_kunit(struct kunit *test);
>>> +void xe_validate_ccs_kunit(struct kunit *test);
>>>     #endif

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2024-07-16  8:51 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-12  6:39 [PATCH 0/6] Implement compression support on BMG Akshata Jahagirdar
2024-07-12  6:39 ` [PATCH 1/6] drm/xe/migrate: Handle clear ccs logic for xe2 dgfx Akshata Jahagirdar
2024-07-12  6:39 ` [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality Akshata Jahagirdar
2024-07-12 12:56   ` Nirmoy Das
2024-07-12  6:39 ` [PATCH 3/6] drm/xe/xe2: Introduce identity map for compressed pat for vram Akshata Jahagirdar
2024-07-12  6:39 ` [PATCH 4/6] drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx Akshata Jahagirdar
2024-07-12 12:55   ` Nirmoy Das
2024-07-15 20:56     ` Jahagirdar, Akshata
2024-07-16  8:12       ` Nirmoy Das
2024-07-12  6:39 ` [PATCH 5/6] drm/xe/migrate: Add kunit to test migration functionality for BMG Akshata Jahagirdar
2024-07-12  6:39 ` [PATCH 6/6] drm/xe/xe2: Do not run xe_bo_test for xe2+ dgfx Akshata Jahagirdar
2024-07-12  6:44 ` ✓ CI.Patch_applied: success for Implement compression support on BMG Patchwork
2024-07-12  6:45 ` ✗ CI.checkpatch: warning " Patchwork
2024-07-12  6:45 ` ✗ CI.KUnit: failure " Patchwork
2024-07-12  7:24 ` [PATCH 0/6] " Akshata Jahagirdar
     [not found] <cover.1720768378.git.akshata.jahagirdar@intel.com>
2024-07-12  7:24 ` [PATCH 2/6] drm/xe/migrate: Add kunit to test clear functionality Akshata Jahagirdar
     [not found] <cover.1720689220.git.akshata.jahagirdar@intel.com>
2024-07-11  9:18 ` Akshata Jahagirdar
2024-07-11  9:19   ` Akshata Jahagirdar
2024-07-11 14:27   ` Matthew Auld
2024-07-12  4:15     ` Jahagirdar, Akshata
2024-07-16  8:51       ` Matthew Auld
     [not found] <cover.1720677099.git.akshata.jahagirdar@intel.com>
2024-07-11  5:55 ` Akshata Jahagirdar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox