[Intel-gfx] [PATCH 3/6] drm/i915/ttm: Failsafe migration blits

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Cc: maarten.lankhorst@linux.intel.com, matthew.auld@intel.com,
	"Thomas Hellström" <thomas.hellstrom@linux.intel.com>
Subject: [Intel-gfx] [PATCH 3/6] drm/i915/ttm: Failsafe migration blits
Date: Fri,  8 Oct 2021 15:35:27 +0200	[thread overview]
Message-ID: <20211008133530.664509-4-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20211008133530.664509-1-thomas.hellstrom@linux.intel.com>

If the initial fill blit or copy blit of an object fails, the old
content of the data might be exposed and read as soon as either CPU- or
GPU PTEs are set up to point at the pages.

Intercept the blit fence with an async dma_fence_work that checks the
blit fence for errors and if there are errors performs an async cpu blit
instead. If there is a failure to allocate the async dma_fence_work,
allocate it on the stack and sync wait for the blit to complete.

Add selftests that simulate gpu blit failures and failure to allocate
the async dma_fence_work.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 268 ++++++++++++++----
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h       |   4 +
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  24 +-
 3 files changed, 240 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4b4d7457bef9..79d4d50aa4e5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -7,6 +7,7 @@
 #include <drm/ttm/ttm_placement.h>
 
 #include "i915_drv.h"
+#include "i915_sw_fence_work.h"
 #include "intel_memory_region.h"
 #include "intel_region_ttm.h"
 
@@ -25,6 +26,18 @@
 #define I915_TTM_PRIO_NO_PAGES  1
 #define I915_TTM_PRIO_HAS_PAGES 2
 
+I915_SELFTEST_DECLARE(static bool fail_gpu_migration;)
+I915_SELFTEST_DECLARE(static bool fail_work_allocation;)
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+					bool work_allocation)
+{
+	fail_gpu_migration = gpu_migration;
+	fail_work_allocation = work_allocation;
+}
+#endif
+
 /*
  * Size of struct ttm_place vector in on-stack struct ttm_placement allocs
  */
@@ -466,11 +479,11 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
 	return intel_region_ttm_resource_to_rsgt(obj->mm.region, res);
 }
 
-static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
-			       bool clear,
-			       struct ttm_resource *dst_mem,
-			       struct ttm_tt *dst_ttm,
-			       struct sg_table *dst_st)
+static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
+					     bool clear,
+					     struct ttm_resource *dst_mem,
+					     struct ttm_tt *dst_ttm,
+					     struct sg_table *dst_st)
 {
 	struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
 						     bdev);
@@ -481,30 +494,29 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 	int ret;
 
 	if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
+
+	/* With fail_gpu_migration, we always perform a GPU clear. */
+	if (I915_SELFTEST_ONLY(fail_gpu_migration))
+		clear = true;
 
 	dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
 	if (clear) {
-		if (bo->type == ttm_bo_type_kernel)
-			return -EINVAL;
+		if (bo->type == ttm_bo_type_kernel &&
+		    !I915_SELFTEST_ONLY(fail_gpu_migration))
+			return ERR_PTR(-EINVAL);
 
 		intel_engine_pm_get(i915->gt.migrate.context->engine);
 		ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
 						  dst_st->sgl, dst_level,
 						  gpu_binds_iomem(dst_mem),
 						  0, &rq);
-
-		if (!ret && rq) {
-			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
-			i915_request_put(rq);
-		}
-		intel_engine_pm_put(i915->gt.migrate.context->engine);
 	} else {
 		struct i915_refct_sgt *src_rsgt =
 			i915_ttm_resource_get_st(obj, bo->resource);
 
 		if (IS_ERR(src_rsgt))
-			return PTR_ERR(src_rsgt);
+			return ERR_CAST(src_rsgt);
 
 		src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
 		intel_engine_pm_get(i915->gt.migrate.context->engine);
@@ -515,55 +527,201 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 						 dst_st->sgl, dst_level,
 						 gpu_binds_iomem(dst_mem),
 						 &rq);
+
 		i915_refct_sgt_put(src_rsgt);
-		if (!ret && rq) {
-			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
-			i915_request_put(rq);
-		}
-		intel_engine_pm_put(i915->gt.migrate.context->engine);
 	}
 
-	return ret;
+	intel_engine_pm_put(i915->gt.migrate.context->engine);
+
+	if (ret && rq) {
+		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+		i915_request_put(rq);
+	}
+
+	return ret ? ERR_PTR(ret) : &rq->fence;
+}
+
+/**
+ * struct i915_ttm_memcpy_work - memcpy work item under a dma-fence
+ * @base: The struct dma_fence_work we subclass.
+ * @_dst_iter: Storage space for the destination kmap iterator.
+ * @_src_iter: Storage space for the source kmap iterator.
+ * @dst_iter: Pointer to the destination kmap iterator.
+ * @src_iter: Pointer to the source kmap iterator.
+ * @clear: Whether to clear instead of copy.
+ * @num_pages: Number of pages in the copy.
+ * @src_rsgt: Refcounted scatter-gather list of source memory.
+ * @dst_rsgt: Refcounted scatter-gather list of destination memory.
+ */
+struct i915_ttm_memcpy_work {
+	struct dma_fence_work base;
+	union {
+		struct ttm_kmap_iter_tt tt;
+		struct ttm_kmap_iter_iomap io;
+	} _dst_iter,
+	_src_iter;
+	struct ttm_kmap_iter *dst_iter;
+	struct ttm_kmap_iter *src_iter;
+	unsigned long num_pages;
+	bool clear;
+	struct i915_refct_sgt *src_rsgt;
+	struct i915_refct_sgt *dst_rsgt;
+};
+
+static void __memcpy_work(struct dma_fence_work *work)
+{
+	struct i915_ttm_memcpy_work *copy_work =
+		container_of(work, typeof(*copy_work), base);
+
+	if (I915_SELFTEST_ONLY(fail_gpu_migration))
+		cmpxchg(&work->error, 0, -EINVAL);
+
+	/* If there was an error in the gpu copy operation, run memcpy. */
+	if (work->error)
+		ttm_move_memcpy(copy_work->clear, copy_work->num_pages,
+				copy_work->dst_iter, copy_work->src_iter);
+
+	/*
+	 * Can't signal before we unref the rsgts, because then
+	 * ttms might be unpopulated before we unref these and we'll hit
+	 * a GEM_WARN_ON() in i915_ttm_tt_unpopulate. Not a real problem,
+	 * but good to keep the GEM_WARN_ON to check that we don't leak rsgts.
+	 */
+	i915_refct_sgt_put(copy_work->src_rsgt);
+	i915_refct_sgt_put(copy_work->dst_rsgt);
+}
+
+static const struct dma_fence_work_ops i915_ttm_memcpy_ops = {
+	.work = __memcpy_work,
+};
+
+static void i915_ttm_memcpy_work_init(struct i915_ttm_memcpy_work *copy_work,
+				      struct ttm_buffer_object *bo, bool clear,
+				      struct ttm_resource *dst_mem,
+				      struct ttm_tt *dst_ttm,
+				      struct i915_refct_sgt *dst_rsgt)
+{
+	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+	struct intel_memory_region *dst_reg, *src_reg;
+
+	dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
+	src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
+	GEM_BUG_ON(!dst_reg || !src_reg);
+
+	/*
+	 * We could consider populating only parts of this structure
+	 * (like avoiding the iterators) until it's actually
+	 * determined that we need it. But initializing the iterators
+	 * shouldn't be that costly really.
+	 */
+
+	copy_work->dst_iter = !cpu_maps_iomem(dst_mem) ?
+		ttm_kmap_iter_tt_init(&copy_work->_dst_iter.tt, dst_ttm) :
+		ttm_kmap_iter_iomap_init(&copy_work->_dst_iter.io, &dst_reg->iomap,
+					 &dst_rsgt->table, dst_reg->region.start);
+
+	copy_work->src_iter = !cpu_maps_iomem(bo->resource) ?
+		ttm_kmap_iter_tt_init(&copy_work->_src_iter.tt, bo->ttm) :
+		ttm_kmap_iter_iomap_init(&copy_work->_src_iter.io, &src_reg->iomap,
+					 &obj->ttm.cached_io_rsgt->table,
+					 src_reg->region.start);
+	copy_work->clear = clear;
+	copy_work->num_pages = bo->base.size >> PAGE_SHIFT;
+
+	copy_work->dst_rsgt = i915_refct_sgt_get(dst_rsgt);
+	copy_work->src_rsgt = clear ? NULL :
+		i915_ttm_resource_get_st(obj, bo->resource);
 }
 
-static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
-			    struct ttm_resource *dst_mem,
-			    struct ttm_tt *dst_ttm,
-			    struct i915_refct_sgt *dst_rsgt,
-			    bool allow_accel)
+/*
+ * This is only used as a last fallback if the copy_work
+ * memory allocation fails, prohibiting async moves.
+ */
+static void __i915_ttm_move_fallback(struct ttm_buffer_object *bo, bool clear,
+				     struct ttm_resource *dst_mem,
+				     struct ttm_tt *dst_ttm,
+				     struct i915_refct_sgt *dst_rsgt,
+				     bool allow_accel)
 {
 	int ret = -EINVAL;
 
-	if (allow_accel)
-		ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
-					  &dst_rsgt->table);
-	if (ret) {
-		struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-		struct intel_memory_region *dst_reg, *src_reg;
-		union {
-			struct ttm_kmap_iter_tt tt;
-			struct ttm_kmap_iter_iomap io;
-		} _dst_iter, _src_iter;
-		struct ttm_kmap_iter *dst_iter, *src_iter;
-
-		dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
-		src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
-		GEM_BUG_ON(!dst_reg || !src_reg);
-
-		dst_iter = !cpu_maps_iomem(dst_mem) ?
-			ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) :
-			ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
-						 &dst_rsgt->table,
-						 dst_reg->region.start);
-
-		src_iter = !cpu_maps_iomem(bo->resource) ?
-			ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
-			ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
-						 &obj->ttm.cached_io_rsgt->table,
-						 src_reg->region.start);
-
-		ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
+	if (allow_accel) {
+		struct dma_fence *fence;
+
+		fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
+					    &dst_rsgt->table);
+		if (IS_ERR(fence)) {
+			ret = PTR_ERR(fence);
+		} else {
+			ret = dma_fence_wait(fence, false);
+			if (!ret)
+				ret = fence->error;
+			dma_fence_put(fence);
+		}
+	}
+
+	if (ret || I915_SELFTEST_ONLY(fail_gpu_migration)) {
+		struct i915_ttm_memcpy_work copy_work;
+
+		i915_ttm_memcpy_work_init(&copy_work, bo, clear, dst_mem,
+					  dst_ttm, dst_rsgt);
+
+		/* Trigger a copy by setting an error value */
+		copy_work.base.dma.error = -EINVAL;
+		__memcpy_work(&copy_work.base);
+	}
+}
+
+static int __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
+			   struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
+			   struct i915_refct_sgt *dst_rsgt, bool allow_accel)
+{
+	struct i915_ttm_memcpy_work *copy_work;
+	struct dma_fence *fence;
+	int ret;
+
+	if (!I915_SELFTEST_ONLY(fail_work_allocation))
+		copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL);
+	else
+		copy_work = NULL;
+
+	if (!copy_work) {
+		/* Don't fail with -ENOMEM. Move sync instead. */
+		__i915_ttm_move_fallback(bo, clear, dst_mem, dst_ttm, dst_rsgt,
+					 allow_accel);
+		return 0;
+	}
+
+	dma_fence_work_init(&copy_work->base, &i915_ttm_memcpy_ops);
+	if (allow_accel) {
+		fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
+					    &dst_rsgt->table);
+		if (IS_ERR(fence)) {
+			i915_sw_fence_set_error_once(&copy_work->base.chain,
+						     PTR_ERR(fence));
+		} else {
+			ret = dma_fence_work_chain(&copy_work->base, fence);
+			dma_fence_put(fence);
+			GEM_WARN_ON(ret < 0);
+		}
+	} else {
+		i915_sw_fence_set_error_once(&copy_work->base.chain, -EINVAL);
 	}
+
+	/* Setup async memcpy */
+	i915_ttm_memcpy_work_init(copy_work, bo, clear, dst_mem, dst_ttm,
+				  dst_rsgt);
+	fence = dma_fence_get(&copy_work->base.dma);
+	dma_fence_work_commit_imm(&copy_work->base);
+
+	/*
+	 * We're synchronizing here for now. For async moves, return the
+	 * fence.
+	 */
+	dma_fence_wait(fence, false);
+	dma_fence_put(fence);
+
+	return ret;
 }
 
 static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index 0b7291dd897c..c5bf8863446d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -51,6 +51,10 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 			  struct drm_i915_gem_object *src,
 			  bool allow_accel, bool intr);
 
+I915_SELFTEST_DECLARE
+(void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+					 bool work_allocation);)
+
 /* Internal I915 TTM declarations and definitions below. */
 
 #define I915_PL_LMEM0 TTM_PL_PRIV
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index 28a700f08b49..a2122bdcc1cb 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -4,6 +4,7 @@
  */
 
 #include "gt/intel_migrate.h"
+#include "gem/i915_gem_ttm.h"
 
 static int igt_fill_check_buffer(struct drm_i915_gem_object *obj,
 				 bool fill)
@@ -227,13 +228,34 @@ static int igt_lmem_pages_migrate(void *arg)
 	return err;
 }
 
+static int igt_lmem_pages_failsafe_migrate(void *arg)
+{
+	int fail_gpu, fail_alloc, ret;
+
+	for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
+		for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) {
+			pr_info("Simulated failure modes: gpu: %d, alloc: %d\n",
+				fail_gpu, fail_alloc);
+			i915_ttm_migrate_set_failure_modes(fail_gpu,
+							   fail_alloc);
+			ret = igt_lmem_pages_migrate(arg);
+			if (ret)
+				goto out_err;
+		}
+	}
+
+out_err:
+	i915_ttm_migrate_set_failure_modes(false, false);
+	return ret;
+}
+
 int i915_gem_migrate_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_smem_create_migrate),
 		SUBTEST(igt_lmem_create_migrate),
 		SUBTEST(igt_same_create_migrate),
-		SUBTEST(igt_lmem_pages_migrate),
+		SUBTEST(igt_lmem_pages_failsafe_migrate),
 	};
 
 	if (!HAS_LMEM(i915))
-- 
2.31.1

WARNING: multiple messages have this Message-ID (diff)

From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Cc: maarten.lankhorst@linux.intel.com, matthew.auld@intel.com,
	"Thomas Hellström" <thomas.hellstrom@linux.intel.com>
Subject: [PATCH 3/6] drm/i915/ttm: Failsafe migration blits
Date: Fri,  8 Oct 2021 15:35:27 +0200	[thread overview]
Message-ID: <20211008133530.664509-4-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20211008133530.664509-1-thomas.hellstrom@linux.intel.com>

If the initial fill blit or copy blit of an object fails, the old
content of the data might be exposed and read as soon as either CPU- or
GPU PTEs are set up to point at the pages.

Intercept the blit fence with an async dma_fence_work that checks the
blit fence for errors and if there are errors performs an async cpu blit
instead. If there is a failure to allocate the async dma_fence_work,
allocate it on the stack and sync wait for the blit to complete.

Add selftests that simulate gpu blit failures and failure to allocate
the async dma_fence_work.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 268 ++++++++++++++----
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h       |   4 +
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  24 +-
 3 files changed, 240 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4b4d7457bef9..79d4d50aa4e5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -7,6 +7,7 @@
 #include <drm/ttm/ttm_placement.h>
 
 #include "i915_drv.h"
+#include "i915_sw_fence_work.h"
 #include "intel_memory_region.h"
 #include "intel_region_ttm.h"
 
@@ -25,6 +26,18 @@
 #define I915_TTM_PRIO_NO_PAGES  1
 #define I915_TTM_PRIO_HAS_PAGES 2
 
+I915_SELFTEST_DECLARE(static bool fail_gpu_migration;)
+I915_SELFTEST_DECLARE(static bool fail_work_allocation;)
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+					bool work_allocation)
+{
+	fail_gpu_migration = gpu_migration;
+	fail_work_allocation = work_allocation;
+}
+#endif
+
 /*
  * Size of struct ttm_place vector in on-stack struct ttm_placement allocs
  */
@@ -466,11 +479,11 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
 	return intel_region_ttm_resource_to_rsgt(obj->mm.region, res);
 }
 
-static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
-			       bool clear,
-			       struct ttm_resource *dst_mem,
-			       struct ttm_tt *dst_ttm,
-			       struct sg_table *dst_st)
+static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
+					     bool clear,
+					     struct ttm_resource *dst_mem,
+					     struct ttm_tt *dst_ttm,
+					     struct sg_table *dst_st)
 {
 	struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
 						     bdev);
@@ -481,30 +494,29 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 	int ret;
 
 	if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
+
+	/* With fail_gpu_migration, we always perform a GPU clear. */
+	if (I915_SELFTEST_ONLY(fail_gpu_migration))
+		clear = true;
 
 	dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
 	if (clear) {
-		if (bo->type == ttm_bo_type_kernel)
-			return -EINVAL;
+		if (bo->type == ttm_bo_type_kernel &&
+		    !I915_SELFTEST_ONLY(fail_gpu_migration))
+			return ERR_PTR(-EINVAL);
 
 		intel_engine_pm_get(i915->gt.migrate.context->engine);
 		ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
 						  dst_st->sgl, dst_level,
 						  gpu_binds_iomem(dst_mem),
 						  0, &rq);
-
-		if (!ret && rq) {
-			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
-			i915_request_put(rq);
-		}
-		intel_engine_pm_put(i915->gt.migrate.context->engine);
 	} else {
 		struct i915_refct_sgt *src_rsgt =
 			i915_ttm_resource_get_st(obj, bo->resource);
 
 		if (IS_ERR(src_rsgt))
-			return PTR_ERR(src_rsgt);
+			return ERR_CAST(src_rsgt);
 
 		src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
 		intel_engine_pm_get(i915->gt.migrate.context->engine);
@@ -515,55 +527,201 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 						 dst_st->sgl, dst_level,
 						 gpu_binds_iomem(dst_mem),
 						 &rq);
+
 		i915_refct_sgt_put(src_rsgt);
-		if (!ret && rq) {
-			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
-			i915_request_put(rq);
-		}
-		intel_engine_pm_put(i915->gt.migrate.context->engine);
 	}
 
-	return ret;
+	intel_engine_pm_put(i915->gt.migrate.context->engine);
+
+	if (ret && rq) {
+		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+		i915_request_put(rq);
+	}
+
+	return ret ? ERR_PTR(ret) : &rq->fence;
+}
+
+/**
+ * struct i915_ttm_memcpy_work - memcpy work item under a dma-fence
+ * @base: The struct dma_fence_work we subclass.
+ * @_dst_iter: Storage space for the destination kmap iterator.
+ * @_src_iter: Storage space for the source kmap iterator.
+ * @dst_iter: Pointer to the destination kmap iterator.
+ * @src_iter: Pointer to the source kmap iterator.
+ * @clear: Whether to clear instead of copy.
+ * @num_pages: Number of pages in the copy.
+ * @src_rsgt: Refcounted scatter-gather list of source memory.
+ * @dst_rsgt: Refcounted scatter-gather list of destination memory.
+ */
+struct i915_ttm_memcpy_work {
+	struct dma_fence_work base;
+	union {
+		struct ttm_kmap_iter_tt tt;
+		struct ttm_kmap_iter_iomap io;
+	} _dst_iter,
+	_src_iter;
+	struct ttm_kmap_iter *dst_iter;
+	struct ttm_kmap_iter *src_iter;
+	unsigned long num_pages;
+	bool clear;
+	struct i915_refct_sgt *src_rsgt;
+	struct i915_refct_sgt *dst_rsgt;
+};
+
+static void __memcpy_work(struct dma_fence_work *work)
+{
+	struct i915_ttm_memcpy_work *copy_work =
+		container_of(work, typeof(*copy_work), base);
+
+	if (I915_SELFTEST_ONLY(fail_gpu_migration))
+		cmpxchg(&work->error, 0, -EINVAL);
+
+	/* If there was an error in the gpu copy operation, run memcpy. */
+	if (work->error)
+		ttm_move_memcpy(copy_work->clear, copy_work->num_pages,
+				copy_work->dst_iter, copy_work->src_iter);
+
+	/*
+	 * Can't signal before we unref the rsgts, because then
+	 * ttms might be unpopulated before we unref these and we'll hit
+	 * a GEM_WARN_ON() in i915_ttm_tt_unpopulate. Not a real problem,
+	 * but good to keep the GEM_WARN_ON to check that we don't leak rsgts.
+	 */
+	i915_refct_sgt_put(copy_work->src_rsgt);
+	i915_refct_sgt_put(copy_work->dst_rsgt);
+}
+
+static const struct dma_fence_work_ops i915_ttm_memcpy_ops = {
+	.work = __memcpy_work,
+};
+
+static void i915_ttm_memcpy_work_init(struct i915_ttm_memcpy_work *copy_work,
+				      struct ttm_buffer_object *bo, bool clear,
+				      struct ttm_resource *dst_mem,
+				      struct ttm_tt *dst_ttm,
+				      struct i915_refct_sgt *dst_rsgt)
+{
+	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+	struct intel_memory_region *dst_reg, *src_reg;
+
+	dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
+	src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
+	GEM_BUG_ON(!dst_reg || !src_reg);
+
+	/*
+	 * We could consider populating only parts of this structure
+	 * (like avoiding the iterators) until it's actually
+	 * determined that we need it. But initializing the iterators
+	 * shouldn't be that costly really.
+	 */
+
+	copy_work->dst_iter = !cpu_maps_iomem(dst_mem) ?
+		ttm_kmap_iter_tt_init(&copy_work->_dst_iter.tt, dst_ttm) :
+		ttm_kmap_iter_iomap_init(&copy_work->_dst_iter.io, &dst_reg->iomap,
+					 &dst_rsgt->table, dst_reg->region.start);
+
+	copy_work->src_iter = !cpu_maps_iomem(bo->resource) ?
+		ttm_kmap_iter_tt_init(&copy_work->_src_iter.tt, bo->ttm) :
+		ttm_kmap_iter_iomap_init(&copy_work->_src_iter.io, &src_reg->iomap,
+					 &obj->ttm.cached_io_rsgt->table,
+					 src_reg->region.start);
+	copy_work->clear = clear;
+	copy_work->num_pages = bo->base.size >> PAGE_SHIFT;
+
+	copy_work->dst_rsgt = i915_refct_sgt_get(dst_rsgt);
+	copy_work->src_rsgt = clear ? NULL :
+		i915_ttm_resource_get_st(obj, bo->resource);
 }
 
-static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
-			    struct ttm_resource *dst_mem,
-			    struct ttm_tt *dst_ttm,
-			    struct i915_refct_sgt *dst_rsgt,
-			    bool allow_accel)
+/*
+ * This is only used as a last fallback if the copy_work
+ * memory allocation fails, prohibiting async moves.
+ */
+static void __i915_ttm_move_fallback(struct ttm_buffer_object *bo, bool clear,
+				     struct ttm_resource *dst_mem,
+				     struct ttm_tt *dst_ttm,
+				     struct i915_refct_sgt *dst_rsgt,
+				     bool allow_accel)
 {
 	int ret = -EINVAL;
 
-	if (allow_accel)
-		ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
-					  &dst_rsgt->table);
-	if (ret) {
-		struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-		struct intel_memory_region *dst_reg, *src_reg;
-		union {
-			struct ttm_kmap_iter_tt tt;
-			struct ttm_kmap_iter_iomap io;
-		} _dst_iter, _src_iter;
-		struct ttm_kmap_iter *dst_iter, *src_iter;
-
-		dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
-		src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
-		GEM_BUG_ON(!dst_reg || !src_reg);
-
-		dst_iter = !cpu_maps_iomem(dst_mem) ?
-			ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) :
-			ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
-						 &dst_rsgt->table,
-						 dst_reg->region.start);
-
-		src_iter = !cpu_maps_iomem(bo->resource) ?
-			ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
-			ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
-						 &obj->ttm.cached_io_rsgt->table,
-						 src_reg->region.start);
-
-		ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
+	if (allow_accel) {
+		struct dma_fence *fence;
+
+		fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
+					    &dst_rsgt->table);
+		if (IS_ERR(fence)) {
+			ret = PTR_ERR(fence);
+		} else {
+			ret = dma_fence_wait(fence, false);
+			if (!ret)
+				ret = fence->error;
+			dma_fence_put(fence);
+		}
+	}
+
+	if (ret || I915_SELFTEST_ONLY(fail_gpu_migration)) {
+		struct i915_ttm_memcpy_work copy_work;
+
+		i915_ttm_memcpy_work_init(&copy_work, bo, clear, dst_mem,
+					  dst_ttm, dst_rsgt);
+
+		/* Trigger a copy by setting an error value */
+		copy_work.base.dma.error = -EINVAL;
+		__memcpy_work(&copy_work.base);
+	}
+}
+
+static int __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
+			   struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
+			   struct i915_refct_sgt *dst_rsgt, bool allow_accel)
+{
+	struct i915_ttm_memcpy_work *copy_work;
+	struct dma_fence *fence;
+	int ret;
+
+	if (!I915_SELFTEST_ONLY(fail_work_allocation))
+		copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL);
+	else
+		copy_work = NULL;
+
+	if (!copy_work) {
+		/* Don't fail with -ENOMEM. Move sync instead. */
+		__i915_ttm_move_fallback(bo, clear, dst_mem, dst_ttm, dst_rsgt,
+					 allow_accel);
+		return 0;
+	}
+
+	dma_fence_work_init(&copy_work->base, &i915_ttm_memcpy_ops);
+	if (allow_accel) {
+		fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
+					    &dst_rsgt->table);
+		if (IS_ERR(fence)) {
+			i915_sw_fence_set_error_once(&copy_work->base.chain,
+						     PTR_ERR(fence));
+		} else {
+			ret = dma_fence_work_chain(&copy_work->base, fence);
+			dma_fence_put(fence);
+			GEM_WARN_ON(ret < 0);
+		}
+	} else {
+		i915_sw_fence_set_error_once(&copy_work->base.chain, -EINVAL);
 	}
+
+	/* Setup async memcpy */
+	i915_ttm_memcpy_work_init(copy_work, bo, clear, dst_mem, dst_ttm,
+				  dst_rsgt);
+	fence = dma_fence_get(&copy_work->base.dma);
+	dma_fence_work_commit_imm(&copy_work->base);
+
+	/*
+	 * We're synchronizing here for now. For async moves, return the
+	 * fence.
+	 */
+	dma_fence_wait(fence, false);
+	dma_fence_put(fence);
+
+	return ret;
 }
 
 static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index 0b7291dd897c..c5bf8863446d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -51,6 +51,10 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 			  struct drm_i915_gem_object *src,
 			  bool allow_accel, bool intr);
 
+I915_SELFTEST_DECLARE
+(void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+					 bool work_allocation);)
+
 /* Internal I915 TTM declarations and definitions below. */
 
 #define I915_PL_LMEM0 TTM_PL_PRIV
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index 28a700f08b49..a2122bdcc1cb 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -4,6 +4,7 @@
  */
 
 #include "gt/intel_migrate.h"
+#include "gem/i915_gem_ttm.h"
 
 static int igt_fill_check_buffer(struct drm_i915_gem_object *obj,
 				 bool fill)
@@ -227,13 +228,34 @@ static int igt_lmem_pages_migrate(void *arg)
 	return err;
 }
 
+static int igt_lmem_pages_failsafe_migrate(void *arg)
+{
+	int fail_gpu, fail_alloc, ret;
+
+	for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
+		for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) {
+			pr_info("Simulated failure modes: gpu: %d, alloc: %d\n",
+				fail_gpu, fail_alloc);
+			i915_ttm_migrate_set_failure_modes(fail_gpu,
+							   fail_alloc);
+			ret = igt_lmem_pages_migrate(arg);
+			if (ret)
+				goto out_err;
+		}
+	}
+
+out_err:
+	i915_ttm_migrate_set_failure_modes(false, false);
+	return ret;
+}
+
 int i915_gem_migrate_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_smem_create_migrate),
 		SUBTEST(igt_lmem_create_migrate),
 		SUBTEST(igt_same_create_migrate),
-		SUBTEST(igt_lmem_pages_migrate),
+		SUBTEST(igt_lmem_pages_failsafe_migrate),
 	};
 
 	if (!HAS_LMEM(i915))
-- 
2.31.1

next prev parent reply	other threads:[~2021-10-08 13:36 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-08 13:35 [Intel-gfx] [PATCH 0/6] drm/i915: Failsafe migration blits Thomas Hellström
2021-10-08 13:35 ` Thomas Hellström
2021-10-08 13:35 ` [Intel-gfx] [PATCH 1/6] drm/i915: Update dma_fence_work Thomas Hellström
2021-10-08 13:35   ` Thomas Hellström
2021-10-13 12:41   ` [Intel-gfx] " Daniel Vetter
2021-10-13 12:41     ` Daniel Vetter
2021-10-13 12:59     ` [Intel-gfx] " Thomas Hellström
2021-10-13 12:59       ` Thomas Hellström
2021-10-08 13:35 ` [Intel-gfx] [PATCH 2/6] drm/i915: Introduce refcounted sg-tables Thomas Hellström
2021-10-08 13:35   ` Thomas Hellström
2021-10-13 14:41   ` [Intel-gfx] " Daniel Vetter
2021-10-13 14:41     ` Daniel Vetter
2021-10-13 14:55     ` [Intel-gfx] " Thomas Hellström
2021-10-13 14:55       ` Thomas Hellström
2021-10-08 13:35 ` Thomas Hellström [this message]
2021-10-08 13:35   ` [PATCH 3/6] drm/i915/ttm: Failsafe migration blits Thomas Hellström
2021-10-08 13:35 ` [Intel-gfx] [PATCH 4/6] drm/i915: Add a struct dma_fence_work timeline Thomas Hellström
2021-10-08 13:35   ` Thomas Hellström
2021-10-13 12:43   ` [Intel-gfx] " Daniel Vetter
2021-10-13 14:21     ` Thomas Hellström
2021-10-13 14:33       ` Daniel Vetter
2021-10-13 14:39         ` Thomas Hellström
2021-10-08 13:35 ` [Intel-gfx] [PATCH 5/6] drm/i915/ttm: Attach the migration fence to a region timeline on eviction Thomas Hellström
2021-10-08 13:35   ` Thomas Hellström
2021-10-08 13:35 ` [Intel-gfx] [PATCH 6/6] drm/i915: Use irq work for coalescing-only dma-fence-work Thomas Hellström
2021-10-08 13:35   ` Thomas Hellström
2021-10-08 17:00 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Failsafe migration blits Patchwork
2021-10-08 17:29 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-10-09  0:04 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
2021-10-14  1:50 ` [Intel-gfx] [PATCH 0/6] " Dave Airlie
2021-10-14  1:50   ` Dave Airlie
2021-10-14  7:29   ` [Intel-gfx] " Thomas Hellström
2021-10-14  7:29     ` Thomas Hellström

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:4b4d7457bef dfblob:79d4d50aa4e dfblob:0b7291dd897
dfblob:c5bf8863446 dfblob:28a700f08b4 dfblob:a2122bdcc1c
dfblob:4b4d7457bef dfblob:79d4d50aa4e dfblob:0b7291dd897
dfblob:c5bf8863446 dfblob:28a700f08b4 dfblob:a2122bdcc1c )
 OR (
bs:"[PATCH 3/6] drm/i915/ttm: Failsafe migration blits" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211008133530.664509-4-thomas.hellstrom@linux.intel.com \
    --to=thomas.hellstrom@linux.intel.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=matthew.auld@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.