Intel-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: "Thomas Hellström (Intel)" <thomas_os@shipmail.org>
To: intel-gfx@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@intel.com>,
	maarten.lankhorst@intel.com, chris@chris-wilson.co.uk
Subject: [Intel-gfx] [RFC PATCH v2 2/2] drm/i915: Introduce a i915_gem_do_ww(){} utility
Date: Thu, 17 Sep 2020 20:59:45 +0200	[thread overview]
Message-ID: <20200917185945.11734-3-thomas_os@shipmail.org> (raw)
In-Reply-To: <20200917185945.11734-1-thomas_os@shipmail.org>

From: Thomas Hellström <thomas.hellstrom@intel.com>

With the huge number of sites where multiple-object locking is
needed in the driver, it becomes difficult to avoid recursive
ww_acquire_ctx initialization, and the function prototypes become
bloated passing the ww_acquire_ctx around. Furthermore it's not
always easy to get the -EDEADLK handling correct and to follow it.

Introduce a i915_gem_do_ww utility that tries to remedy all these problems
by enclosing parts of a ww transaction in the following way:

my_function() {
	struct i915_gem_ww_ctx *ww, template;
	int err;
	bool interruptible = true;

	i915_gem_do_ww(ww, &template, err, interruptible) {
		err = ww_transaction_part(ww);
	}
	return err;
}

The utility will automatically look up an active ww_acquire_ctx if one
is initialized previously in the call chain, and if one is found, it will
forward the -EDEADLK instead of handling it, which takes care of the
recursive initialization. Using the utility also discourages nested ww
unlocking / relocking that is both very fragile and hard to follow.

To look up and register an active ww_acquire_ctx, use a
driver-wide hash table for now. But noting that a task could only have
a single active ww_acquire_ctx per ww_class, the active CTX is really
task state and a generic version of this utility in the ww_mutex code
could thus probably use a quick lookup from a list in the
struct task_struct.

Signed-off-by: Thomas Hellström <thomas.hellstrom@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_ww.c  | 74 ++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_ww.h  | 56 +++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_globals.c |  1 +
 drivers/gpu/drm/i915/i915_globals.h |  1 +
 4 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_ww.c b/drivers/gpu/drm/i915/i915_gem_ww.c
index 3490b72cf613..6247af1dba87 100644
--- a/drivers/gpu/drm/i915/i915_gem_ww.c
+++ b/drivers/gpu/drm/i915/i915_gem_ww.c
@@ -1,10 +1,12 @@
 // SPDX-License-Identifier: MIT
 /*
- * Copyright © 2020 Intel Corporation
+ * Copyright © 2019 Intel Corporation
  */
+#include <linux/rhashtable.h>
 #include <linux/dma-resv.h>
 #include <linux/stacktrace.h>
 #include "i915_gem_ww.h"
+#include "i915_globals.h"
 #include "gem/i915_gem_object.h"
 
 void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
@@ -70,3 +72,73 @@ int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
 
 	return ret;
 }
+
+static struct rhashtable ww_ht;
+static const struct rhashtable_params ww_params = {
+	.key_len = sizeof(struct task_struct *),
+	.key_offset = offsetof(struct i915_gem_ww_ctx, ctx.task),
+	.head_offset = offsetof(struct i915_gem_ww_ctx, head),
+	.min_size = 128,
+};
+
+static void i915_ww_item_free(void *ptr, void *arg)
+{
+	WARN_ON_ONCE(1);
+}
+
+static void i915_global_ww_exit(void)
+{
+	rhashtable_free_and_destroy(&ww_ht, i915_ww_item_free, NULL);
+}
+
+static void i915_global_ww_shrink(void)
+{
+}
+
+static struct i915_global global = {
+	.shrink = i915_global_ww_shrink,
+	.exit = i915_global_ww_exit,
+};
+
+int __init i915_global_ww_init(void)
+{
+	int ret = rhashtable_init(&ww_ht, &ww_params);
+
+	if (ret)
+		return ret;
+
+	i915_global_register(&global);
+
+	return 0;
+}
+
+void __i915_gem_ww_mark_unused(struct i915_gem_ww_ctx *ww)
+{
+	GEM_WARN_ON(rhashtable_remove_fast(&ww_ht, &ww->head, ww_params));
+}
+
+/**
+ * __i915_gem_ww_locate_or_use - return the task's i915_gem_ww_ctx context
+ * to use.
+ *
+ * @template: The context to use if there was none initialized previously
+ * in the call chain.
+ *
+ * RETURN: The task's i915_gem_ww_ctx context.
+ */
+struct i915_gem_ww_ctx *
+__i915_gem_ww_locate_or_use(struct i915_gem_ww_ctx *template)
+{
+	struct i915_gem_ww_ctx *tmp;
+
+	/* ctx.task is the hash key, so set it first. */
+	template->ctx.task = current;
+
+	/*
+	 * Ideally we'd just hook the active context to the
+	 * struct task_struct. But for now use a hash table.
+	 */
+	tmp = rhashtable_lookup_get_insert_fast(&ww_ht, &template->head,
+						ww_params);
+	return tmp;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_ww.h b/drivers/gpu/drm/i915/i915_gem_ww.h
index 94fdf8c5f89b..b844596067c7 100644
--- a/drivers/gpu/drm/i915/i915_gem_ww.h
+++ b/drivers/gpu/drm/i915/i915_gem_ww.h
@@ -6,18 +6,72 @@
 #define __I915_GEM_WW_H__
 
 #include <linux/stackdepot.h>
+#include <linux/rhashtable-types.h>
 #include <drm/drm_drv.h>
 
 struct i915_gem_ww_ctx {
 	struct ww_acquire_ctx ctx;
+	struct rhash_head head;
 	struct list_head obj_list;
 	struct drm_i915_gem_object *contended;
 	depot_stack_handle_t contended_bt;
-	bool intr;
+	u32 call_depth;
+	unsigned short intr;
+	unsigned short loop;
 };
 
 void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
 void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
 int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
 void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj);
+
+/* Internal functions used by the inlines! Don't use. */
+void __i915_gem_ww_mark_unused(struct i915_gem_ww_ctx *ww);
+struct i915_gem_ww_ctx *
+__i915_gem_ww_locate_or_use(struct i915_gem_ww_ctx *template);
+
+static inline int __i915_gem_ww_fini(struct i915_gem_ww_ctx *ww, int err)
+{
+	ww->loop = 0;
+	if (ww->call_depth) {
+		ww->call_depth--;
+		return err;
+	}
+
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(ww);
+		if (!err)
+			ww->loop = 1;
+	}
+
+	if (!ww->loop) {
+		i915_gem_ww_ctx_fini(ww);
+		__i915_gem_ww_mark_unused(ww);
+	}
+
+	return err;
+}
+
+static inline struct i915_gem_ww_ctx *
+__i915_gem_ww_init(struct i915_gem_ww_ctx *template, bool intr)
+{
+	struct i915_gem_ww_ctx *ww = __i915_gem_ww_locate_or_use(template);
+
+	if (!ww) {
+		ww = template;
+		ww->call_depth = 0;
+		i915_gem_ww_ctx_init(ww, intr);
+	} else {
+		ww->call_depth++;
+	}
+
+	ww->loop = 1;
+
+	return ww;
+}
+
+#define i915_gem_do_ww(_ww, _template, _err, _intr)			\
+	for ((_ww) = __i915_gem_ww_init(_template, _intr); (_ww)->loop; \
+	     _err = __i915_gem_ww_fini(_ww, _err))
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index 3aa213684293..9087cc8c2ee3 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -94,6 +94,7 @@ static __initconst int (* const initfn[])(void) = {
 	i915_global_request_init,
 	i915_global_scheduler_init,
 	i915_global_vma_init,
+	i915_global_ww_init,
 };
 
 int __init i915_globals_init(void)
diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h
index b2f5cd9b9b1a..5976b460ee39 100644
--- a/drivers/gpu/drm/i915/i915_globals.h
+++ b/drivers/gpu/drm/i915/i915_globals.h
@@ -34,5 +34,6 @@ int i915_global_objects_init(void);
 int i915_global_request_init(void);
 int i915_global_scheduler_init(void);
 int i915_global_vma_init(void);
+int i915_global_ww_init(void);
 
 #endif /* _I915_GLOBALS_H_ */
-- 
2.25.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2020-09-17 19:00 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-17 18:59 [Intel-gfx] [RFC PATCH v2 0/2] Introduce a ww transaction utility Thomas Hellström (Intel)
2020-09-17 18:59 ` [Intel-gfx] [RFC PATCH v2 1/2] drm/i915: Break out dma_resv ww locking utilities to separate files Thomas Hellström (Intel)
2020-09-17 18:59 ` Thomas Hellström (Intel) [this message]
2020-09-22  9:12   ` [Intel-gfx] [RFC PATCH v2 2/2] drm/i915: Introduce a i915_gem_do_ww(){} utility Tvrtko Ursulin
2020-09-22 13:31     ` Thomas Hellström (Intel)
2020-09-17 19:16 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for Introduce a ww transaction utility (rev2) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200917185945.11734-3-thomas_os@shipmail.org \
    --to=thomas_os@shipmail.org \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=maarten.lankhorst@intel.com \
    --cc=thomas.hellstrom@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox