* [PATCH v2 0/2] drm/i915: timeline semaphore support
From: Lionel Landwerlin @ 2019-06-27 8:03 UTC (permalink / raw)
To: intel-gfx
Hi,
This revision gets rid of the synchronous wait in the kernel; the wait is
now implemented in the userspace driver. A thread is spawned for each
engine and waits for the syncobjs to become available before calling into
execbuffer.
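Roughly, each per-engine thread in the userspace driver does something
like the sketch below. This is only an illustration of the approach, not
the actual Anv code: the queue/submission types and helpers are made up,
and only the libdrm calls and the DRM flag/ioctl names are meant to be
real.

    #include <stdint.h>
    #include <xf86drm.h>
    #include <drm/i915_drm.h>

    static void *engine_queue_thread(void *arg)
    {
            struct engine_queue *q = arg;   /* hypothetical per-engine queue */
            struct submission *s;           /* hypothetical queued execbuf */

            while ((s = queue_pop_blocking(q)) != NULL) {
                    /* Block until the awaited syncobj points exist (are
                     * "available"), not until they signal.
                     */
                    drmSyncobjTimelineWait(q->fd,
                                           s->wait_handles, s->wait_points,
                                           s->wait_count, INT64_MAX,
                                           DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
                                           DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE,
                                           NULL);

                    drmIoctl(q->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &s->execbuf);
            }
            return NULL;
    }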
Cheers,
Lionel Landwerlin (2):
drm/i915: introduce a mechanism to extend execbuf2
drm/i915: add syncobj timeline support
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 310 ++++++++++++++----
drivers/gpu/drm/i915/i915_drv.c | 2 +
include/uapi/drm/i915_drm.h | 62 +++-
3 files changed, 314 insertions(+), 60 deletions(-)
--
2.21.0.392.gf8f6787159e
* [PATCH v2 1/2] drm/i915: introduce a mechanism to extend execbuf2
From: Lionel Landwerlin @ 2019-06-27 8:03 UTC (permalink / raw)
To: intel-gfx
We're planning to use this for a couple of new features where we need
to provide additional parameters to execbuf.
Setting the new I915_EXEC_EXT flag makes cliprects_ptr point to a chain
of i915_user_extension structures instead of clip rectangles.
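As a rough illustration of the intended userspace usage (no extension is
defined by this patch yet, so the extension name below is a placeholder,
and fd/objects/n_objects stand in for the usual execbuf setup):

    struct i915_user_extension ext = {
            .next_extension = 0,    /* end of the chain */
            .name = 0,              /* a DRM_I915_GEM_EXECBUFFER_EXT_* value */
    };
    struct drm_i915_gem_execbuffer2 execbuf = {
            .buffers_ptr = (uintptr_t)objects,
            .buffer_count = n_objects,
            .flags = I915_EXEC_RENDER | I915_EXEC_EXT,
            .cliprects_ptr = (uintptr_t)&ext,   /* head of the chain */
            .num_cliprects = 0,                 /* must be 0 with I915_EXEC_EXT */
    };

    drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);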
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 30 ++++++++++++++++++-
include/uapi/drm/i915_drm.h | 25 ++++++++++++++--
2 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index cf8edb6822ee..1970dd8cbac3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -23,6 +23,7 @@
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_trace.h"
+#include "i915_user_extensions.h"
#include "intel_drv.h"
enum {
@@ -271,6 +272,10 @@ struct i915_execbuffer {
*/
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
+
+ struct {
+ u64 flags; /** Available extensions parameters */
+ } extensions;
};
#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1969,7 +1974,7 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
return false;
/* Kernel clipping was a DRI1 misfeature */
- if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
+ if (!(exec->flags & (I915_EXEC_FENCE_ARRAY | I915_EXEC_EXT))) {
if (exec->num_cliprects || exec->cliprects_ptr)
return false;
}
@@ -2347,6 +2352,22 @@ signal_fence_array(struct i915_execbuffer *eb,
}
}
+static const i915_user_extension_fn execbuf_extensions[] = {
+};
+
+static int
+parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
+ struct i915_execbuffer *eb)
+{
+ if (args->num_cliprects != 0)
+ return -EINVAL;
+
+ return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
+ execbuf_extensions,
+ ARRAY_SIZE(execbuf_extensions),
+ eb);
+}
+
static int
i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_file *file,
@@ -2393,6 +2414,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (args->flags & I915_EXEC_IS_PINNED)
eb.batch_flags |= I915_DISPATCH_PINNED;
+ eb.extensions.flags = 0;
+ if (args->flags & I915_EXEC_EXT) {
+ err = parse_execbuf2_extensions(args, &eb);
+ if (err)
+ return err;
+ }
+
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e27a8eda9121..efa195d6994e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1013,6 +1013,10 @@ struct drm_i915_gem_exec_fence {
__u32 flags;
};
+enum drm_i915_gem_execbuffer_ext {
+ DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
+};
+
struct drm_i915_gem_execbuffer2 {
/**
* List of gem_exec_object2 structs
@@ -1029,8 +1033,14 @@ struct drm_i915_gem_execbuffer2 {
__u32 num_cliprects;
/**
* This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
- * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a
- * struct drm_i915_gem_exec_fence *fences.
+ * & I915_EXEC_EXT are not set.
+ *
+ * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array
+ * of struct drm_i915_gem_exec_fence and num_cliprects is the length
+ * of the array.
+ *
+ * If I915_EXEC_EXT is set, then this is a pointer to a single struct
+ * i915_user_extension and num_cliprects is 0.
*/
__u64 cliprects_ptr;
#define I915_EXEC_RING_MASK (0x3f)
@@ -1148,7 +1158,16 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_FENCE_SUBMIT (1 << 20)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
+/*
+ * Setting I915_EXEC_EXT implies that drm_i915_gem_execbuffer2.cliprects_ptr
+ * is treated as a pointer to a linked list of i915_user_extension. Each
+ * i915_user_extension node is the base of a larger structure. The
+ * supported structures are listed in the drm_i915_gem_execbuffer_ext
+ * enum.
+ */
+#define I915_EXEC_EXT (1 << 21)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_EXT << 1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
--
2.21.0.392.gf8f6787159e
* [PATCH v2 2/2] drm/i915: add syncobj timeline support
From: Lionel Landwerlin @ 2019-06-27 8:03 UTC (permalink / raw)
To: intel-gfx
Introduce a new extension to execbuf so that we can specify syncobj
handles as well as timeline points.
v2: Reuse i915_user_extension_fn
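A rough sketch of the userspace side, waiting on one timeline point and
signaling another through the new extension (the handles and points are
placeholders, error handling and the rest of the execbuf setup omitted):

    struct drm_i915_gem_exec_fence fences[2] = {
            { .handle = wait_syncobj,   .flags = I915_EXEC_FENCE_WAIT },
            { .handle = signal_syncobj, .flags = I915_EXEC_FENCE_SIGNAL },
    };
    __u64 points[2] = { wait_point, signal_point }; /* 0 = binary syncobj */
    struct drm_i915_gem_execbuffer_ext_timeline_fences ext = {
            .base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
            .handle_count = 2,
            .handles_ptr = (uintptr_t)fences,
            .values_ptr = (uintptr_t)points,
    };

    execbuf.flags |= I915_EXEC_EXT;
    execbuf.cliprects_ptr = (uintptr_t)&ext;
    execbuf.num_cliprects = 0;

    drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);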
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 280 ++++++++++++++----
drivers/gpu/drm/i915/i915_drv.c | 2 +
include/uapi/drm/i915_drm.h | 37 +++
3 files changed, 263 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1970dd8cbac3..476fade6fcab 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -213,6 +213,13 @@ enum {
* the batchbuffer in trusted mode, otherwise the ioctl is rejected.
*/
+struct i915_drm_dma_fences {
+ struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
+ struct dma_fence *dma_fence;
+ u64 value;
+ struct dma_fence_chain *chain_fence;
+};
+
struct i915_execbuffer {
struct drm_i915_private *i915; /** i915 backpointer */
struct drm_file *file; /** per-file lookup tables and limits */
@@ -275,6 +282,7 @@ struct i915_execbuffer {
struct {
u64 flags; /** Available extensions parameters */
+ struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
} extensions;
};
@@ -2224,67 +2232,193 @@ eb_select_engine(struct i915_execbuffer *eb,
}
static void
-__free_fence_array(struct drm_syncobj **fences, unsigned int n)
+__free_fence_array(struct i915_drm_dma_fences *fences, unsigned int n)
{
- while (n--)
- drm_syncobj_put(ptr_mask_bits(fences[n], 2));
+ while (n--) {
+ drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
+ dma_fence_put(fences[n].dma_fence);
+ kfree(fences[n].chain_fence);
+ }
kvfree(fences);
}
-static struct drm_syncobj **
-get_fence_array(struct drm_i915_gem_execbuffer2 *args,
- struct drm_file *file)
+static struct i915_drm_dma_fences *
+get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
{
- const unsigned long nfences = args->num_cliprects;
+ struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
+ &eb->extensions.timeline_fences;
+ struct drm_i915_gem_exec_fence __user *user_fences;
+ struct i915_drm_dma_fences *fences;
+ u64 __user *user_values;
+ unsigned long n;
+ int err;
+
+ *out_n_fences = timeline_fences->handle_count;
+
+ /* Check multiplication overflow for access_ok() and kvmalloc_array() */
+ BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+ if (*out_n_fences > min_t(unsigned long,
+ ULONG_MAX / sizeof(*user_fences),
+ SIZE_MAX / sizeof(*fences)))
+ return ERR_PTR(-EINVAL);
+
+ user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
+ if (!access_ok(user_fences, *out_n_fences * sizeof(*user_fences)))
+ return ERR_PTR(-EFAULT);
+
+ user_values = u64_to_user_ptr(timeline_fences->values_ptr);
+ if (!access_ok(user_values, *out_n_fences * sizeof(*user_values)))
+ return ERR_PTR(-EFAULT);
+
+ fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
+ __GFP_NOWARN | GFP_KERNEL);
+ if (!fences)
+ return ERR_PTR(-ENOMEM);
+
+ BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+ ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+ for (n = 0; n < *out_n_fences; n++) {
+ struct drm_i915_gem_exec_fence user_fence;
+ struct drm_syncobj *syncobj;
+ struct dma_fence *fence = NULL;
+ u64 point;
+
+ if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
+ err = -EFAULT;
+ goto err;
+ }
+
+ if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (__get_user(point, user_values++)) {
+ err = -EFAULT;
+ goto err;
+ }
+
+ syncobj = drm_syncobj_find(eb->file, user_fence.handle);
+ if (!syncobj) {
+ DRM_DEBUG("Invalid syncobj handle provided\n");
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+ fence = drm_syncobj_fence_get(syncobj);
+ if (!fence) {
+ DRM_DEBUG("Syncobj handle has no fence\n");
+ drm_syncobj_put(syncobj);
+ err = -EINVAL;
+ goto err;
+ }
+
+ err = dma_fence_chain_find_seqno(&fence, point);
+ if (err) {
+ DRM_DEBUG("Syncobj handle missing requested point\n");
+ goto err;
+ }
+ }
+
+ /*
+ * For timeline syncobjs we need to create a chain.
+ */
+ if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
+ fences[n].chain_fence =
+ kmalloc(sizeof(*fences[n].chain_fence),
+ GFP_KERNEL);
+ if (!fences[n].chain_fence) {
+ dma_fence_put(fence);
+ drm_syncobj_put(syncobj);
+ err = -ENOMEM;
+ DRM_DEBUG("Unable to alloc chain_fence\n");
+ goto err;
+ }
+ } else {
+ fences[n].chain_fence = NULL;
+ }
+
+ fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+ fences[n].dma_fence = fence;
+ fences[n].value = point;
+ }
+
+ return fences;
+
+err:
+ __free_fence_array(fences, n);
+ return ERR_PTR(err);
+}
+
+static struct i915_drm_dma_fences *
+get_legacy_fence_array(struct i915_execbuffer *eb,
+ int *out_n_fences)
+{
+ struct drm_i915_gem_execbuffer2 *args = eb->args;
struct drm_i915_gem_exec_fence __user *user;
- struct drm_syncobj **fences;
+ struct i915_drm_dma_fences *fences;
unsigned long n;
int err;
- if (!(args->flags & I915_EXEC_FENCE_ARRAY))
- return NULL;
+ *out_n_fences = args->num_cliprects;
/* Check multiplication overflow for access_ok() and kvmalloc_array() */
BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
- if (nfences > min_t(unsigned long,
- ULONG_MAX / sizeof(*user),
- SIZE_MAX / sizeof(*fences)))
+ if (*out_n_fences > min_t(unsigned long,
+ ULONG_MAX / sizeof(*user),
+ SIZE_MAX / sizeof(*fences)))
return ERR_PTR(-EINVAL);
user = u64_to_user_ptr(args->cliprects_ptr);
- if (!access_ok(user, nfences * sizeof(*user)))
+ if (!access_ok(user, *out_n_fences * sizeof(*user)))
return ERR_PTR(-EFAULT);
- fences = kvmalloc_array(nfences, sizeof(*fences),
+ fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
__GFP_NOWARN | GFP_KERNEL);
if (!fences)
return ERR_PTR(-ENOMEM);
- for (n = 0; n < nfences; n++) {
- struct drm_i915_gem_exec_fence fence;
+ for (n = 0; n < *out_n_fences; n++) {
+ struct drm_i915_gem_exec_fence user_fence;
struct drm_syncobj *syncobj;
+ struct dma_fence *fence = NULL;
- if (__copy_from_user(&fence, user++, sizeof(fence))) {
+ if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) {
err = -EFAULT;
goto err;
}
- if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+ if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
err = -EINVAL;
goto err;
}
- syncobj = drm_syncobj_find(file, fence.handle);
+ syncobj = drm_syncobj_find(eb->file, user_fence.handle);
if (!syncobj) {
DRM_DEBUG("Invalid syncobj handle provided\n");
err = -ENOENT;
goto err;
}
+ if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+ fence = drm_syncobj_fence_get(syncobj);
+ if (!fence) {
+ DRM_DEBUG("Syncobj handle has no fence\n");
+ drm_syncobj_put(syncobj);
+ err = -EINVAL;
+ goto err;
+ }
+ }
+
BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
- fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+ fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+ fences[n].dma_fence = fence;
+ fences[n].value = 0;
+ fences[n].chain_fence = NULL;
}
return fences;
@@ -2294,37 +2428,44 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
return ERR_PTR(err);
}
+static struct i915_drm_dma_fences *
+get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+ if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+ return get_legacy_fence_array(eb, out_n_fences);
+ if (eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+ return get_timeline_fence_array(eb, out_n_fences);
+
+ *out_n_fences = 0;
+
+ return NULL;
+}
+
static void
-put_fence_array(struct drm_i915_gem_execbuffer2 *args,
- struct drm_syncobj **fences)
+put_fence_array(struct i915_drm_dma_fences *fences, int nfences)
{
if (fences)
- __free_fence_array(fences, args->num_cliprects);
+ __free_fence_array(fences, nfences);
}
static int
await_fence_array(struct i915_execbuffer *eb,
- struct drm_syncobj **fences)
+ struct i915_drm_dma_fences *fences,
+ int nfences)
{
- const unsigned int nfences = eb->args->num_cliprects;
unsigned int n;
int err;
for (n = 0; n < nfences; n++) {
struct drm_syncobj *syncobj;
- struct dma_fence *fence;
unsigned int flags;
- syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+ syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
if (!(flags & I915_EXEC_FENCE_WAIT))
continue;
- fence = drm_syncobj_fence_get(syncobj);
- if (!fence)
- return -EINVAL;
-
- err = i915_request_await_dma_fence(eb->request, fence);
- dma_fence_put(fence);
+ err = i915_request_await_dma_fence(eb->request,
+ fences[n].dma_fence);
if (err < 0)
return err;
}
@@ -2334,9 +2475,9 @@ await_fence_array(struct i915_execbuffer *eb,
static void
signal_fence_array(struct i915_execbuffer *eb,
- struct drm_syncobj **fences)
+ struct i915_drm_dma_fences *fences,
+ int nfences)
{
- const unsigned int nfences = eb->args->num_cliprects;
struct dma_fence * const fence = &eb->request->fence;
unsigned int n;
@@ -2344,15 +2485,43 @@ signal_fence_array(struct i915_execbuffer *eb,
struct drm_syncobj *syncobj;
unsigned int flags;
- syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+ syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
if (!(flags & I915_EXEC_FENCE_SIGNAL))
continue;
- drm_syncobj_replace_fence(syncobj, fence);
+ if (fences[n].chain_fence) {
+ drm_syncobj_add_point(syncobj, fences[n].chain_fence,
+ fence, fences[n].value);
+ /*
+ * The chain's ownership is transferred to the
+ * timeline.
+ */
+ fences[n].chain_fence = NULL;
+ } else {
+ drm_syncobj_replace_fence(syncobj, fence);
+ }
}
}
+static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
+{
+ struct i915_execbuffer *eb = data;
+
+ /* Timeline fences are incompatible with the fence array flag. */
+ if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+ return -EINVAL;
+
+ if (copy_from_user(&eb->extensions.timeline_fences, ext,
+ sizeof(eb->extensions.timeline_fences)))
+ return -EFAULT;
+
+ eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
+
+ return 0;
+}
+
static const i915_user_extension_fn execbuf_extensions[] = {
+ [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
};
static int
@@ -2372,14 +2541,15 @@ static int
i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_file *file,
struct drm_i915_gem_execbuffer2 *args,
- struct drm_i915_gem_exec_object2 *exec,
- struct drm_syncobj **fences)
+ struct drm_i915_gem_exec_object2 *exec)
{
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
struct dma_fence *exec_fence = NULL;
struct sync_file *out_fence = NULL;
+ struct i915_drm_dma_fences *fences = NULL;
int out_fence_fd = -1;
+ int nfences = 0;
int err;
BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
@@ -2421,10 +2591,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
return err;
}
+ fences = get_fence_array(&eb, &nfences);
+ if (IS_ERR(fences))
+ return PTR_ERR(fences);
+
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
- if (!in_fence)
- return -EINVAL;
+ if (!in_fence) {
+ err = -EINVAL;
+ goto err_fences;
+ }
}
if (args->flags & I915_EXEC_FENCE_SUBMIT) {
@@ -2582,7 +2758,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
if (fences) {
- err = await_fence_array(&eb, fences);
+ err = await_fence_array(&eb, fences, nfences);
if (err)
goto err_request;
}
@@ -2611,7 +2787,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
i915_request_add(eb.request);
if (fences)
- signal_fence_array(&eb, fences);
+ signal_fence_array(&eb, fences, nfences);
if (out_fence) {
if (err == 0) {
@@ -2646,6 +2822,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
dma_fence_put(exec_fence);
err_in_fence:
dma_fence_put(in_fence);
+err_fences:
+ put_fence_array(fences, nfences);
return err;
}
@@ -2739,7 +2917,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
exec2_list[i].flags = 0;
}
- err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
+ err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
if (exec2.flags & __EXEC_HAS_RELOC) {
struct drm_i915_gem_exec_object __user *user_exec_list =
u64_to_user_ptr(args->buffers_ptr);
@@ -2770,7 +2948,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_gem_execbuffer2 *args = data;
struct drm_i915_gem_exec_object2 *exec2_list;
- struct drm_syncobj **fences = NULL;
const size_t count = args->buffer_count;
int err;
@@ -2798,15 +2975,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
return -EFAULT;
}
- if (args->flags & I915_EXEC_FENCE_ARRAY) {
- fences = get_fence_array(args, file);
- if (IS_ERR(fences)) {
- kvfree(exec2_list);
- return PTR_ERR(fences);
- }
- }
-
- err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
+ err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
/*
* Now that we have begun execution of the batchbuffer, we ignore
@@ -2846,7 +3015,6 @@ end:;
}
args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
- put_fence_array(args, fences);
kvfree(exec2_list);
return err;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 7c3c3b8ab339..48f9009a6318 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -456,6 +456,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_BATCH_FIRST:
case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
+ case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
/* For the time being all of these are always true;
* if some supported hardware does not have one of these
* features this value needs to be provided from
@@ -3220,6 +3221,7 @@ static struct drm_driver driver = {
.driver_features =
DRIVER_GEM |
- DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
+ DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
.release = i915_driver_release,
.open = i915_driver_open,
.lastclose = i915_driver_lastclose,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index efa195d6994e..b7fe1915e34d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -617,6 +617,12 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_PERF_REVISION 54
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * timeline syncobjs through drm_i915_gem_execbuffer_ext_timeline_fences. See
+ * I915_EXEC_EXT.
+ */
+#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
+
/* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam {
@@ -1014,9 +1020,40 @@ struct drm_i915_gem_exec_fence {
};
enum drm_i915_gem_execbuffer_ext {
+ /**
+ * This identifier is associated with
+ * drm_i915_gem_execbuffer_ext_timeline_fences.
+ */
+ DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
+
DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
};
+/**
+ * This structure describes an array of drm_syncobj and associated points for
+ * timeline variants of drm_syncobj. It is invalid to append this structure to
+ * the execbuf if I915_EXEC_FENCE_ARRAY is set.
+ */
+struct drm_i915_gem_execbuffer_ext_timeline_fences {
+ struct i915_user_extension base;
+
+ /**
+ * Number of elements in the handles_ptr & values_ptr arrays.
+ */
+ __u64 handle_count;
+
+ /**
+ * Pointer to an array of struct drm_i915_gem_exec_fence of size handle_count.
+ */
+ __u64 handles_ptr;
+
+ /**
+ * Pointer to an array of u64 values of size handle_count. Values must
+ * be 0 for a binary drm_syncobj.
+ */
+ __u64 values_ptr;
+};
+
struct drm_i915_gem_execbuffer2 {
/**
* List of gem_exec_object2 structs
--
2.21.0.392.gf8f6787159e
* ✗ Fi.CI.BAT: failure for drm/i915: timeline semaphore support (rev2)
From: Patchwork @ 2019-06-27 9:58 UTC (permalink / raw)
To: Lionel Landwerlin; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: timeline semaphore support (rev2)
URL : https://patchwork.freedesktop.org/series/61032/
State : failure
== Summary ==
Applying: drm/i915: introduce a mechanism to extend execbuf2
Applying: drm/i915: add syncobj timeline support
error: sha1 information is lacking or useless (drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch' to see the failed patch
Patch failed at 0002 drm/i915: add syncobj timeline support
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".
* Re: [PATCH v2 0/2] drm/i915: timeline semaphore support
From: Chris Wilson @ 2019-06-27 10:21 UTC (permalink / raw)
To: Lionel Landwerlin, intel-gfx
Quoting Lionel Landwerlin (2019-06-27 09:03:37)
> Hi,
>
> This revision gets rid of the synchronous wait. We now implement the
> synchronous wait as part of the userspace driver. A thread is spawned
> for each engine and waits for availability of the syncobjs before
> calling into execbuffer.
Why would you do that? It's not like the kernel already has the ability
to serialise execution asynchronously...
-Chris
* Re: [PATCH v2 0/2] drm/i915: timeline semaphore support
From: Lionel Landwerlin @ 2019-06-27 10:39 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On 27/06/2019 13:21, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-06-27 09:03:37)
>> Hi,
>>
>> This revision gets rid of the synchronous wait. We now implement the
>> synchronous wait as part of the userspace driver. A thread is spawned
>> for each engine and waits for availability of the syncobjs before
>> calling into execbuffer.
> Why would you do that? It's not like the kernel already has the ability
> to serialise execution asynchronously...
> -Chris
>
Maybe I didn't express myself well.
There is a requirement from the Vulkan spec that we should be able to
queue a workload depending on fences that haven't materialized yet.
The last revision implemented that in the i915 by blocking in the
execbuffer until the input fences had all materialized.
We moved that into the userspace driver, so the i915 execbuffer path no
longer blocks (which is one thing you mentioned was a behavior change
in rev1).
It also makes Anv not block on QueueSubmit() because it hands over the
waiting to a thread dedicated to the queue.
-Lionel
* Re: [PATCH v2 0/2] drm/i915: timeline semaphore support
From: Chris Wilson @ 2019-06-27 10:43 UTC (permalink / raw)
To: Lionel Landwerlin, intel-gfx
Quoting Lionel Landwerlin (2019-06-27 11:39:24)
> On 27/06/2019 13:21, Chris Wilson wrote:
> > Quoting Lionel Landwerlin (2019-06-27 09:03:37)
> >> Hi,
> >>
> >> This revision gets rid of the synchronous wait. We now implement the
> >> synchronous wait as part of the userspace driver. A thread is spawned
> >> for each engine and waits for availability of the syncobjs before
> >> calling into execbuffer.
> > Why would you do that? It's not like the kernel already has the ability
> > to serialise execution asynchronously...
> > -Chris
> >
> Maybe I didn't express myself well.
>
> There is a requirement from the Vulkan spec that we should be able to
> queue a workload depending on fences that haven't materialized yet.
>
>
> The last revision implemented that in the i915 by blocking in the
> execbuffer until the input fences had all materialized.
My point was that the syncobj seqno itself could be expressed as a proxy
fence that was replaced with the real fence later (and we previously
had code to do exactly that :(). i915 would listen to the
proxy fence for its scheduling and so be signaled by the right fence
when it was known. That will be a few orders of magnitude lower latency,
more if we can optimise away the irq by knowing the proxy fence ahead of
time.
-Chris
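To make the proxy-fence idea above concrete, a rough, purely illustrative
sketch (this is not code from the patches, and all of the names are
invented): a placeholder fence stands in for the not-yet-materialized
syncobj point and is signaled once the real fence shows up.

    struct proxy_fence {
            struct dma_fence base;
            spinlock_t lock;
            struct dma_fence_cb cb;
    };

    static const char *proxy_name(struct dma_fence *f)
    {
            return "proxy";
    }

    static const struct dma_fence_ops proxy_fence_ops = {
            .get_driver_name = proxy_name,
            .get_timeline_name = proxy_name,
    };

    /* Handed to the scheduler in place of the missing syncobj fence. */
    static struct proxy_fence *proxy_fence_create(void)
    {
            struct proxy_fence *p = kzalloc(sizeof(*p), GFP_KERNEL);

            if (!p)
                    return NULL;
            spin_lock_init(&p->lock);
            dma_fence_init(&p->base, &proxy_fence_ops, &p->lock,
                           dma_fence_context_alloc(1), 1);
            return p;
    }

    static void proxy_fence_wake(struct dma_fence *real, struct dma_fence_cb *cb)
    {
            struct proxy_fence *p = container_of(cb, struct proxy_fence, cb);

            dma_fence_signal(&p->base);
            dma_fence_put(&p->base);
    }

    /* Called once the real fence behind the awaited point is known. */
    static void proxy_fence_set_real(struct proxy_fence *p, struct dma_fence *real)
    {
            dma_fence_get(&p->base);
            if (dma_fence_add_callback(real, &p->cb, proxy_fence_wake))
                    proxy_fence_wake(real, &p->cb); /* already signaled */
    }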
* Re: [PATCH v2 0/2] drm/i915: timeline semaphore support
From: Lionel Landwerlin @ 2019-06-27 11:05 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On 27/06/2019 13:43, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-06-27 11:39:24)
>> On 27/06/2019 13:21, Chris Wilson wrote:
>>> Quoting Lionel Landwerlin (2019-06-27 09:03:37)
>>>> Hi,
>>>>
>>>> This revision gets rid of the synchronous wait. We now implement the
>>>> synchronous wait as part of the userspace driver. A thread is spawned
>>>> for each engine and waits for availability of the syncobjs before
>>>> calling into execbuffer.
>>> Why would you do that? It's not like the kernel already has the ability
>>> to serialise execution asynchronously...
>>> -Chris
>>>
>> Maybe I didn't express myself well.
>>
>> There is a requirement from the Vulkan spec that we should be able to
>> queue a workload depending on fences that haven't materialized yet.
>>
>>
>> The last revision implemented that in the i915 by blocking in the
>> execbuffer until the input fences had all materialized.
> My point was that the syncobj seqno itself could be expressed as a proxy
> fence that was replaced with the real fence later. (And that we
> previously had code to do exactly that :( i915 would listen to the
> proxy fence for its scheduling and so be signaled by the right fence
> when it was known. That will be a few orders of magnitude lower latency,
> more if we can optimise away the irq by knowing the proxy fence ahead of
> time.
> -Chris
>
Agreed, but there is a problem here with the dma_fence_chain implementation.
With timeline semaphores, you could be waiting for point 6 and nobody
is ever going to signal that point.
Instead point 8 is going to be signaled and we need to signal point 6
when that happens.
I don't think the current dma_fence_chain implementation allows that at
the moment.
It would need to be more complex with likely more locking.
I have thought a little bit about what you're proposing and found some
issues with other features we need to support (like import/export).
You could end up building an arbitrarily long chain of proxy fences firing
one another in cascade and blowing up the stack in a kernel task
signaling one of them.
-Lionel