* [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2
@ 2020-04-06 20:07 Venkata Sandeep Dhanalakota
2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
` (4 more replies)
0 siblings, 5 replies; 17+ messages in thread
From: Venkata Sandeep Dhanalakota @ 2020-04-06 20:07 UTC (permalink / raw)
To: intel-gfx; +Cc: chris.p.wilson
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
We're planning to use this for a couple of new feature where we need
to provide additional parameters to execbuf.
v2: Check for invalid flags in execbuffer2 (Lionel)
v3: Rename I915_EXEC_EXT -> I915_EXEC_USE_EXTENSIONS (Chris)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 39 ++++++++++++++++++-
include/uapi/drm/i915_drm.h | 26 +++++++++++--
2 files changed, 61 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 9d11bad74e9a..16831f715daa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -26,6 +26,7 @@
#include "i915_gem_ioctls.h"
#include "i915_sw_fence_work.h"
#include "i915_trace.h"
+#include "i915_user_extensions.h"
struct eb_vma {
struct i915_vma *vma;
@@ -288,6 +289,10 @@ struct i915_execbuffer {
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
struct eb_vma_array *array;
+
+ struct {
+ u64 flags; /** Available extensions parameters */
+ } extensions;
};
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
@@ -1698,7 +1703,8 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
return -EINVAL;
/* Kernel clipping was a DRI1 misfeature */
- if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
+ if (!(exec->flags & (I915_EXEC_FENCE_ARRAY |
+ I915_EXEC_USE_EXTENSIONS))) {
if (exec->num_cliprects || exec->cliprects_ptr)
return -EINVAL;
}
@@ -2431,6 +2437,33 @@ static void eb_request_add(struct i915_execbuffer *eb)
mutex_unlock(&tl->mutex);
}
+static const i915_user_extension_fn execbuf_extensions[] = {
+};
+
+static int
+parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
+ struct i915_execbuffer *eb)
+{
+ eb->extensions.flags = 0;
+
+ if (!(args->flags & I915_EXEC_USE_EXTENSIONS))
+ return 0;
+
+ /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
+ * have another flag also using it at the same time.
+ */
+ if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+ return -EINVAL;
+
+ if (args->num_cliprects != 0)
+ return -EINVAL;
+
+ return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
+ execbuf_extensions,
+ ARRAY_SIZE(execbuf_extensions),
+ eb);
+}
+
static int
i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_file *file,
@@ -2484,6 +2517,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (args->flags & I915_EXEC_IS_PINNED)
eb.batch_flags |= I915_DISPATCH_PINNED;
+ err = parse_execbuf2_extensions(args, &eb);
+ if (err)
+ return err;
+
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 14b67cd6b54b..7ea38aa6502c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1046,6 +1046,10 @@ struct drm_i915_gem_exec_fence {
__u32 flags;
};
+enum drm_i915_gem_execbuffer_ext {
+ DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
+};
+
struct drm_i915_gem_execbuffer2 {
/**
* List of gem_exec_object2 structs
@@ -1062,8 +1066,15 @@ struct drm_i915_gem_execbuffer2 {
__u32 num_cliprects;
/**
* This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
- * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a
- * struct drm_i915_gem_exec_fence *fences.
+ * & I915_EXEC_USE_EXTENSIONS are not set.
+ *
+ * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array
+ * of struct drm_i915_gem_exec_fence and num_cliprects is the length
+ * of the array.
+ *
+ * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a
+ * single struct drm_i915_gem_base_execbuffer_ext and num_cliprects is
+ * 0.
*/
__u64 cliprects_ptr;
#define I915_EXEC_RING_MASK (0x3f)
@@ -1181,7 +1192,16 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_FENCE_SUBMIT (1 << 20)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
+/*
+ * Setting I915_EXEC_USE_EXTENSIONS implies that
+ * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked
+ * list of i915_user_extension. Each i915_user_extension node is the base of a
+ * larger structure. The list of supported structures are listed in the
+ * drm_i915_gem_execbuffer_ext enum.
+ */
+#define I915_EXEC_USE_EXTENSIONS (1 << 21)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS<<1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
--
2.21.0.5.gaeb582a983
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 17+ messages in thread* [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support 2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota @ 2020-04-06 20:07 ` Venkata Sandeep Dhanalakota 2020-04-08 16:29 ` Lionel Landwerlin 2020-04-08 17:14 ` Lionel Landwerlin 2020-04-06 20:07 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Venkata Sandeep Dhanalakota ` (3 subsequent siblings) 4 siblings, 2 replies; 17+ messages in thread From: Venkata Sandeep Dhanalakota @ 2020-04-06 20:07 UTC (permalink / raw) To: intel-gfx; +Cc: chris.p.wilson Introduces a new parameters to execbuf so that we can specify syncobj handles as well as timeline points. v2: Reuse i915_user_extension_fn v3: Check that the chained extension is only present once (Chris) v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel) v5: Use BIT_ULL (Chris) v6: Fix issue with already signaled timeline points, dma_fence_chain_find_seqno() setting fence to NULL (Chris) v7: Report ENOENT with invalid syncobj handle (Lionel) v8: Check for out of order timeline point insertion (Chris) v9: After explanations on https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html drop the ordering check from v8 (Lionel) v10: Set first extension enum item to 1 (Jason) v11: Add wait on previous sync points in timelines (Sandeep) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com> --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 312 ++++++++++++++---- drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_getparam.c | 1 + include/uapi/drm/i915_drm.h | 38 +++ 4 files changed, 296 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 16831f715daa..4cb4cd035daa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -230,6 +230,13 @@ enum { * the batchbuffer in trusted mode, otherwise the ioctl is rejected. */ +struct i915_eb_fences { + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ + struct dma_fence *dma_fence; + u64 value; + struct dma_fence_chain *chain_fence; +}; + struct i915_execbuffer { struct drm_i915_private *i915; /** i915 backpointer */ struct drm_file *file; /** per-file lookup tables and limits */ @@ -292,6 +299,7 @@ struct i915_execbuffer { struct { u64 flags; /** Available extensions parameters */ + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; } extensions; }; @@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb, } static void -__free_fence_array(struct drm_syncobj **fences, unsigned int n) +__free_fence_array(struct i915_eb_fences *fences, unsigned int n) { - while (n--) - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); + while (n--) { + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); + dma_fence_put(fences[n].dma_fence); + kfree(fences[n].chain_fence); + } kvfree(fences); } -static struct drm_syncobj ** -get_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_file *file) +static struct i915_eb_fences * +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences) +{ + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences = + &eb->extensions.timeline_fences; + struct drm_i915_gem_exec_fence __user *user_fences; + struct i915_eb_fences *fences; + u64 __user *user_values; + u64 num_fences, num_user_fences = timeline_fences->fence_count; + unsigned long n; + int err = 0; + + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (num_user_fences > min_t(unsigned long, + ULONG_MAX / sizeof(*user_fences), + SIZE_MAX / sizeof(*fences))) + return ERR_PTR(-EINVAL); + + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences))) + return ERR_PTR(-EFAULT); + + user_values = u64_to_user_ptr(timeline_fences->values_ptr); + if (!access_ok(user_values, num_user_fences * sizeof(*user_values))) + return ERR_PTR(-EFAULT); + + fences = kvmalloc_array(num_user_fences, sizeof(*fences), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) { + struct drm_i915_gem_exec_fence user_fence; + struct drm_syncobj *syncobj; + struct dma_fence *fence = NULL; + u64 point; + + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) { + err = -EFAULT; + goto err; + } + + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + + if (__get_user(point, user_values++)) { + err = -EFAULT; + goto err; + } + + syncobj = drm_syncobj_find(eb->file, user_fence.handle); + if (!syncobj) { + DRM_DEBUG("Invalid syncobj handle provided\n"); + err = -ENOENT; + goto err; + } + + fence = drm_syncobj_fence_get(syncobj); + + if (!fence && user_fence.flags && + !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { + DRM_DEBUG("Syncobj handle has no fence\n"); + drm_syncobj_put(syncobj); + err = -EINVAL; + goto err; + } + + if (fence) + err = dma_fence_chain_find_seqno(&fence, point); + + if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { + DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); + drm_syncobj_put(syncobj); + goto err; + } + + /* A point might have been signaled already and + * garbage collected from the timeline. In this case + * just ignore the point and carry on. + */ + if (!fence && (user_fence.flags & I915_EXEC_FENCE_WAIT)) { + drm_syncobj_put(syncobj); + continue; + } + + /* + * For timeline syncobjs we need to preallocate chains for + * later signaling. + */ + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { + /* + * Waiting and signaling the same point (when point != + * 0) would break the timeline. + */ + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { + DRM_DEBUG("Trying to wait & signal the same timeline point.\n"); + err = -EINVAL; + drm_syncobj_put(syncobj); + goto err; + } + + fences[num_fences].chain_fence = + kmalloc(sizeof(*fences[num_fences].chain_fence), + GFP_KERNEL); + if (!fences[num_fences].chain_fence) { + drm_syncobj_put(syncobj); + err = -ENOMEM; + DRM_DEBUG("Unable to alloc chain_fence\n"); + goto err; + } + } else { + fences[num_fences].chain_fence = NULL; + } + + fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); + fences[num_fences].dma_fence = fence; + fences[num_fences].value = point; + num_fences++; + } + + *out_n_fences = num_fences; + + return fences; + +err: + __free_fence_array(fences, num_fences); + return ERR_PTR(err); +} + +static struct i915_eb_fences * +get_legacy_fence_array(struct i915_execbuffer *eb, + int *out_n_fences) { - const unsigned long nfences = args->num_cliprects; + struct drm_i915_gem_execbuffer2 *args = eb->args; struct drm_i915_gem_exec_fence __user *user; - struct drm_syncobj **fences; + struct i915_eb_fences *fences; + const u32 num_fences = args->num_cliprects; unsigned long n; int err; - if (!(args->flags & I915_EXEC_FENCE_ARRAY)) - return NULL; + *out_n_fences = num_fences; /* Check multiplication overflow for access_ok() and kvmalloc_array() */ BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); - if (nfences > min_t(unsigned long, - ULONG_MAX / sizeof(*user), - SIZE_MAX / sizeof(*fences))) + if (*out_n_fences > min_t(unsigned long, + ULONG_MAX / sizeof(*user), + SIZE_MAX / sizeof(*fences))) return ERR_PTR(-EINVAL); user = u64_to_user_ptr(args->cliprects_ptr); - if (!access_ok(user, nfences * sizeof(*user))) + if (!access_ok(user, *out_n_fences * sizeof(*user))) return ERR_PTR(-EFAULT); - fences = kvmalloc_array(nfences, sizeof(*fences), + fences = kvmalloc_array(*out_n_fences, sizeof(*fences), __GFP_NOWARN | GFP_KERNEL); if (!fences) return ERR_PTR(-ENOMEM); - for (n = 0; n < nfences; n++) { - struct drm_i915_gem_exec_fence fence; + for (n = 0; n < *out_n_fences; n++) { + struct drm_i915_gem_exec_fence user_fence; struct drm_syncobj *syncobj; + struct dma_fence *fence = NULL; - if (__copy_from_user(&fence, user++, sizeof(fence))) { + if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) { err = -EFAULT; goto err; } - if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { err = -EINVAL; goto err; } - syncobj = drm_syncobj_find(file, fence.handle); + syncobj = drm_syncobj_find(eb->file, user_fence.handle); if (!syncobj) { DRM_DEBUG("Invalid syncobj handle provided\n"); err = -ENOENT; goto err; } + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { + fence = drm_syncobj_fence_get(syncobj); + if (!fence) { + DRM_DEBUG("Syncobj handle has no fence\n"); + drm_syncobj_put(syncobj); + err = -EINVAL; + goto err; + } + } + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); + fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); + fences[n].dma_fence = fence; + fences[n].value = 0; + fences[n].chain_fence = NULL; } return fences; @@ -2314,37 +2474,45 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, return ERR_PTR(err); } +static struct i915_eb_fences * +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences) +{ + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) + return get_legacy_fence_array(eb, out_n_fences); + + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) + return get_timeline_fence_array(eb, out_n_fences); + + *out_n_fences = 0; + return NULL; +} + static void -put_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_syncobj **fences) +put_fence_array(struct i915_eb_fences *fences, int nfences) { if (fences) - __free_fence_array(fences, args->num_cliprects); + __free_fence_array(fences, nfences); } static int await_fence_array(struct i915_execbuffer *eb, - struct drm_syncobj **fences) + struct i915_eb_fences *fences, + int nfences) { - const unsigned int nfences = eb->args->num_cliprects; unsigned int n; int err; for (n = 0; n < nfences; n++) { struct drm_syncobj *syncobj; - struct dma_fence *fence; unsigned int flags; - syncobj = ptr_unpack_bits(fences[n], &flags, 2); - if (!(flags & I915_EXEC_FENCE_WAIT)) - continue; + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); - fence = drm_syncobj_fence_get(syncobj); - if (!fence) - return -EINVAL; + if (!fences[n].dma_fence) + continue; - err = i915_request_await_dma_fence(eb->request, fence); - dma_fence_put(fence); + err = i915_request_await_dma_fence(eb->request, + fences[n].dma_fence); if (err < 0) return err; } @@ -2354,9 +2522,9 @@ await_fence_array(struct i915_execbuffer *eb, static void signal_fence_array(struct i915_execbuffer *eb, - struct drm_syncobj **fences) + struct i915_eb_fences *fences, + int nfences) { - const unsigned int nfences = eb->args->num_cliprects; struct dma_fence * const fence = &eb->request->fence; unsigned int n; @@ -2364,14 +2532,44 @@ signal_fence_array(struct i915_execbuffer *eb, struct drm_syncobj *syncobj; unsigned int flags; - syncobj = ptr_unpack_bits(fences[n], &flags, 2); + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); if (!(flags & I915_EXEC_FENCE_SIGNAL)) continue; - drm_syncobj_replace_fence(syncobj, fence); + if (fences[n].chain_fence) { + drm_syncobj_add_point(syncobj, fences[n].chain_fence, + fence, fences[n].value); + /* + * The chain's ownership is transferred to the + * timeline. + */ + fences[n].chain_fence = NULL; + } else { + drm_syncobj_replace_fence(syncobj, fence); + } } } +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data) +{ + struct i915_execbuffer *eb = data; + + /* Timeline fences are incompatible with the fence array flag. */ + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) + return -EINVAL; + + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) + return -EINVAL; + + if (copy_from_user(&eb->extensions.timeline_fences, ext, + sizeof(eb->extensions.timeline_fences))) + return -EFAULT; + + eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES); + + return 0; +} + static void retire_requests(struct intel_timeline *tl, struct i915_request *end) { struct i915_request *rq, *rn; @@ -2438,6 +2636,7 @@ static void eb_request_add(struct i915_execbuffer *eb) } static const i915_user_extension_fn execbuf_extensions[] = { + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, }; static int @@ -2468,16 +2667,17 @@ static int i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, - struct drm_i915_gem_exec_object2 *exec, - struct drm_syncobj **fences) + struct drm_i915_gem_exec_object2 *exec) { struct drm_i915_private *i915 = to_i915(dev); struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct dma_fence *exec_fence = NULL; struct sync_file *out_fence = NULL; + struct i915_eb_fences *fences = NULL; struct i915_vma *batch; int out_fence_fd = -1; + int nfences = 0; int err; BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); @@ -2521,10 +2721,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (err) return err; + fences = get_fence_array(&eb, &nfences); + if (IS_ERR(fences)) + return PTR_ERR(fences); + if (args->flags & I915_EXEC_FENCE_IN) { in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!in_fence) - return -EINVAL; + if (!in_fence) { + err = -EINVAL; + goto err_fences; + } } if (args->flags & I915_EXEC_FENCE_SUBMIT) { @@ -2648,7 +2854,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, } if (fences) { - err = await_fence_array(&eb, fences); + err = await_fence_array(&eb, fences, nfences); if (err) goto err_request; } @@ -2680,7 +2886,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb_request_add(&eb); if (fences) - signal_fence_array(&eb, fences); + signal_fence_array(&eb, fences, nfences); if (out_fence) { if (err == 0) { @@ -2715,6 +2921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, dma_fence_put(exec_fence); err_in_fence: dma_fence_put(in_fence); +err_fences: + put_fence_array(fences, nfences); return err; } @@ -2809,7 +3017,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, exec2_list[i].flags = 0; } - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); if (exec2.flags & __EXEC_HAS_RELOC) { struct drm_i915_gem_exec_object __user *user_exec_list = u64_to_user_ptr(args->buffers_ptr); @@ -2841,7 +3049,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list; - struct drm_syncobj **fences = NULL; const size_t count = args->buffer_count; int err; @@ -2869,15 +3076,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, return -EFAULT; } - if (args->flags & I915_EXEC_FENCE_ARRAY) { - fences = get_fence_array(args, file); - if (IS_ERR(fences)) { - kvfree(exec2_list); - return PTR_ERR(fences); - } - } - - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); + err = i915_gem_do_execbuffer(dev, file, args, exec2_list); /* * Now that we have begun execution of the batchbuffer, we ignore @@ -2917,7 +3116,6 @@ end:; } args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; - put_fence_array(args, fences); kvfree(exec2_list); return err; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a7a3b4b98572..f7f868c3c510 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1828,7 +1828,8 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_GEM | - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 54fce81d5724..b9d3aab53c03 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_BATCH_FIRST: case I915_PARAM_HAS_EXEC_FENCE_ARRAY: case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7ea38aa6502c..7b8680e3b49d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PERF_REVISION 54 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See + * I915_EXEC_USE_EXTENSIONS. + */ +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1047,9 +1053,41 @@ struct drm_i915_gem_exec_fence { }; enum drm_i915_gem_execbuffer_ext { + /** + * See drm_i915_gem_execbuf_ext_timeline_fences. + */ + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1, + DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ }; +/** + * This structure describes an array of drm_syncobj and associated points for + * timeline variants of drm_syncobj. It is invalid to append this structure to + * the execbuf if I915_EXEC_FENCE_ARRAY is set. + */ +struct drm_i915_gem_execbuffer_ext_timeline_fences { + struct i915_user_extension base; + + /** + * Number of element in the handles_ptr & value_ptr arrays. + */ + __u64 fence_count; + + /** + * Pointer to an array of struct drm_i915_gem_exec_fence of length + * fence_count. + */ + __u64 handles_ptr; + + /** + * Pointer to an array of u64 values of length fence_count. Values + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one. + */ + __u64 values_ptr; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs -- 2.21.0.5.gaeb582a983 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support 2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota @ 2020-04-08 16:29 ` Lionel Landwerlin 2020-04-08 17:00 ` Venkata Sandeep Dhanalakota 2020-04-08 17:14 ` Lionel Landwerlin 1 sibling, 1 reply; 17+ messages in thread From: Lionel Landwerlin @ 2020-04-08 16:29 UTC (permalink / raw) To: Venkata Sandeep Dhanalakota, intel-gfx; +Cc: chris.p.wilson On 06/04/2020 23:07, Venkata Sandeep Dhanalakota wrote: > Introduces a new parameters to execbuf so that we can specify syncobj > handles as well as timeline points. > > v2: Reuse i915_user_extension_fn > > v3: Check that the chained extension is only present once (Chris) > > v4: Check that dma_fence_chain_find_seqno returns a non NULL fence > (Lionel) > > v5: Use BIT_ULL (Chris) > > v6: Fix issue with already signaled timeline points, > dma_fence_chain_find_seqno() setting fence to NULL (Chris) > > v7: Report ENOENT with invalid syncobj handle (Lionel) > > v8: Check for out of order timeline point insertion (Chris) > > v9: After explanations on > https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html > drop the ordering check from v8 (Lionel) > > v10: Set first extension enum item to 1 (Jason) > > v11: Add wait on previous sync points in timelines (Sandeep) Thanks for picking this series up! Could you point to the changes in v11? I haven't look at it in a while and I can't remember what you would have changed. Thanks a lot, -Lionel > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com> > --- > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 312 ++++++++++++++---- > drivers/gpu/drm/i915/i915_drv.c | 3 +- > drivers/gpu/drm/i915/i915_getparam.c | 1 + > include/uapi/drm/i915_drm.h | 38 +++ > 4 files changed, 296 insertions(+), 58 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 16831f715daa..4cb4cd035daa 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -230,6 +230,13 @@ enum { > * the batchbuffer in trusted mode, otherwise the ioctl is rejected. > */ > > +struct i915_eb_fences { > + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ > + struct dma_fence *dma_fence; > + u64 value; > + struct dma_fence_chain *chain_fence; > +}; > + > struct i915_execbuffer { > struct drm_i915_private *i915; /** i915 backpointer */ > struct drm_file *file; /** per-file lookup tables and limits */ > @@ -292,6 +299,7 @@ struct i915_execbuffer { > > struct { > u64 flags; /** Available extensions parameters */ > + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; > } extensions; > }; > > @@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb, > } > > static void > -__free_fence_array(struct drm_syncobj **fences, unsigned int n) > +__free_fence_array(struct i915_eb_fences *fences, unsigned int n) > { > - while (n--) > - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); > + while (n--) { > + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); > + dma_fence_put(fences[n].dma_fence); > + kfree(fences[n].chain_fence); > + } > kvfree(fences); > } > > -static struct drm_syncobj ** > -get_fence_array(struct drm_i915_gem_execbuffer2 *args, > - struct drm_file *file) > +static struct i915_eb_fences * > +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences) > +{ > + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences = > + &eb->extensions.timeline_fences; > + struct drm_i915_gem_exec_fence __user *user_fences; > + struct i915_eb_fences *fences; > + u64 __user *user_values; > + u64 num_fences, num_user_fences = timeline_fences->fence_count; > + unsigned long n; > + int err = 0; > + > + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ > + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); > + if (num_user_fences > min_t(unsigned long, > + ULONG_MAX / sizeof(*user_fences), > + SIZE_MAX / sizeof(*fences))) > + return ERR_PTR(-EINVAL); > + > + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); > + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences))) > + return ERR_PTR(-EFAULT); > + > + user_values = u64_to_user_ptr(timeline_fences->values_ptr); > + if (!access_ok(user_values, num_user_fences * sizeof(*user_values))) > + return ERR_PTR(-EFAULT); > + > + fences = kvmalloc_array(num_user_fences, sizeof(*fences), > + __GFP_NOWARN | GFP_KERNEL); > + if (!fences) > + return ERR_PTR(-ENOMEM); > + > + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & > + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); > + > + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) { > + struct drm_i915_gem_exec_fence user_fence; > + struct drm_syncobj *syncobj; > + struct dma_fence *fence = NULL; > + u64 point; > + > + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) { > + err = -EFAULT; > + goto err; > + } > + > + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { > + err = -EINVAL; > + goto err; > + } > + > + if (__get_user(point, user_values++)) { > + err = -EFAULT; > + goto err; > + } > + > + syncobj = drm_syncobj_find(eb->file, user_fence.handle); > + if (!syncobj) { > + DRM_DEBUG("Invalid syncobj handle provided\n"); > + err = -ENOENT; > + goto err; > + } > + > + fence = drm_syncobj_fence_get(syncobj); > + > + if (!fence && user_fence.flags && > + !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { > + DRM_DEBUG("Syncobj handle has no fence\n"); > + drm_syncobj_put(syncobj); > + err = -EINVAL; > + goto err; > + } > + > + if (fence) > + err = dma_fence_chain_find_seqno(&fence, point); > + > + if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { > + DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); > + drm_syncobj_put(syncobj); > + goto err; > + } > + > + /* A point might have been signaled already and > + * garbage collected from the timeline. In this case > + * just ignore the point and carry on. > + */ > + if (!fence && (user_fence.flags & I915_EXEC_FENCE_WAIT)) { > + drm_syncobj_put(syncobj); > + continue; > + } > + > + /* > + * For timeline syncobjs we need to preallocate chains for > + * later signaling. > + */ > + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { > + /* > + * Waiting and signaling the same point (when point != > + * 0) would break the timeline. > + */ > + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { > + DRM_DEBUG("Trying to wait & signal the same timeline point.\n"); > + err = -EINVAL; > + drm_syncobj_put(syncobj); > + goto err; > + } > + > + fences[num_fences].chain_fence = > + kmalloc(sizeof(*fences[num_fences].chain_fence), > + GFP_KERNEL); > + if (!fences[num_fences].chain_fence) { > + drm_syncobj_put(syncobj); > + err = -ENOMEM; > + DRM_DEBUG("Unable to alloc chain_fence\n"); > + goto err; > + } > + } else { > + fences[num_fences].chain_fence = NULL; > + } > + > + fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); > + fences[num_fences].dma_fence = fence; > + fences[num_fences].value = point; > + num_fences++; > + } > + > + *out_n_fences = num_fences; > + > + return fences; > + > +err: > + __free_fence_array(fences, num_fences); > + return ERR_PTR(err); > +} > + > +static struct i915_eb_fences * > +get_legacy_fence_array(struct i915_execbuffer *eb, > + int *out_n_fences) > { > - const unsigned long nfences = args->num_cliprects; > + struct drm_i915_gem_execbuffer2 *args = eb->args; > struct drm_i915_gem_exec_fence __user *user; > - struct drm_syncobj **fences; > + struct i915_eb_fences *fences; > + const u32 num_fences = args->num_cliprects; > unsigned long n; > int err; > > - if (!(args->flags & I915_EXEC_FENCE_ARRAY)) > - return NULL; > + *out_n_fences = num_fences; > > /* Check multiplication overflow for access_ok() and kvmalloc_array() */ > BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); > - if (nfences > min_t(unsigned long, > - ULONG_MAX / sizeof(*user), > - SIZE_MAX / sizeof(*fences))) > + if (*out_n_fences > min_t(unsigned long, > + ULONG_MAX / sizeof(*user), > + SIZE_MAX / sizeof(*fences))) > return ERR_PTR(-EINVAL); > > user = u64_to_user_ptr(args->cliprects_ptr); > - if (!access_ok(user, nfences * sizeof(*user))) > + if (!access_ok(user, *out_n_fences * sizeof(*user))) > return ERR_PTR(-EFAULT); > > - fences = kvmalloc_array(nfences, sizeof(*fences), > + fences = kvmalloc_array(*out_n_fences, sizeof(*fences), > __GFP_NOWARN | GFP_KERNEL); > if (!fences) > return ERR_PTR(-ENOMEM); > > - for (n = 0; n < nfences; n++) { > - struct drm_i915_gem_exec_fence fence; > + for (n = 0; n < *out_n_fences; n++) { > + struct drm_i915_gem_exec_fence user_fence; > struct drm_syncobj *syncobj; > + struct dma_fence *fence = NULL; > > - if (__copy_from_user(&fence, user++, sizeof(fence))) { > + if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) { > err = -EFAULT; > goto err; > } > > - if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { > + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { > err = -EINVAL; > goto err; > } > > - syncobj = drm_syncobj_find(file, fence.handle); > + syncobj = drm_syncobj_find(eb->file, user_fence.handle); > if (!syncobj) { > DRM_DEBUG("Invalid syncobj handle provided\n"); > err = -ENOENT; > goto err; > } > > + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { > + fence = drm_syncobj_fence_get(syncobj); > + if (!fence) { > + DRM_DEBUG("Syncobj handle has no fence\n"); > + drm_syncobj_put(syncobj); > + err = -EINVAL; > + goto err; > + } > + } > + > BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & > ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); > > - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); > + fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); > + fences[n].dma_fence = fence; > + fences[n].value = 0; > + fences[n].chain_fence = NULL; > } > > return fences; > @@ -2314,37 +2474,45 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, > return ERR_PTR(err); > } > > +static struct i915_eb_fences * > +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences) > +{ > + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) > + return get_legacy_fence_array(eb, out_n_fences); > + > + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) > + return get_timeline_fence_array(eb, out_n_fences); > + > + *out_n_fences = 0; > + return NULL; > +} > + > static void > -put_fence_array(struct drm_i915_gem_execbuffer2 *args, > - struct drm_syncobj **fences) > +put_fence_array(struct i915_eb_fences *fences, int nfences) > { > if (fences) > - __free_fence_array(fences, args->num_cliprects); > + __free_fence_array(fences, nfences); > } > > static int > await_fence_array(struct i915_execbuffer *eb, > - struct drm_syncobj **fences) > + struct i915_eb_fences *fences, > + int nfences) > { > - const unsigned int nfences = eb->args->num_cliprects; > unsigned int n; > int err; > > for (n = 0; n < nfences; n++) { > struct drm_syncobj *syncobj; > - struct dma_fence *fence; > unsigned int flags; > > - syncobj = ptr_unpack_bits(fences[n], &flags, 2); > - if (!(flags & I915_EXEC_FENCE_WAIT)) > - continue; > + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); > > - fence = drm_syncobj_fence_get(syncobj); > - if (!fence) > - return -EINVAL; > + if (!fences[n].dma_fence) > + continue; > > - err = i915_request_await_dma_fence(eb->request, fence); > - dma_fence_put(fence); > + err = i915_request_await_dma_fence(eb->request, > + fences[n].dma_fence); > if (err < 0) > return err; > } > @@ -2354,9 +2522,9 @@ await_fence_array(struct i915_execbuffer *eb, > > static void > signal_fence_array(struct i915_execbuffer *eb, > - struct drm_syncobj **fences) > + struct i915_eb_fences *fences, > + int nfences) > { > - const unsigned int nfences = eb->args->num_cliprects; > struct dma_fence * const fence = &eb->request->fence; > unsigned int n; > > @@ -2364,14 +2532,44 @@ signal_fence_array(struct i915_execbuffer *eb, > struct drm_syncobj *syncobj; > unsigned int flags; > > - syncobj = ptr_unpack_bits(fences[n], &flags, 2); > + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); > if (!(flags & I915_EXEC_FENCE_SIGNAL)) > continue; > > - drm_syncobj_replace_fence(syncobj, fence); > + if (fences[n].chain_fence) { > + drm_syncobj_add_point(syncobj, fences[n].chain_fence, > + fence, fences[n].value); > + /* > + * The chain's ownership is transferred to the > + * timeline. > + */ > + fences[n].chain_fence = NULL; > + } else { > + drm_syncobj_replace_fence(syncobj, fence); > + } > } > } > > +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data) > +{ > + struct i915_execbuffer *eb = data; > + > + /* Timeline fences are incompatible with the fence array flag. */ > + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) > + return -EINVAL; > + > + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) > + return -EINVAL; > + > + if (copy_from_user(&eb->extensions.timeline_fences, ext, > + sizeof(eb->extensions.timeline_fences))) > + return -EFAULT; > + > + eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES); > + > + return 0; > +} > + > static void retire_requests(struct intel_timeline *tl, struct i915_request *end) > { > struct i915_request *rq, *rn; > @@ -2438,6 +2636,7 @@ static void eb_request_add(struct i915_execbuffer *eb) > } > > static const i915_user_extension_fn execbuf_extensions[] = { > + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, > }; > > static int > @@ -2468,16 +2667,17 @@ static int > i915_gem_do_execbuffer(struct drm_device *dev, > struct drm_file *file, > struct drm_i915_gem_execbuffer2 *args, > - struct drm_i915_gem_exec_object2 *exec, > - struct drm_syncobj **fences) > + struct drm_i915_gem_exec_object2 *exec) > { > struct drm_i915_private *i915 = to_i915(dev); > struct i915_execbuffer eb; > struct dma_fence *in_fence = NULL; > struct dma_fence *exec_fence = NULL; > struct sync_file *out_fence = NULL; > + struct i915_eb_fences *fences = NULL; > struct i915_vma *batch; > int out_fence_fd = -1; > + int nfences = 0; > int err; > > BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); > @@ -2521,10 +2721,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, > if (err) > return err; > > + fences = get_fence_array(&eb, &nfences); > + if (IS_ERR(fences)) > + return PTR_ERR(fences); > + > if (args->flags & I915_EXEC_FENCE_IN) { > in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); > - if (!in_fence) > - return -EINVAL; > + if (!in_fence) { > + err = -EINVAL; > + goto err_fences; > + } > } > > if (args->flags & I915_EXEC_FENCE_SUBMIT) { > @@ -2648,7 +2854,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, > } > > if (fences) { > - err = await_fence_array(&eb, fences); > + err = await_fence_array(&eb, fences, nfences); > if (err) > goto err_request; > } > @@ -2680,7 +2886,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, > eb_request_add(&eb); > > if (fences) > - signal_fence_array(&eb, fences); > + signal_fence_array(&eb, fences, nfences); > > if (out_fence) { > if (err == 0) { > @@ -2715,6 +2921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, > dma_fence_put(exec_fence); > err_in_fence: > dma_fence_put(in_fence); > +err_fences: > + put_fence_array(fences, nfences); > return err; > } > > @@ -2809,7 +3017,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, > exec2_list[i].flags = 0; > } > > - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); > + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); > if (exec2.flags & __EXEC_HAS_RELOC) { > struct drm_i915_gem_exec_object __user *user_exec_list = > u64_to_user_ptr(args->buffers_ptr); > @@ -2841,7 +3049,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, > struct drm_i915_private *i915 = to_i915(dev); > struct drm_i915_gem_execbuffer2 *args = data; > struct drm_i915_gem_exec_object2 *exec2_list; > - struct drm_syncobj **fences = NULL; > const size_t count = args->buffer_count; > int err; > > @@ -2869,15 +3076,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, > return -EFAULT; > } > > - if (args->flags & I915_EXEC_FENCE_ARRAY) { > - fences = get_fence_array(args, file); > - if (IS_ERR(fences)) { > - kvfree(exec2_list); > - return PTR_ERR(fences); > - } > - } > - > - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); > + err = i915_gem_do_execbuffer(dev, file, args, exec2_list); > > /* > * Now that we have begun execution of the batchbuffer, we ignore > @@ -2917,7 +3116,6 @@ end:; > } > > args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; > - put_fence_array(args, fences); > kvfree(exec2_list); > return err; > } > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index a7a3b4b98572..f7f868c3c510 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -1828,7 +1828,8 @@ static struct drm_driver driver = { > */ > .driver_features = > DRIVER_GEM | > - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, > + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ | > + DRIVER_SYNCOBJ_TIMELINE, > .release = i915_driver_release, > .open = i915_driver_open, > .lastclose = i915_driver_lastclose, > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c > index 54fce81d5724..b9d3aab53c03 100644 > --- a/drivers/gpu/drm/i915/i915_getparam.c > +++ b/drivers/gpu/drm/i915/i915_getparam.c > @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, > case I915_PARAM_HAS_EXEC_BATCH_FIRST: > case I915_PARAM_HAS_EXEC_FENCE_ARRAY: > case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: > + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: > /* For the time being all of these are always true; > * if some supported hardware does not have one of these > * features this value needs to be provided from > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 7ea38aa6502c..7b8680e3b49d 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait { > */ > #define I915_PARAM_PERF_REVISION 54 > > +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of > + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See > + * I915_EXEC_USE_EXTENSIONS. > + */ > +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 > + > /* Must be kept compact -- no holes and well documented */ > > typedef struct drm_i915_getparam { > @@ -1047,9 +1053,41 @@ struct drm_i915_gem_exec_fence { > }; > > enum drm_i915_gem_execbuffer_ext { > + /** > + * See drm_i915_gem_execbuf_ext_timeline_fences. > + */ > + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1, > + > DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ > }; > > +/** > + * This structure describes an array of drm_syncobj and associated points for > + * timeline variants of drm_syncobj. It is invalid to append this structure to > + * the execbuf if I915_EXEC_FENCE_ARRAY is set. > + */ > +struct drm_i915_gem_execbuffer_ext_timeline_fences { > + struct i915_user_extension base; > + > + /** > + * Number of element in the handles_ptr & value_ptr arrays. > + */ > + __u64 fence_count; > + > + /** > + * Pointer to an array of struct drm_i915_gem_exec_fence of length > + * fence_count. > + */ > + __u64 handles_ptr; > + > + /** > + * Pointer to an array of u64 values of length fence_count. Values > + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline > + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one. > + */ > + __u64 values_ptr; > +}; > + > struct drm_i915_gem_execbuffer2 { > /** > * List of gem_exec_object2 structs _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support 2020-04-08 16:29 ` Lionel Landwerlin @ 2020-04-08 17:00 ` Venkata Sandeep Dhanalakota 0 siblings, 0 replies; 17+ messages in thread From: Venkata Sandeep Dhanalakota @ 2020-04-08 17:00 UTC (permalink / raw) To: Lionel Landwerlin; +Cc: intel-gfx, chris.p.wilson On 20/04/08 07:29, Lionel Landwerlin wrote: > On 06/04/2020 23:07, Venkata Sandeep Dhanalakota wrote: > > Introduces a new parameters to execbuf so that we can specify syncobj > > handles as well as timeline points. > > > > v2: Reuse i915_user_extension_fn > > > > v3: Check that the chained extension is only present once (Chris) > > > > v4: Check that dma_fence_chain_find_seqno returns a non NULL fence > > (Lionel) > > > > v5: Use BIT_ULL (Chris) > > > > v6: Fix issue with already signaled timeline points, > > dma_fence_chain_find_seqno() setting fence to NULL (Chris) > > > > v7: Report ENOENT with invalid syncobj handle (Lionel) > > > > v8: Check for out of order timeline point insertion (Chris) > > > > v9: After explanations on > > https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html > > drop the ordering check from v8 (Lionel) > > > > v10: Set first extension enum item to 1 (Jason) > > > > v11: Add wait on previous sync points in timelines (Sandeep) > > > Thanks for picking this series up! > > > Could you point to the changes in v11? > > I haven't look at it in a while and I can't remember what you would have > changed. > Hi, Mainly the changes are in get_timeline_fence_array(), to enforce the implicit dependencies in signal fence-array. we want have efficient waits on the last point on timelines so that we signal at a correct point in time along the timeline the order is controlled so that we always wait on the previous request/sync point in the timeline and signal after the completion of the current request. Thank you, ~sandeep > > Thanks a lot, > > > -Lionel > > > > > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > > Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com> > > --- > > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 312 ++++++++++++++---- > > drivers/gpu/drm/i915/i915_drv.c | 3 +- > > drivers/gpu/drm/i915/i915_getparam.c | 1 + > > include/uapi/drm/i915_drm.h | 38 +++ > > 4 files changed, 296 insertions(+), 58 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > index 16831f715daa..4cb4cd035daa 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > @@ -230,6 +230,13 @@ enum { > > * the batchbuffer in trusted mode, otherwise the ioctl is rejected. > > */ > > +struct i915_eb_fences { > > + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ > > + struct dma_fence *dma_fence; > > + u64 value; > > + struct dma_fence_chain *chain_fence; > > +}; > > + > > struct i915_execbuffer { > > struct drm_i915_private *i915; /** i915 backpointer */ > > struct drm_file *file; /** per-file lookup tables and limits */ > > @@ -292,6 +299,7 @@ struct i915_execbuffer { > > struct { > > u64 flags; /** Available extensions parameters */ > > + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; > > } extensions; > > }; > > @@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb, > > } > > static void > > -__free_fence_array(struct drm_syncobj **fences, unsigned int n) > > +__free_fence_array(struct i915_eb_fences *fences, unsigned int n) > > { > > - while (n--) > > - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); > > + while (n--) { > > + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); > > + dma_fence_put(fences[n].dma_fence); > > + kfree(fences[n].chain_fence); > > + } > > kvfree(fences); > > } > > -static struct drm_syncobj ** > > -get_fence_array(struct drm_i915_gem_execbuffer2 *args, > > - struct drm_file *file) > > +static struct i915_eb_fences * > > +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences) > > +{ > > + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences = > > + &eb->extensions.timeline_fences; > > + struct drm_i915_gem_exec_fence __user *user_fences; > > + struct i915_eb_fences *fences; > > + u64 __user *user_values; > > + u64 num_fences, num_user_fences = timeline_fences->fence_count; > > + unsigned long n; > > + int err = 0; > > + > > + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ > > + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); > > + if (num_user_fences > min_t(unsigned long, > > + ULONG_MAX / sizeof(*user_fences), > > + SIZE_MAX / sizeof(*fences))) > > + return ERR_PTR(-EINVAL); > > + > > + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); > > + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences))) > > + return ERR_PTR(-EFAULT); > > + > > + user_values = u64_to_user_ptr(timeline_fences->values_ptr); > > + if (!access_ok(user_values, num_user_fences * sizeof(*user_values))) > > + return ERR_PTR(-EFAULT); > > + > > + fences = kvmalloc_array(num_user_fences, sizeof(*fences), > > + __GFP_NOWARN | GFP_KERNEL); > > + if (!fences) > > + return ERR_PTR(-ENOMEM); > > + > > + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & > > + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); > > + > > + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) { > > + struct drm_i915_gem_exec_fence user_fence; > > + struct drm_syncobj *syncobj; > > + struct dma_fence *fence = NULL; > > + u64 point; > > + > > + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) { > > + err = -EFAULT; > > + goto err; > > + } > > + > > + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { > > + err = -EINVAL; > > + goto err; > > + } > > + > > + if (__get_user(point, user_values++)) { > > + err = -EFAULT; > > + goto err; > > + } > > + > > + syncobj = drm_syncobj_find(eb->file, user_fence.handle); > > + if (!syncobj) { > > + DRM_DEBUG("Invalid syncobj handle provided\n"); > > + err = -ENOENT; > > + goto err; > > + } > > + > > + fence = drm_syncobj_fence_get(syncobj); > > + > > + if (!fence && user_fence.flags && > > + !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { > > + DRM_DEBUG("Syncobj handle has no fence\n"); > > + drm_syncobj_put(syncobj); > > + err = -EINVAL; > > + goto err; > > + } > > + > > + if (fence) > > + err = dma_fence_chain_find_seqno(&fence, point); > > + > > + if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { > > + DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); > > + drm_syncobj_put(syncobj); > > + goto err; > > + } > > + > > + /* A point might have been signaled already and > > + * garbage collected from the timeline. In this case > > + * just ignore the point and carry on. > > + */ > > + if (!fence && (user_fence.flags & I915_EXEC_FENCE_WAIT)) { > > + drm_syncobj_put(syncobj); > > + continue; > > + } > > + > > + /* > > + * For timeline syncobjs we need to preallocate chains for > > + * later signaling. > > + */ > > + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { > > + /* > > + * Waiting and signaling the same point (when point != > > + * 0) would break the timeline. > > + */ > > + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { > > + DRM_DEBUG("Trying to wait & signal the same timeline point.\n"); > > + err = -EINVAL; > > + drm_syncobj_put(syncobj); > > + goto err; > > + } > > + > > + fences[num_fences].chain_fence = > > + kmalloc(sizeof(*fences[num_fences].chain_fence), > > + GFP_KERNEL); > > + if (!fences[num_fences].chain_fence) { > > + drm_syncobj_put(syncobj); > > + err = -ENOMEM; > > + DRM_DEBUG("Unable to alloc chain_fence\n"); > > + goto err; > > + } > > + } else { > > + fences[num_fences].chain_fence = NULL; > > + } > > + > > + fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); > > + fences[num_fences].dma_fence = fence; > > + fences[num_fences].value = point; > > + num_fences++; > > + } > > + > > + *out_n_fences = num_fences; > > + > > + return fences; > > + > > +err: > > + __free_fence_array(fences, num_fences); > > + return ERR_PTR(err); > > +} > > + > > +static struct i915_eb_fences * > > +get_legacy_fence_array(struct i915_execbuffer *eb, > > + int *out_n_fences) > > { > > - const unsigned long nfences = args->num_cliprects; > > + struct drm_i915_gem_execbuffer2 *args = eb->args; > > struct drm_i915_gem_exec_fence __user *user; > > - struct drm_syncobj **fences; > > + struct i915_eb_fences *fences; > > + const u32 num_fences = args->num_cliprects; > > unsigned long n; > > int err; > > - if (!(args->flags & I915_EXEC_FENCE_ARRAY)) > > - return NULL; > > + *out_n_fences = num_fences; > > /* Check multiplication overflow for access_ok() and kvmalloc_array() */ > > BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); > > - if (nfences > min_t(unsigned long, > > - ULONG_MAX / sizeof(*user), > > - SIZE_MAX / sizeof(*fences))) > > + if (*out_n_fences > min_t(unsigned long, > > + ULONG_MAX / sizeof(*user), > > + SIZE_MAX / sizeof(*fences))) > > return ERR_PTR(-EINVAL); > > user = u64_to_user_ptr(args->cliprects_ptr); > > - if (!access_ok(user, nfences * sizeof(*user))) > > + if (!access_ok(user, *out_n_fences * sizeof(*user))) > > return ERR_PTR(-EFAULT); > > - fences = kvmalloc_array(nfences, sizeof(*fences), > > + fences = kvmalloc_array(*out_n_fences, sizeof(*fences), > > __GFP_NOWARN | GFP_KERNEL); > > if (!fences) > > return ERR_PTR(-ENOMEM); > > - for (n = 0; n < nfences; n++) { > > - struct drm_i915_gem_exec_fence fence; > > + for (n = 0; n < *out_n_fences; n++) { > > + struct drm_i915_gem_exec_fence user_fence; > > struct drm_syncobj *syncobj; > > + struct dma_fence *fence = NULL; > > - if (__copy_from_user(&fence, user++, sizeof(fence))) { > > + if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) { > > err = -EFAULT; > > goto err; > > } > > - if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { > > + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { > > err = -EINVAL; > > goto err; > > } > > - syncobj = drm_syncobj_find(file, fence.handle); > > + syncobj = drm_syncobj_find(eb->file, user_fence.handle); > > if (!syncobj) { > > DRM_DEBUG("Invalid syncobj handle provided\n"); > > err = -ENOENT; > > goto err; > > } > > + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { > > + fence = drm_syncobj_fence_get(syncobj); > > + if (!fence) { > > + DRM_DEBUG("Syncobj handle has no fence\n"); > > + drm_syncobj_put(syncobj); > > + err = -EINVAL; > > + goto err; > > + } > > + } > > + > > BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & > > ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); > > - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); > > + fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); > > + fences[n].dma_fence = fence; > > + fences[n].value = 0; > > + fences[n].chain_fence = NULL; > > } > > return fences; > > @@ -2314,37 +2474,45 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, > > return ERR_PTR(err); > > } > > +static struct i915_eb_fences * > > +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences) > > +{ > > + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) > > + return get_legacy_fence_array(eb, out_n_fences); > > + > > + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) > > + return get_timeline_fence_array(eb, out_n_fences); > > + > > + *out_n_fences = 0; > > + return NULL; > > +} > > + > > static void > > -put_fence_array(struct drm_i915_gem_execbuffer2 *args, > > - struct drm_syncobj **fences) > > +put_fence_array(struct i915_eb_fences *fences, int nfences) > > { > > if (fences) > > - __free_fence_array(fences, args->num_cliprects); > > + __free_fence_array(fences, nfences); > > } > > static int > > await_fence_array(struct i915_execbuffer *eb, > > - struct drm_syncobj **fences) > > + struct i915_eb_fences *fences, > > + int nfences) > > { > > - const unsigned int nfences = eb->args->num_cliprects; > > unsigned int n; > > int err; > > for (n = 0; n < nfences; n++) { > > struct drm_syncobj *syncobj; > > - struct dma_fence *fence; > > unsigned int flags; > > - syncobj = ptr_unpack_bits(fences[n], &flags, 2); > > - if (!(flags & I915_EXEC_FENCE_WAIT)) > > - continue; > > + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); > > - fence = drm_syncobj_fence_get(syncobj); > > - if (!fence) > > - return -EINVAL; > > + if (!fences[n].dma_fence) > > + continue; > > - err = i915_request_await_dma_fence(eb->request, fence); > > - dma_fence_put(fence); > > + err = i915_request_await_dma_fence(eb->request, > > + fences[n].dma_fence); > > if (err < 0) > > return err; > > } > > @@ -2354,9 +2522,9 @@ await_fence_array(struct i915_execbuffer *eb, > > static void > > signal_fence_array(struct i915_execbuffer *eb, > > - struct drm_syncobj **fences) > > + struct i915_eb_fences *fences, > > + int nfences) > > { > > - const unsigned int nfences = eb->args->num_cliprects; > > struct dma_fence * const fence = &eb->request->fence; > > unsigned int n; > > @@ -2364,14 +2532,44 @@ signal_fence_array(struct i915_execbuffer *eb, > > struct drm_syncobj *syncobj; > > unsigned int flags; > > - syncobj = ptr_unpack_bits(fences[n], &flags, 2); > > + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); > > if (!(flags & I915_EXEC_FENCE_SIGNAL)) > > continue; > > - drm_syncobj_replace_fence(syncobj, fence); > > + if (fences[n].chain_fence) { > > + drm_syncobj_add_point(syncobj, fences[n].chain_fence, > > + fence, fences[n].value); > > + /* > > + * The chain's ownership is transferred to the > > + * timeline. > > + */ > > + fences[n].chain_fence = NULL; > > + } else { > > + drm_syncobj_replace_fence(syncobj, fence); > > + } > > } > > } > > +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data) > > +{ > > + struct i915_execbuffer *eb = data; > > + > > + /* Timeline fences are incompatible with the fence array flag. */ > > + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) > > + return -EINVAL; > > + > > + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) > > + return -EINVAL; > > + > > + if (copy_from_user(&eb->extensions.timeline_fences, ext, > > + sizeof(eb->extensions.timeline_fences))) > > + return -EFAULT; > > + > > + eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES); > > + > > + return 0; > > +} > > + > > static void retire_requests(struct intel_timeline *tl, struct i915_request *end) > > { > > struct i915_request *rq, *rn; > > @@ -2438,6 +2636,7 @@ static void eb_request_add(struct i915_execbuffer *eb) > > } > > static const i915_user_extension_fn execbuf_extensions[] = { > > + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, > > }; > > static int > > @@ -2468,16 +2667,17 @@ static int > > i915_gem_do_execbuffer(struct drm_device *dev, > > struct drm_file *file, > > struct drm_i915_gem_execbuffer2 *args, > > - struct drm_i915_gem_exec_object2 *exec, > > - struct drm_syncobj **fences) > > + struct drm_i915_gem_exec_object2 *exec) > > { > > struct drm_i915_private *i915 = to_i915(dev); > > struct i915_execbuffer eb; > > struct dma_fence *in_fence = NULL; > > struct dma_fence *exec_fence = NULL; > > struct sync_file *out_fence = NULL; > > + struct i915_eb_fences *fences = NULL; > > struct i915_vma *batch; > > int out_fence_fd = -1; > > + int nfences = 0; > > int err; > > BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); > > @@ -2521,10 +2721,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, > > if (err) > > return err; > > + fences = get_fence_array(&eb, &nfences); > > + if (IS_ERR(fences)) > > + return PTR_ERR(fences); > > + > > if (args->flags & I915_EXEC_FENCE_IN) { > > in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); > > - if (!in_fence) > > - return -EINVAL; > > + if (!in_fence) { > > + err = -EINVAL; > > + goto err_fences; > > + } > > } > > if (args->flags & I915_EXEC_FENCE_SUBMIT) { > > @@ -2648,7 +2854,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, > > } > > if (fences) { > > - err = await_fence_array(&eb, fences); > > + err = await_fence_array(&eb, fences, nfences); > > if (err) > > goto err_request; > > } > > @@ -2680,7 +2886,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, > > eb_request_add(&eb); > > if (fences) > > - signal_fence_array(&eb, fences); > > + signal_fence_array(&eb, fences, nfences); > > if (out_fence) { > > if (err == 0) { > > @@ -2715,6 +2921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, > > dma_fence_put(exec_fence); > > err_in_fence: > > dma_fence_put(in_fence); > > +err_fences: > > + put_fence_array(fences, nfences); > > return err; > > } > > @@ -2809,7 +3017,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, > > exec2_list[i].flags = 0; > > } > > - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); > > + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); > > if (exec2.flags & __EXEC_HAS_RELOC) { > > struct drm_i915_gem_exec_object __user *user_exec_list = > > u64_to_user_ptr(args->buffers_ptr); > > @@ -2841,7 +3049,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, > > struct drm_i915_private *i915 = to_i915(dev); > > struct drm_i915_gem_execbuffer2 *args = data; > > struct drm_i915_gem_exec_object2 *exec2_list; > > - struct drm_syncobj **fences = NULL; > > const size_t count = args->buffer_count; > > int err; > > @@ -2869,15 +3076,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, > > return -EFAULT; > > } > > - if (args->flags & I915_EXEC_FENCE_ARRAY) { > > - fences = get_fence_array(args, file); > > - if (IS_ERR(fences)) { > > - kvfree(exec2_list); > > - return PTR_ERR(fences); > > - } > > - } > > - > > - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); > > + err = i915_gem_do_execbuffer(dev, file, args, exec2_list); > > /* > > * Now that we have begun execution of the batchbuffer, we ignore > > @@ -2917,7 +3116,6 @@ end:; > > } > > args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; > > - put_fence_array(args, fences); > > kvfree(exec2_list); > > return err; > > } > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > > index a7a3b4b98572..f7f868c3c510 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.c > > +++ b/drivers/gpu/drm/i915/i915_drv.c > > @@ -1828,7 +1828,8 @@ static struct drm_driver driver = { > > */ > > .driver_features = > > DRIVER_GEM | > > - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, > > + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ | > > + DRIVER_SYNCOBJ_TIMELINE, > > .release = i915_driver_release, > > .open = i915_driver_open, > > .lastclose = i915_driver_lastclose, > > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c > > index 54fce81d5724..b9d3aab53c03 100644 > > --- a/drivers/gpu/drm/i915/i915_getparam.c > > +++ b/drivers/gpu/drm/i915/i915_getparam.c > > @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, > > case I915_PARAM_HAS_EXEC_BATCH_FIRST: > > case I915_PARAM_HAS_EXEC_FENCE_ARRAY: > > case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: > > + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: > > /* For the time being all of these are always true; > > * if some supported hardware does not have one of these > > * features this value needs to be provided from > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > > index 7ea38aa6502c..7b8680e3b49d 100644 > > --- a/include/uapi/drm/i915_drm.h > > +++ b/include/uapi/drm/i915_drm.h > > @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait { > > */ > > #define I915_PARAM_PERF_REVISION 54 > > +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of > > + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See > > + * I915_EXEC_USE_EXTENSIONS. > > + */ > > +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 > > + > > /* Must be kept compact -- no holes and well documented */ > > typedef struct drm_i915_getparam { > > @@ -1047,9 +1053,41 @@ struct drm_i915_gem_exec_fence { > > }; > > enum drm_i915_gem_execbuffer_ext { > > + /** > > + * See drm_i915_gem_execbuf_ext_timeline_fences. > > + */ > > + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1, > > + > > DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ > > }; > > +/** > > + * This structure describes an array of drm_syncobj and associated points for > > + * timeline variants of drm_syncobj. It is invalid to append this structure to > > + * the execbuf if I915_EXEC_FENCE_ARRAY is set. > > + */ > > +struct drm_i915_gem_execbuffer_ext_timeline_fences { > > + struct i915_user_extension base; > > + > > + /** > > + * Number of element in the handles_ptr & value_ptr arrays. > > + */ > > + __u64 fence_count; > > + > > + /** > > + * Pointer to an array of struct drm_i915_gem_exec_fence of length > > + * fence_count. > > + */ > > + __u64 handles_ptr; > > + > > + /** > > + * Pointer to an array of u64 values of length fence_count. Values > > + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline > > + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one. > > + */ > > + __u64 values_ptr; > > +}; > > + > > struct drm_i915_gem_execbuffer2 { > > /** > > * List of gem_exec_object2 structs > > _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support 2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota 2020-04-08 16:29 ` Lionel Landwerlin @ 2020-04-08 17:14 ` Lionel Landwerlin 1 sibling, 0 replies; 17+ messages in thread From: Lionel Landwerlin @ 2020-04-08 17:14 UTC (permalink / raw) To: Venkata Sandeep Dhanalakota, intel-gfx; +Cc: chris.p.wilson On 06/04/2020 23:07, Venkata Sandeep Dhanalakota wrote: > Introduces a new parameters to execbuf so that we can specify syncobj > handles as well as timeline points. > > v2: Reuse i915_user_extension_fn > > v3: Check that the chained extension is only present once (Chris) > > v4: Check that dma_fence_chain_find_seqno returns a non NULL fence > (Lionel) > > v5: Use BIT_ULL (Chris) > > v6: Fix issue with already signaled timeline points, > dma_fence_chain_find_seqno() setting fence to NULL (Chris) > > v7: Report ENOENT with invalid syncobj handle (Lionel) > > v8: Check for out of order timeline point insertion (Chris) > > v9: After explanations on > https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html > drop the ordering check from v8 (Lionel) > > v10: Set first extension enum item to 1 (Jason) > > v11: Add wait on previous sync points in timelines (Sandeep) > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com> > --- > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 312 ++++++++++++++---- > drivers/gpu/drm/i915/i915_drv.c | 3 +- > drivers/gpu/drm/i915/i915_getparam.c | 1 + > include/uapi/drm/i915_drm.h | 38 +++ > 4 files changed, 296 insertions(+), 58 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 16831f715daa..4cb4cd035daa 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -230,6 +230,13 @@ enum { > * the batchbuffer in trusted mode, otherwise the ioctl is rejected. > */ > > +struct i915_eb_fences { > + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ > + struct dma_fence *dma_fence; > + u64 value; > + struct dma_fence_chain *chain_fence; > +}; > + > struct i915_execbuffer { > struct drm_i915_private *i915; /** i915 backpointer */ > struct drm_file *file; /** per-file lookup tables and limits */ > @@ -292,6 +299,7 @@ struct i915_execbuffer { > > struct { > u64 flags; /** Available extensions parameters */ > + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; > } extensions; > }; > > @@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb, > } > > static void > -__free_fence_array(struct drm_syncobj **fences, unsigned int n) > +__free_fence_array(struct i915_eb_fences *fences, unsigned int n) > { > - while (n--) > - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); > + while (n--) { > + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); > + dma_fence_put(fences[n].dma_fence); > + kfree(fences[n].chain_fence); > + } > kvfree(fences); > } > > -static struct drm_syncobj ** > -get_fence_array(struct drm_i915_gem_execbuffer2 *args, > - struct drm_file *file) > +static struct i915_eb_fences * > +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences) > +{ > + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences = > + &eb->extensions.timeline_fences; > + struct drm_i915_gem_exec_fence __user *user_fences; > + struct i915_eb_fences *fences; > + u64 __user *user_values; > + u64 num_fences, num_user_fences = timeline_fences->fence_count; > + unsigned long n; > + int err = 0; > + > + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ > + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); > + if (num_user_fences > min_t(unsigned long, > + ULONG_MAX / sizeof(*user_fences), > + SIZE_MAX / sizeof(*fences))) > + return ERR_PTR(-EINVAL); > + > + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); > + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences))) > + return ERR_PTR(-EFAULT); > + > + user_values = u64_to_user_ptr(timeline_fences->values_ptr); > + if (!access_ok(user_values, num_user_fences * sizeof(*user_values))) > + return ERR_PTR(-EFAULT); > + > + fences = kvmalloc_array(num_user_fences, sizeof(*fences), > + __GFP_NOWARN | GFP_KERNEL); > + if (!fences) > + return ERR_PTR(-ENOMEM); > + > + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & > + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); > + > + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) { > + struct drm_i915_gem_exec_fence user_fence; > + struct drm_syncobj *syncobj; > + struct dma_fence *fence = NULL; > + u64 point; > + > + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) { > + err = -EFAULT; > + goto err; > + } > + > + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { > + err = -EINVAL; > + goto err; > + } > + > + if (__get_user(point, user_values++)) { > + err = -EFAULT; > + goto err; > + } > + > + syncobj = drm_syncobj_find(eb->file, user_fence.handle); > + if (!syncobj) { > + DRM_DEBUG("Invalid syncobj handle provided\n"); > + err = -ENOENT; > + goto err; > + } > + > + fence = drm_syncobj_fence_get(syncobj); > + > + if (!fence && user_fence.flags && > + !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { > + DRM_DEBUG("Syncobj handle has no fence\n"); > + drm_syncobj_put(syncobj); > + err = -EINVAL; > + goto err; > + } > + > + if (fence) > + err = dma_fence_chain_find_seqno(&fence, point); > + > + if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { > + DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); > + drm_syncobj_put(syncobj); > + goto err; > + } > + > + /* A point might have been signaled already and > + * garbage collected from the timeline. In this case > + * just ignore the point and carry on. > + */ > + if (!fence && (user_fence.flags & I915_EXEC_FENCE_WAIT)) { I think we can only skip if we're only waiting. If there is a signal request we still need to honor it. So I would replace this function above with : if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { > + drm_syncobj_put(syncobj); > + continue; > + } > + > + /* > + * For timeline syncobjs we need to preallocate chains for > + * later signaling. > + */ > + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { > + /* > + * Waiting and signaling the same point (when point != > + * 0) would break the timeline. > + */ > + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { > + DRM_DEBUG("Trying to wait & signal the same timeline point.\n"); > + err = -EINVAL; > + drm_syncobj_put(syncobj); > + goto err; > + } I think we can actually allow this. Wait and signal operations are added in order so we could wait a point 3 and then replace it by another point 3. If we keep this limitation, we need to add : if (fence) dma_fence_put(fence); > + > + fences[num_fences].chain_fence = > + kmalloc(sizeof(*fences[num_fences].chain_fence), > + GFP_KERNEL); > + if (!fences[num_fences].chain_fence) { > + drm_syncobj_put(syncobj); We the change above, we could arrive here with fence != NULL. We probably need to add : if (fence) dma_fence_put(fence); > + err = -ENOMEM; > + DRM_DEBUG("Unable to alloc chain_fence\n"); > + goto err; > + } > + } else { > + fences[num_fences].chain_fence = NULL; > + } > + > + fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); > + fences[num_fences].dma_fence = fence; > + fences[num_fences].value = point; > + num_fences++; > + } > + > + *out_n_fences = num_fences; > + > + return fences; > + > +err: > + __free_fence_array(fences, num_fences); > + return ERR_PTR(err); > +} > + > +static struct i915_eb_fences * > +get_legacy_fence_array(struct i915_execbuffer *eb, > + int *out_n_fences) > { > - const unsigned long nfences = args->num_cliprects; > + struct drm_i915_gem_execbuffer2 *args = eb->args; > struct drm_i915_gem_exec_fence __user *user; > - struct drm_syncobj **fences; > + struct i915_eb_fences *fences; > + const u32 num_fences = args->num_cliprects; > unsigned long n; > int err; > > - if (!(args->flags & I915_EXEC_FENCE_ARRAY)) > - return NULL; > + *out_n_fences = num_fences; > > /* Check multiplication overflow for access_ok() and kvmalloc_array() */ > BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); > - if (nfences > min_t(unsigned long, > - ULONG_MAX / sizeof(*user), > - SIZE_MAX / sizeof(*fences))) > + if (*out_n_fences > min_t(unsigned long, > + ULONG_MAX / sizeof(*user), > + SIZE_MAX / sizeof(*fences))) > return ERR_PTR(-EINVAL); > > user = u64_to_user_ptr(args->cliprects_ptr); > - if (!access_ok(user, nfences * sizeof(*user))) > + if (!access_ok(user, *out_n_fences * sizeof(*user))) > return ERR_PTR(-EFAULT); > > - fences = kvmalloc_array(nfences, sizeof(*fences), > + fences = kvmalloc_array(*out_n_fences, sizeof(*fences), > __GFP_NOWARN | GFP_KERNEL); > if (!fences) > return ERR_PTR(-ENOMEM); > > - for (n = 0; n < nfences; n++) { > - struct drm_i915_gem_exec_fence fence; > + for (n = 0; n < *out_n_fences; n++) { > + struct drm_i915_gem_exec_fence user_fence; > struct drm_syncobj *syncobj; > + struct dma_fence *fence = NULL; > > - if (__copy_from_user(&fence, user++, sizeof(fence))) { > + if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) { > err = -EFAULT; > goto err; > } > > - if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { > + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { > err = -EINVAL; > goto err; > } > > - syncobj = drm_syncobj_find(file, fence.handle); > + syncobj = drm_syncobj_find(eb->file, user_fence.handle); > if (!syncobj) { > DRM_DEBUG("Invalid syncobj handle provided\n"); > err = -ENOENT; > goto err; > } > > + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { > + fence = drm_syncobj_fence_get(syncobj); > + if (!fence) { > + DRM_DEBUG("Syncobj handle has no fence\n"); > + drm_syncobj_put(syncobj); > + err = -EINVAL; > + goto err; > + } > + } > + > BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & > ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); > > - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); > + fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); > + fences[n].dma_fence = fence; > + fences[n].value = 0; > + fences[n].chain_fence = NULL; > } > > return fences; > @@ -2314,37 +2474,45 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, > return ERR_PTR(err); > } > > +static struct i915_eb_fences * > +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences) > +{ > + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) > + return get_legacy_fence_array(eb, out_n_fences); > + > + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) > + return get_timeline_fence_array(eb, out_n_fences); > + > + *out_n_fences = 0; > + return NULL; > +} > + > static void > -put_fence_array(struct drm_i915_gem_execbuffer2 *args, > - struct drm_syncobj **fences) > +put_fence_array(struct i915_eb_fences *fences, int nfences) > { > if (fences) > - __free_fence_array(fences, args->num_cliprects); > + __free_fence_array(fences, nfences); > } > > static int > await_fence_array(struct i915_execbuffer *eb, > - struct drm_syncobj **fences) > + struct i915_eb_fences *fences, > + int nfences) > { > - const unsigned int nfences = eb->args->num_cliprects; > unsigned int n; > int err; > > for (n = 0; n < nfences; n++) { > struct drm_syncobj *syncobj; > - struct dma_fence *fence; > unsigned int flags; > > - syncobj = ptr_unpack_bits(fences[n], &flags, 2); > - if (!(flags & I915_EXEC_FENCE_WAIT)) > - continue; > + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); > > - fence = drm_syncobj_fence_get(syncobj); > - if (!fence) > - return -EINVAL; > + if (!fences[n].dma_fence) > + continue; > > - err = i915_request_await_dma_fence(eb->request, fence); > - dma_fence_put(fence); > + err = i915_request_await_dma_fence(eb->request, > + fences[n].dma_fence); > if (err < 0) > return err; > } > @@ -2354,9 +2522,9 @@ await_fence_array(struct i915_execbuffer *eb, > > static void > signal_fence_array(struct i915_execbuffer *eb, > - struct drm_syncobj **fences) > + struct i915_eb_fences *fences, > + int nfences) > { > - const unsigned int nfences = eb->args->num_cliprects; > struct dma_fence * const fence = &eb->request->fence; > unsigned int n; > > @@ -2364,14 +2532,44 @@ signal_fence_array(struct i915_execbuffer *eb, > struct drm_syncobj *syncobj; > unsigned int flags; > > - syncobj = ptr_unpack_bits(fences[n], &flags, 2); > + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); > if (!(flags & I915_EXEC_FENCE_SIGNAL)) > continue; > > - drm_syncobj_replace_fence(syncobj, fence); > + if (fences[n].chain_fence) { > + drm_syncobj_add_point(syncobj, fences[n].chain_fence, > + fence, fences[n].value); > + /* > + * The chain's ownership is transferred to the > + * timeline. > + */ > + fences[n].chain_fence = NULL; > + } else { > + drm_syncobj_replace_fence(syncobj, fence); > + } > } > } > > +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data) > +{ > + struct i915_execbuffer *eb = data; > + > + /* Timeline fences are incompatible with the fence array flag. */ > + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) > + return -EINVAL; > + > + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) > + return -EINVAL; > + > + if (copy_from_user(&eb->extensions.timeline_fences, ext, > + sizeof(eb->extensions.timeline_fences))) > + return -EFAULT; > + > + eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES); > + > + return 0; > +} > + > static void retire_requests(struct intel_timeline *tl, struct i915_request *end) > { > struct i915_request *rq, *rn; > @@ -2438,6 +2636,7 @@ static void eb_request_add(struct i915_execbuffer *eb) > } > > static const i915_user_extension_fn execbuf_extensions[] = { > + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, > }; > > static int > @@ -2468,16 +2667,17 @@ static int > i915_gem_do_execbuffer(struct drm_device *dev, > struct drm_file *file, > struct drm_i915_gem_execbuffer2 *args, > - struct drm_i915_gem_exec_object2 *exec, > - struct drm_syncobj **fences) > + struct drm_i915_gem_exec_object2 *exec) > { > struct drm_i915_private *i915 = to_i915(dev); > struct i915_execbuffer eb; > struct dma_fence *in_fence = NULL; > struct dma_fence *exec_fence = NULL; > struct sync_file *out_fence = NULL; > + struct i915_eb_fences *fences = NULL; > struct i915_vma *batch; > int out_fence_fd = -1; > + int nfences = 0; > int err; > > BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); > @@ -2521,10 +2721,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, > if (err) > return err; > > + fences = get_fence_array(&eb, &nfences); > + if (IS_ERR(fences)) > + return PTR_ERR(fences); > + > if (args->flags & I915_EXEC_FENCE_IN) { > in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); > - if (!in_fence) > - return -EINVAL; > + if (!in_fence) { > + err = -EINVAL; > + goto err_fences; > + } > } > > if (args->flags & I915_EXEC_FENCE_SUBMIT) { > @@ -2648,7 +2854,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, > } > > if (fences) { > - err = await_fence_array(&eb, fences); > + err = await_fence_array(&eb, fences, nfences); > if (err) > goto err_request; > } > @@ -2680,7 +2886,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, > eb_request_add(&eb); > > if (fences) > - signal_fence_array(&eb, fences); > + signal_fence_array(&eb, fences, nfences); > > if (out_fence) { > if (err == 0) { > @@ -2715,6 +2921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, > dma_fence_put(exec_fence); > err_in_fence: > dma_fence_put(in_fence); > +err_fences: > + put_fence_array(fences, nfences); > return err; > } > > @@ -2809,7 +3017,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, > exec2_list[i].flags = 0; > } > > - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); > + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); > if (exec2.flags & __EXEC_HAS_RELOC) { > struct drm_i915_gem_exec_object __user *user_exec_list = > u64_to_user_ptr(args->buffers_ptr); > @@ -2841,7 +3049,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, > struct drm_i915_private *i915 = to_i915(dev); > struct drm_i915_gem_execbuffer2 *args = data; > struct drm_i915_gem_exec_object2 *exec2_list; > - struct drm_syncobj **fences = NULL; > const size_t count = args->buffer_count; > int err; > > @@ -2869,15 +3076,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, > return -EFAULT; > } > > - if (args->flags & I915_EXEC_FENCE_ARRAY) { > - fences = get_fence_array(args, file); > - if (IS_ERR(fences)) { > - kvfree(exec2_list); > - return PTR_ERR(fences); > - } > - } > - > - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); > + err = i915_gem_do_execbuffer(dev, file, args, exec2_list); > > /* > * Now that we have begun execution of the batchbuffer, we ignore > @@ -2917,7 +3116,6 @@ end:; > } > > args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; > - put_fence_array(args, fences); > kvfree(exec2_list); > return err; > } > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index a7a3b4b98572..f7f868c3c510 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -1828,7 +1828,8 @@ static struct drm_driver driver = { > */ > .driver_features = > DRIVER_GEM | > - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, > + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ | > + DRIVER_SYNCOBJ_TIMELINE, > .release = i915_driver_release, > .open = i915_driver_open, > .lastclose = i915_driver_lastclose, > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c > index 54fce81d5724..b9d3aab53c03 100644 > --- a/drivers/gpu/drm/i915/i915_getparam.c > +++ b/drivers/gpu/drm/i915/i915_getparam.c > @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, > case I915_PARAM_HAS_EXEC_BATCH_FIRST: > case I915_PARAM_HAS_EXEC_FENCE_ARRAY: > case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: > + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: > /* For the time being all of these are always true; > * if some supported hardware does not have one of these > * features this value needs to be provided from > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 7ea38aa6502c..7b8680e3b49d 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait { > */ > #define I915_PARAM_PERF_REVISION 54 > > +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of > + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See > + * I915_EXEC_USE_EXTENSIONS. > + */ > +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 > + > /* Must be kept compact -- no holes and well documented */ > > typedef struct drm_i915_getparam { > @@ -1047,9 +1053,41 @@ struct drm_i915_gem_exec_fence { > }; > > enum drm_i915_gem_execbuffer_ext { > + /** > + * See drm_i915_gem_execbuf_ext_timeline_fences. > + */ > + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1, > + > DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ > }; > > +/** > + * This structure describes an array of drm_syncobj and associated points for > + * timeline variants of drm_syncobj. It is invalid to append this structure to > + * the execbuf if I915_EXEC_FENCE_ARRAY is set. > + */ > +struct drm_i915_gem_execbuffer_ext_timeline_fences { > + struct i915_user_extension base; > + > + /** > + * Number of element in the handles_ptr & value_ptr arrays. > + */ > + __u64 fence_count; > + > + /** > + * Pointer to an array of struct drm_i915_gem_exec_fence of length > + * fence_count. > + */ > + __u64 handles_ptr; > + > + /** > + * Pointer to an array of u64 values of length fence_count. Values > + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline > + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one. > + */ > + __u64 values_ptr; > +}; > + > struct drm_i915_gem_execbuffer2 { > /** > * List of gem_exec_object2 structs _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences 2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota 2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota @ 2020-04-06 20:07 ` Venkata Sandeep Dhanalakota 2020-04-06 22:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 Patchwork ` (2 subsequent siblings) 4 siblings, 0 replies; 17+ messages in thread From: Venkata Sandeep Dhanalakota @ 2020-04-06 20:07 UTC (permalink / raw) To: intel-gfx; +Cc: chris.p.wilson From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> To allow faster engine to engine synchronization, peel the layer of dma-fence-chain to expose potential i915 fences so that the i915-request code can emit HW semaphore wait/signal operations in the ring which is faster than waking up the host to submit unblocked workloads after interrupt notification. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 4cb4cd035daa..9b01f7c51b65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2504,6 +2504,7 @@ await_fence_array(struct i915_execbuffer *eb, for (n = 0; n < nfences; n++) { struct drm_syncobj *syncobj; + struct dma_fence_chain *chain; unsigned int flags; syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); @@ -2511,10 +2512,40 @@ await_fence_array(struct i915_execbuffer *eb, if (!fences[n].dma_fence) continue; - err = i915_request_await_dma_fence(eb->request, - fences[n].dma_fence); - if (err < 0) - return err; + /* + * If we're dealing with a dma-fence-chain, peel the chain by + * adding all of the unsignaled fences + * (dma_fence_chain_for_each does that for us) the chain + * points to. + * + * This enables us to identify waits on i915 fences and allows + * for faster engine-to-engine synchronization using HW + * semaphores. + */ + chain = to_dma_fence_chain(fences[n].dma_fence); + if (chain) { + struct dma_fence *iter; + + dma_fence_chain_for_each(iter, fences[n].dma_fence) { + struct dma_fence_chain *iter_chain = + to_dma_fence_chain(iter); + + GEM_BUG_ON(!iter_chain); + + err = i915_request_await_dma_fence(eb->request, + iter_chain->fence); + if (err < 0) { + dma_fence_put(iter); + return err; + } + } + + } else { + err = i915_request_await_dma_fence(eb->request, + fences[n].dma_fence); + if (err < 0) + return err; + } } return 0; -- 2.21.0.5.gaeb582a983 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 17+ messages in thread
* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota 2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota 2020-04-06 20:07 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Venkata Sandeep Dhanalakota @ 2020-04-06 22:13 ` Patchwork 2020-04-06 22:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork 2020-04-07 8:13 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork 4 siblings, 0 replies; 17+ messages in thread From: Patchwork @ 2020-04-06 22:13 UTC (permalink / raw) To: Venkata Sandeep Dhanalakota; +Cc: intel-gfx == Series Details == Series: series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 URL : https://patchwork.freedesktop.org/series/75570/ State : warning == Summary == $ dim checkpatch origin/drm-tip 296bd5133612 drm/i915: introduce a mechanism to extend execbuf2 -:141: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV) #141: FILE: include/uapi/drm/i915_drm.h:1204: +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS<<1)) ^ total: 0 errors, 0 warnings, 1 checks, 113 lines checked 26b10e8e5551 drm/i915: add syncobj timeline support -:26: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line) #26: https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html -:34: ERROR:BAD_SIGN_OFF: Unrecognized email address: 'Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com>' #34: Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com> -:622: WARNING:NO_AUTHOR_SIGN_OFF: Missing Signed-off-by: line by nominal patch author 'Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>' total: 1 errors, 2 warnings, 0 checks, 551 lines checked afdb9ac918ac drm/i915: peel dma-fence-chains wait fences _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota ` (2 preceding siblings ...) 2020-04-06 22:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 Patchwork @ 2020-04-06 22:37 ` Patchwork 2020-04-07 8:13 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork 4 siblings, 0 replies; 17+ messages in thread From: Patchwork @ 2020-04-06 22:37 UTC (permalink / raw) To: Venkata Sandeep Dhanalakota; +Cc: intel-gfx == Series Details == Series: series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 URL : https://patchwork.freedesktop.org/series/75570/ State : success == Summary == CI Bug Log - changes from CI_DRM_8264 -> Patchwork_17225 ==================================================== Summary ------- **SUCCESS** No regressions found. External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/index.html Known issues ------------ Here are the changes found in Patchwork_17225 that come from known issues: ### IGT changes ### #### Issues hit #### * igt@i915_pm_rpm@basic-rte: - fi-icl-dsi: [PASS][1] -> [INCOMPLETE][2] ([i915#189]) [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/fi-icl-dsi/igt@i915_pm_rpm@basic-rte.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/fi-icl-dsi/igt@i915_pm_rpm@basic-rte.html #### Possible fixes #### * igt@gem_exec_suspend@basic-s4-devices: - fi-tgl-y: [FAIL][3] ([i915#1158]) -> [PASS][4] [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/fi-tgl-y/igt@gem_exec_suspend@basic-s4-devices.html [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/fi-tgl-y/igt@gem_exec_suspend@basic-s4-devices.html * igt@i915_selftest@live@hangcheck: - fi-icl-y: [INCOMPLETE][5] ([i915#1580]) -> [PASS][6] [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/fi-icl-y/igt@i915_selftest@live@hangcheck.html [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/fi-icl-y/igt@i915_selftest@live@hangcheck.html [i915#1158]: https://gitlab.freedesktop.org/drm/intel/issues/1158 [i915#1580]: https://gitlab.freedesktop.org/drm/intel/issues/1580 [i915#189]: https://gitlab.freedesktop.org/drm/intel/issues/189 Participating hosts (53 -> 46) ------------------------------ Missing (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus Build changes ------------- * CI: CI-20190529 -> None * Linux: CI_DRM_8264 -> Patchwork_17225 CI-20190529: 20190529 CI_DRM_8264: e0104585f880a64d4a9b40803cf4fb51ab499f7c @ git://anongit.freedesktop.org/gfx-ci/linux IGT_5573: 9c582425d6b4fc1de9fc2ffc8015cc6f0a0d3e98 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools Patchwork_17225: afdb9ac918aca5d4a603ec3d33e2b4932e3dc1ca @ git://anongit.freedesktop.org/gfx-ci/linux == Linux commits == afdb9ac918ac drm/i915: peel dma-fence-chains wait fences 26b10e8e5551 drm/i915: add syncobj timeline support 296bd5133612 drm/i915: introduce a mechanism to extend execbuf2 == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/index.html _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* [Intel-gfx] ✓ Fi.CI.IGT: success for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota ` (3 preceding siblings ...) 2020-04-06 22:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork @ 2020-04-07 8:13 ` Patchwork 4 siblings, 0 replies; 17+ messages in thread From: Patchwork @ 2020-04-07 8:13 UTC (permalink / raw) To: Venkata Sandeep Dhanalakota; +Cc: intel-gfx == Series Details == Series: series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 URL : https://patchwork.freedesktop.org/series/75570/ State : success == Summary == CI Bug Log - changes from CI_DRM_8264_full -> Patchwork_17225_full ==================================================== Summary ------- **SUCCESS** No regressions found. Known issues ------------ Here are the changes found in Patchwork_17225_full that come from known issues: ### IGT changes ### #### Issues hit #### * igt@i915_pm_dc@dc6-psr: - shard-iclb: [PASS][1] -> [FAIL][2] ([i915#454]) [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-iclb4/igt@i915_pm_dc@dc6-psr.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-iclb4/igt@i915_pm_dc@dc6-psr.html * igt@i915_pm_rpm@modeset-non-lpsp-stress: - shard-kbl: [PASS][3] -> [DMESG-WARN][4] ([i915#165]) [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl6/igt@i915_pm_rpm@modeset-non-lpsp-stress.html [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl2/igt@i915_pm_rpm@modeset-non-lpsp-stress.html * igt@kms_cursor_crc@pipe-a-cursor-suspend: - shard-skl: [PASS][5] -> [INCOMPLETE][6] ([i915#300]) [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl8/igt@kms_cursor_crc@pipe-a-cursor-suspend.html [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl3/igt@kms_cursor_crc@pipe-a-cursor-suspend.html * igt@kms_cursor_edge_walk@pipe-c-128x128-bottom-edge: - shard-kbl: [PASS][7] -> [DMESG-WARN][8] ([i915#78]) [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl6/igt@kms_cursor_edge_walk@pipe-c-128x128-bottom-edge.html [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl2/igt@kms_cursor_edge_walk@pipe-c-128x128-bottom-edge.html * igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic: - shard-glk: [PASS][9] -> [FAIL][10] ([i915#72]) [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-glk8/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-glk8/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html * igt@kms_flip@flip-vs-suspend: - shard-snb: [PASS][11] -> [DMESG-WARN][12] ([i915#42]) [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-snb7/igt@kms_flip@flip-vs-suspend.html [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-snb5/igt@kms_flip@flip-vs-suspend.html * igt@kms_flip@plain-flip-fb-recreate-interruptible: - shard-glk: [PASS][13] -> [FAIL][14] ([i915#1487]) [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-glk1/igt@kms_flip@plain-flip-fb-recreate-interruptible.html [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-glk1/igt@kms_flip@plain-flip-fb-recreate-interruptible.html * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes: - shard-kbl: [PASS][15] -> [DMESG-WARN][16] ([i915#180]) [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl7/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl4/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html * igt@kms_psr@psr2_cursor_mmap_cpu: - shard-iclb: [PASS][17] -> [SKIP][18] ([fdo#109441]) +2 similar issues [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-iclb2/igt@kms_psr@psr2_cursor_mmap_cpu.html [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-iclb1/igt@kms_psr@psr2_cursor_mmap_cpu.html * igt@kms_vblank@pipe-b-ts-continuation-suspend: - shard-skl: [PASS][19] -> [INCOMPLETE][20] ([i915#69]) [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl1/igt@kms_vblank@pipe-b-ts-continuation-suspend.html [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl9/igt@kms_vblank@pipe-b-ts-continuation-suspend.html #### Possible fixes #### * {igt@gem_ctx_isolation@preservation-s3@rcs0}: - shard-apl: [DMESG-WARN][21] ([i915#180]) -> [PASS][22] +2 similar issues [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-apl6/igt@gem_ctx_isolation@preservation-s3@rcs0.html [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-apl2/igt@gem_ctx_isolation@preservation-s3@rcs0.html * igt@gem_exec_balancer@hang: - shard-tglb: [FAIL][23] ([i915#1277]) -> [PASS][24] [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-tglb6/igt@gem_exec_balancer@hang.html [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-tglb8/igt@gem_exec_balancer@hang.html * igt@i915_pm_rpm@basic-pci-d3-state: - shard-skl: [FAIL][25] ([i915#138]) -> [PASS][26] [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl5/igt@i915_pm_rpm@basic-pci-d3-state.html [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl8/igt@i915_pm_rpm@basic-pci-d3-state.html * igt@i915_selftest@live@blt: - shard-snb: [DMESG-FAIL][27] ([i915#1409]) -> [PASS][28] [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-snb4/igt@i915_selftest@live@blt.html [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-snb4/igt@i915_selftest@live@blt.html * igt@i915_suspend@fence-restore-tiled2untiled: - shard-skl: [INCOMPLETE][29] ([i915#69]) -> [PASS][30] [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl3/igt@i915_suspend@fence-restore-tiled2untiled.html [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl8/igt@i915_suspend@fence-restore-tiled2untiled.html * igt@kms_fbcon_fbt@fbc-suspend: - shard-kbl: [DMESG-WARN][31] ([i915#180] / [i915#93] / [i915#95]) -> [PASS][32] [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl1/igt@kms_fbcon_fbt@fbc-suspend.html [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl4/igt@kms_fbcon_fbt@fbc-suspend.html * igt@kms_flip@2x-plain-flip-ts-check: - shard-glk: [FAIL][33] ([i915#34]) -> [PASS][34] [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-glk5/igt@kms_flip@2x-plain-flip-ts-check.html [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-glk6/igt@kms_flip@2x-plain-flip-ts-check.html * igt@kms_flip@flip-vs-expired-vblank: - shard-apl: [FAIL][35] ([i915#79]) -> [PASS][36] [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-apl4/igt@kms_flip@flip-vs-expired-vblank.html [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-apl6/igt@kms_flip@flip-vs-expired-vblank.html * igt@kms_flip@flip-vs-suspend-interruptible: - shard-kbl: [DMESG-WARN][37] ([i915#180]) -> [PASS][38] +2 similar issues [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl3/igt@kms_flip@flip-vs-suspend-interruptible.html [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl1/igt@kms_flip@flip-vs-suspend-interruptible.html * igt@kms_hdr@bpc-switch-dpms: - shard-skl: [FAIL][39] ([i915#1188]) -> [PASS][40] [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl4/igt@kms_hdr@bpc-switch-dpms.html [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl4/igt@kms_hdr@bpc-switch-dpms.html * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes: - shard-snb: [DMESG-WARN][41] ([i915#42]) -> [PASS][42] [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-snb6/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-snb2/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html * igt@kms_plane_lowres@pipe-a-tiling-none: - shard-kbl: [DMESG-WARN][43] ([i915#165] / [i915#78]) -> [PASS][44] [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl2/igt@kms_plane_lowres@pipe-a-tiling-none.html [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl1/igt@kms_plane_lowres@pipe-a-tiling-none.html * igt@kms_psr@psr2_cursor_plane_onoff: - shard-iclb: [SKIP][45] ([fdo#109441]) -> [PASS][46] [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-iclb1/igt@kms_psr@psr2_cursor_plane_onoff.html [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-iclb2/igt@kms_psr@psr2_cursor_plane_onoff.html * igt@kms_setmode@basic: - shard-apl: [FAIL][47] ([i915#31]) -> [PASS][48] [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-apl7/igt@kms_setmode@basic.html [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-apl4/igt@kms_setmode@basic.html - shard-glk: [FAIL][49] ([i915#31]) -> [PASS][50] [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-glk5/igt@kms_setmode@basic.html [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-glk6/igt@kms_setmode@basic.html #### Warnings #### * igt@i915_pm_dc@dc6-psr: - shard-tglb: [FAIL][51] ([i915#454]) -> [SKIP][52] ([i915#468]) [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-tglb1/igt@i915_pm_dc@dc6-psr.html [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-tglb2/igt@i915_pm_dc@dc6-psr.html * igt@i915_pm_rpm@system-suspend-devices: - shard-snb: [SKIP][53] ([fdo#109271]) -> [INCOMPLETE][54] ([i915#82]) [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-snb4/igt@i915_pm_rpm@system-suspend-devices.html [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-snb5/igt@i915_pm_rpm@system-suspend-devices.html {name}: This element is suppressed. This means it is ignored when computing the status of the difference (SUCCESS, WARNING, or FAILURE). [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271 [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441 [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188 [i915#1277]: https://gitlab.freedesktop.org/drm/intel/issues/1277 [i915#138]: https://gitlab.freedesktop.org/drm/intel/issues/138 [i915#1409]: https://gitlab.freedesktop.org/drm/intel/issues/1409 [i915#1487]: https://gitlab.freedesktop.org/drm/intel/issues/1487 [i915#165]: https://gitlab.freedesktop.org/drm/intel/issues/165 [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180 [i915#300]: https://gitlab.freedesktop.org/drm/intel/issues/300 [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31 [i915#34]: https://gitlab.freedesktop.org/drm/intel/issues/34 [i915#42]: https://gitlab.freedesktop.org/drm/intel/issues/42 [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454 [i915#468]: https://gitlab.freedesktop.org/drm/intel/issues/468 [i915#69]: https://gitlab.freedesktop.org/drm/intel/issues/69 [i915#72]: https://gitlab.freedesktop.org/drm/intel/issues/72 [i915#78]: https://gitlab.freedesktop.org/drm/intel/issues/78 [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79 [i915#82]: https://gitlab.freedesktop.org/drm/intel/issues/82 [i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93 [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95 Participating hosts (10 -> 10) ------------------------------ No changes in participating hosts Build changes ------------- * CI: CI-20190529 -> None * Linux: CI_DRM_8264 -> Patchwork_17225 CI-20190529: 20190529 CI_DRM_8264: e0104585f880a64d4a9b40803cf4fb51ab499f7c @ git://anongit.freedesktop.org/gfx-ci/linux IGT_5573: 9c582425d6b4fc1de9fc2ffc8015cc6f0a0d3e98 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools Patchwork_17225: afdb9ac918aca5d4a603ec3d33e2b4932e3dc1ca @ git://anongit.freedesktop.org/gfx-ci/linux piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/index.html _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support @ 2020-07-31 13:45 Lionel Landwerlin 2020-07-31 13:45 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Lionel Landwerlin 0 siblings, 1 reply; 17+ messages in thread From: Lionel Landwerlin @ 2020-07-31 13:45 UTC (permalink / raw) To: intel-gfx Hi all, Reviewed series, just getting a CI run with the CI. Cheers, Test-with: 20200731134120.156288-1-lionel.g.landwerlin@intel.com Lionel Landwerlin (3): drm/i915: introduce a mechanism to extend execbuf2 drm/i915: add syncobj timeline support drm/i915: peel dma-fence-chains wait fences .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 333 +++++++++++++++--- drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_getparam.c | 1 + include/uapi/drm/i915_drm.h | 65 +++- 4 files changed, 342 insertions(+), 60 deletions(-) -- 2.28.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support 2020-07-31 13:45 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin @ 2020-07-31 13:45 ` Lionel Landwerlin 2020-07-31 14:30 ` Chris Wilson 2020-07-31 14:32 ` Chris Wilson 0 siblings, 2 replies; 17+ messages in thread From: Lionel Landwerlin @ 2020-07-31 13:45 UTC (permalink / raw) To: intel-gfx; +Cc: Daniel Vetter Introduces a new parameters to execbuf so that we can specify syncobj handles as well as timeline points. v2: Reuse i915_user_extension_fn v3: Check that the chained extension is only present once (Chris) v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel) v5: Use BIT_ULL (Chris) v6: Fix issue with already signaled timeline points, dma_fence_chain_find_seqno() setting fence to NULL (Chris) v7: Report ENOENT with invalid syncobj handle (Lionel) v8: Check for out of order timeline point insertion (Chris) v9: After explanations on https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html drop the ordering check from v8 (Lionel) v10: Set first extension enum item to 1 (Jason) v11: Rebase Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 167 +++++++++++++++++- drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_getparam.c | 1 + include/uapi/drm/i915_drm.h | 39 ++++ 4 files changed, 203 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 5aac474c058f..652f3b30a374 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -285,6 +285,8 @@ struct i915_execbuffer { struct i915_eb_fence { struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ + u64 value; + struct dma_fence_chain *chain_fence; } *fences; u32 n_fences; @@ -2200,14 +2202,121 @@ eb_pin_engine(struct i915_execbuffer *eb, static void __free_fence_array(struct i915_eb_fence *fences, unsigned int n) { - while (n--) + while (n--) { drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); + kfree(fences[n].chain_fence); + } kvfree(fences); } static int -get_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct i915_execbuffer *eb) +get_timeline_fence_array(const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences, + struct i915_execbuffer *eb) +{ + struct drm_i915_gem_exec_fence __user *user_fences; + struct i915_eb_fence *fences; + u64 __user *user_values; + u64 num_fences, num_user_fences = timeline_fences->fence_count; + unsigned long n; + int err; + + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (num_user_fences > min_t(unsigned long, + ULONG_MAX / sizeof(*user_fences), + SIZE_MAX / sizeof(*fences))) + return -EINVAL; + + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences))) + return -EFAULT; + + user_values = u64_to_user_ptr(timeline_fences->values_ptr); + if (!access_ok(user_values, num_user_fences * sizeof(*user_values))) + return -EFAULT; + + fences = kvmalloc_array(num_user_fences, sizeof(*fences), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return -ENOMEM; + + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) { + struct drm_i915_gem_exec_fence fence; + struct drm_syncobj *syncobj; + u64 point; + + if (__copy_from_user(&fence, user_fences++, sizeof(fence))) { + err = -EFAULT; + goto err; + } + + if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + + if (__get_user(point, user_values++)) { + err = -EFAULT; + goto err; + } + + syncobj = drm_syncobj_find(eb->file, fence.handle); + if (!syncobj) { + DRM_DEBUG("Invalid syncobj handle provided\n"); + err = -ENOENT; + goto err; + } + + /* + * For timeline syncobjs we need to preallocate chains for + * later signaling. + */ + if (point != 0 && fence.flags & I915_EXEC_FENCE_SIGNAL) { + /* + * Waiting and signaling the same point (when point != + * 0) would break the timeline. + */ + if (fence.flags & I915_EXEC_FENCE_WAIT) { + DRM_DEBUG("Tring to wait & signal the same timeline point.\n"); + err = -EINVAL; + drm_syncobj_put(syncobj); + goto err; + } + + fences[num_fences].chain_fence = + kmalloc(sizeof(*fences[num_fences].chain_fence), + GFP_KERNEL); + if (!fences[num_fences].chain_fence) { + drm_syncobj_put(syncobj); + err = -ENOMEM; + DRM_DEBUG("Unable to alloc chain_fence\n"); + goto err; + } + } else { + fences[num_fences].chain_fence = NULL; + } + + fences[num_fences].syncobj = ptr_pack_bits(syncobj, fence.flags, 2); + fences[num_fences].value = point; + num_fences++; + } + + eb->fences = fences; + eb->n_fences = num_fences; + + return 0; + +err: + __free_fence_array(fences, num_fences); + return err; +} + +static int +get_legacy_fence_array(struct drm_i915_gem_execbuffer2 *args, + struct i915_execbuffer *eb) { const unsigned long nfences = args->num_cliprects; struct drm_i915_gem_exec_fence __user *user; @@ -2222,7 +2331,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); if (nfences > min_t(unsigned long, ULONG_MAX / sizeof(*user), - SIZE_MAX / sizeof(*fences))) + SIZE_MAX / sizeof(*eb->fences))) return -EINVAL; user = u64_to_user_ptr(args->cliprects_ptr); @@ -2259,6 +2368,8 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); fences[n].syncobj = ptr_pack_bits(syncobj, fence.flags, 2); + fences[n].value = 0; + fences[n].chain_fence = NULL; } eb->fences = fences; @@ -2290,6 +2401,15 @@ await_fence_array(struct i915_execbuffer *eb) if (!fence) return -EINVAL; + if (eb->fences[n].value) { + err = dma_fence_chain_find_seqno(&fence, eb->fences[n].value); + if (err) + return err; + + if (!fence) + continue; + } + err = i915_request_await_dma_fence(eb->request, fence); dma_fence_put(fence); if (err < 0) @@ -2313,7 +2433,17 @@ signal_fence_array(struct i915_execbuffer *eb) if (!(flags & I915_EXEC_FENCE_SIGNAL)) continue; - drm_syncobj_replace_fence(syncobj, fence); + if (eb->fences[n].chain_fence) { + drm_syncobj_add_point(syncobj, eb->fences[n].chain_fence, + fence, eb->fences[n].value); + /* + * The chain's ownership is transferred to the + * timeline. + */ + eb->fences[n].chain_fence = NULL; + } else { + drm_syncobj_replace_fence(syncobj, fence); + } } } @@ -2358,7 +2488,32 @@ static void eb_request_add(struct i915_execbuffer *eb) mutex_unlock(&tl->mutex); } +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data) +{ + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; + struct i915_execbuffer *eb = data; + + /* Timeline fences are incompatible with the fence array flag. */ + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) + return -EINVAL; + + /* + * Prevent the drm_i915_gem_execbuffer_ext_timeline_fences structure + * to be included multiple times. + */ + if (eb->extension_flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) + return -EINVAL; + + if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences))) + return -EFAULT; + + eb->extension_flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES); + + return get_timeline_fence_array(&timeline_fences, eb); +} + static const i915_user_extension_fn execbuf_extensions[] = { + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, }; static int @@ -2462,7 +2617,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (err) goto err_out_fence; - err = get_fence_array(args, &eb); + err = get_legacy_fence_array(args, &eb); if (err) goto err_arr_fence; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a5f58ed219fe..705e20e62db1 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1845,7 +1845,8 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_GEM | - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 421613219ae9..f96032c60a12 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_BATCH_FIRST: case I915_PARAM_HAS_EXEC_FENCE_ARRAY: case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0efded7b15f0..021276c39842 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -619,6 +619,13 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PERF_REVISION 54 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See + * I915_EXEC_USE_EXTENSIONS. + */ +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 + + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1047,9 +1054,41 @@ struct drm_i915_gem_exec_fence { }; enum drm_i915_gem_execbuffer_ext { + /** + * See drm_i915_gem_execbuf_ext_timeline_fences. + */ + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1, + DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ }; +/** + * This structure describes an array of drm_syncobj and associated points for + * timeline variants of drm_syncobj. It is invalid to append this structure to + * the execbuf if I915_EXEC_FENCE_ARRAY is set. + */ +struct drm_i915_gem_execbuffer_ext_timeline_fences { + struct i915_user_extension base; + + /** + * Number of element in the handles_ptr & value_ptr arrays. + */ + __u64 fence_count; + + /** + * Pointer to an array of struct drm_i915_gem_exec_fence of length + * fence_count. + */ + __u64 handles_ptr; + + /** + * Pointer to an array of u64 values of length fence_count. Values + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one. + */ + __u64 values_ptr; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs -- 2.28.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support 2020-07-31 13:45 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Lionel Landwerlin @ 2020-07-31 14:30 ` Chris Wilson 2020-07-31 19:06 ` Lionel Landwerlin 2020-07-31 14:32 ` Chris Wilson 1 sibling, 1 reply; 17+ messages in thread From: Chris Wilson @ 2020-07-31 14:30 UTC (permalink / raw) To: Lionel Landwerlin, intel-gfx; +Cc: Daniel Vetter Quoting Lionel Landwerlin (2020-07-31 14:45:52) > - drm_syncobj_replace_fence(syncobj, fence); > + if (eb->fences[n].chain_fence) { > + drm_syncobj_add_point(syncobj, eb->fences[n].chain_fence, > + fence, eb->fences[n].value); This function remains not acceptable. It is trivial to write an igt test that causes the DRM_ERROR. A user controllable error message is still not allowed. If you do not have such a test in your igt series, then that too is lacking. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support 2020-07-31 14:30 ` Chris Wilson @ 2020-07-31 19:06 ` Lionel Landwerlin 2020-08-01 9:21 ` Daniel Vetter 0 siblings, 1 reply; 17+ messages in thread From: Lionel Landwerlin @ 2020-07-31 19:06 UTC (permalink / raw) To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter On 31/07/2020 17:30, Chris Wilson wrote: > Quoting Lionel Landwerlin (2020-07-31 14:45:52) >> - drm_syncobj_replace_fence(syncobj, fence); >> + if (eb->fences[n].chain_fence) { >> + drm_syncobj_add_point(syncobj, eb->fences[n].chain_fence, >> + fence, eb->fences[n].value); > This function remains not acceptable. It is trivial to write an igt test > that causes the DRM_ERROR. A user controllable error message is still > not allowed. If you do not have such a test in your igt series, then that > too is lacking. > -Chris As far as I remember there are IGT tests for this (*unordered* subtests). So CI should report a fairlure. The IGT test itself won't fail though. I thought we removed that DRM_ERROR a while ago. I'll send a patch to remove it. Thanks. -Lionel _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support 2020-07-31 19:06 ` Lionel Landwerlin @ 2020-08-01 9:21 ` Daniel Vetter 0 siblings, 0 replies; 17+ messages in thread From: Daniel Vetter @ 2020-08-01 9:21 UTC (permalink / raw) To: Lionel Landwerlin; +Cc: intel-gfx, Chris Wilson On Fri, Jul 31, 2020 at 9:07 PM Lionel Landwerlin <lionel.g.landwerlin@intel.com> wrote: > > On 31/07/2020 17:30, Chris Wilson wrote: > > Quoting Lionel Landwerlin (2020-07-31 14:45:52) > >> - drm_syncobj_replace_fence(syncobj, fence); > >> + if (eb->fences[n].chain_fence) { > >> + drm_syncobj_add_point(syncobj, eb->fences[n].chain_fence, > >> + fence, eb->fences[n].value); > > This function remains not acceptable. It is trivial to write an igt test > > that causes the DRM_ERROR. A user controllable error message is still > > not allowed. If you do not have such a test in your igt series, then that > > too is lacking. > > -Chris > > As far as I remember there are IGT tests for this (*unordered* subtests). > > So CI should report a fairlure. The IGT test itself won't fail though. CI did catch it. > I thought we removed that DRM_ERROR a while ago. > > I'll send a patch to remove it. Thanks. Typed it already (since I didn't see yours), as soon as it's pushed you can just hit the retest button with your current series and then we see whether it's all gone. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support 2020-07-31 13:45 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Lionel Landwerlin 2020-07-31 14:30 ` Chris Wilson @ 2020-07-31 14:32 ` Chris Wilson 2020-07-31 19:11 ` Lionel Landwerlin 1 sibling, 1 reply; 17+ messages in thread From: Chris Wilson @ 2020-07-31 14:32 UTC (permalink / raw) To: Lionel Landwerlin, intel-gfx; +Cc: Daniel Vetter Quoting Lionel Landwerlin (2020-07-31 14:45:52) > Introduces a new parameters to execbuf so that we can specify syncobj > handles as well as timeline points. > > v2: Reuse i915_user_extension_fn > > v3: Check that the chained extension is only present once (Chris) > > v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel) > > v5: Use BIT_ULL (Chris) > > v6: Fix issue with already signaled timeline points, > dma_fence_chain_find_seqno() setting fence to NULL (Chris) > > v7: Report ENOENT with invalid syncobj handle (Lionel) > > v8: Check for out of order timeline point insertion (Chris) > > v9: After explanations on > https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html > drop the ordering check from v8 (Lionel) > > v10: Set first extension enum item to 1 (Jason) The other unaddressed issue here is that we do not need to arbitrarily limit the caller to only a single extension. The code to handle multiple invocations is actually smaller... -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support 2020-07-31 14:32 ` Chris Wilson @ 2020-07-31 19:11 ` Lionel Landwerlin 0 siblings, 0 replies; 17+ messages in thread From: Lionel Landwerlin @ 2020-07-31 19:11 UTC (permalink / raw) To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter On 31/07/2020 17:32, Chris Wilson wrote: > Quoting Lionel Landwerlin (2020-07-31 14:45:52) >> Introduces a new parameters to execbuf so that we can specify syncobj >> handles as well as timeline points. >> >> v2: Reuse i915_user_extension_fn >> >> v3: Check that the chained extension is only present once (Chris) >> >> v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel) >> >> v5: Use BIT_ULL (Chris) >> >> v6: Fix issue with already signaled timeline points, >> dma_fence_chain_find_seqno() setting fence to NULL (Chris) >> >> v7: Report ENOENT with invalid syncobj handle (Lionel) >> >> v8: Check for out of order timeline point insertion (Chris) >> >> v9: After explanations on >> https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html >> drop the ordering check from v8 (Lionel) >> >> v10: Set first extension enum item to 1 (Jason) > The other unaddressed issue here is that we do not need to arbitrarily > limit the caller to only a single extension. The code to handle multiple > invocations is actually smaller... > -Chris You mean an application could send multiple DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES items in the chain of extensions? That's somewhat different than how the current fences are handled. If other extension want to support that, that's up to them. We don't have any use for multiple arrays of timeline fences for a given execbuf in our userspace driver. -Lionel _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support @ 2020-08-03 9:05 Lionel Landwerlin 2020-08-03 9:05 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Lionel Landwerlin 0 siblings, 1 reply; 17+ messages in thread From: Lionel Landwerlin @ 2020-08-03 9:05 UTC (permalink / raw) To: intel-gfx Hi all, Hopefully this last run is all clean. No changes in this series, just a rebase on top of Danvet's s/DRM_ERROR/DRM_DEBUG/. Test-with: 20200803090053.260115-1-lionel.g.landwerlin@intel.com Cheers, Lionel Landwerlin (3): drm/i915: introduce a mechanism to extend execbuf2 drm/i915: add syncobj timeline support drm/i915: peel dma-fence-chains wait fences .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 333 +++++++++++++++--- drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_getparam.c | 1 + include/uapi/drm/i915_drm.h | 65 +++- 4 files changed, 342 insertions(+), 60 deletions(-) -- 2.28.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support 2020-08-03 9:05 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin @ 2020-08-03 9:05 ` Lionel Landwerlin 0 siblings, 0 replies; 17+ messages in thread From: Lionel Landwerlin @ 2020-08-03 9:05 UTC (permalink / raw) To: intel-gfx; +Cc: Daniel Vetter Introduces a new parameters to execbuf so that we can specify syncobj handles as well as timeline points. v2: Reuse i915_user_extension_fn v3: Check that the chained extension is only present once (Chris) v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel) v5: Use BIT_ULL (Chris) v6: Fix issue with already signaled timeline points, dma_fence_chain_find_seqno() setting fence to NULL (Chris) v7: Report ENOENT with invalid syncobj handle (Lionel) v8: Check for out of order timeline point insertion (Chris) v9: After explanations on https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html drop the ordering check from v8 (Lionel) v10: Set first extension enum item to 1 (Jason) v11: Rebase Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 167 +++++++++++++++++- drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_getparam.c | 1 + include/uapi/drm/i915_drm.h | 39 ++++ 4 files changed, 203 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index ed8d1c2517f6..1f766431f3a3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -285,6 +285,8 @@ struct i915_execbuffer { struct i915_eb_fence { struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ + u64 value; + struct dma_fence_chain *chain_fence; } *fences; u32 n_fences; @@ -2200,14 +2202,121 @@ eb_pin_engine(struct i915_execbuffer *eb, static void __free_fence_array(struct i915_eb_fence *fences, unsigned int n) { - while (n--) + while (n--) { drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); + kfree(fences[n].chain_fence); + } kvfree(fences); } static int -get_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct i915_execbuffer *eb) +get_timeline_fence_array(const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences, + struct i915_execbuffer *eb) +{ + struct drm_i915_gem_exec_fence __user *user_fences; + struct i915_eb_fence *fences; + u64 __user *user_values; + u64 num_fences, num_user_fences = timeline_fences->fence_count; + unsigned long n; + int err; + + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (num_user_fences > min_t(unsigned long, + ULONG_MAX / sizeof(*user_fences), + SIZE_MAX / sizeof(*fences))) + return -EINVAL; + + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences))) + return -EFAULT; + + user_values = u64_to_user_ptr(timeline_fences->values_ptr); + if (!access_ok(user_values, num_user_fences * sizeof(*user_values))) + return -EFAULT; + + fences = kvmalloc_array(num_user_fences, sizeof(*fences), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return -ENOMEM; + + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) { + struct drm_i915_gem_exec_fence fence; + struct drm_syncobj *syncobj; + u64 point; + + if (__copy_from_user(&fence, user_fences++, sizeof(fence))) { + err = -EFAULT; + goto err; + } + + if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + + if (__get_user(point, user_values++)) { + err = -EFAULT; + goto err; + } + + syncobj = drm_syncobj_find(eb->file, fence.handle); + if (!syncobj) { + DRM_DEBUG("Invalid syncobj handle provided\n"); + err = -ENOENT; + goto err; + } + + /* + * For timeline syncobjs we need to preallocate chains for + * later signaling. + */ + if (point != 0 && fence.flags & I915_EXEC_FENCE_SIGNAL) { + /* + * Waiting and signaling the same point (when point != + * 0) would break the timeline. + */ + if (fence.flags & I915_EXEC_FENCE_WAIT) { + DRM_DEBUG("Tring to wait & signal the same timeline point.\n"); + err = -EINVAL; + drm_syncobj_put(syncobj); + goto err; + } + + fences[num_fences].chain_fence = + kmalloc(sizeof(*fences[num_fences].chain_fence), + GFP_KERNEL); + if (!fences[num_fences].chain_fence) { + drm_syncobj_put(syncobj); + err = -ENOMEM; + DRM_DEBUG("Unable to alloc chain_fence\n"); + goto err; + } + } else { + fences[num_fences].chain_fence = NULL; + } + + fences[num_fences].syncobj = ptr_pack_bits(syncobj, fence.flags, 2); + fences[num_fences].value = point; + num_fences++; + } + + eb->fences = fences; + eb->n_fences = num_fences; + + return 0; + +err: + __free_fence_array(fences, num_fences); + return err; +} + +static int +get_legacy_fence_array(struct drm_i915_gem_execbuffer2 *args, + struct i915_execbuffer *eb) { const unsigned long nfences = args->num_cliprects; struct drm_i915_gem_exec_fence __user *user; @@ -2222,7 +2331,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); if (nfences > min_t(unsigned long, ULONG_MAX / sizeof(*user), - SIZE_MAX / sizeof(*fences))) + SIZE_MAX / sizeof(*eb->fences))) return -EINVAL; user = u64_to_user_ptr(args->cliprects_ptr); @@ -2259,6 +2368,8 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); fences[n].syncobj = ptr_pack_bits(syncobj, fence.flags, 2); + fences[n].value = 0; + fences[n].chain_fence = NULL; } eb->fences = fences; @@ -2290,6 +2401,15 @@ await_fence_array(struct i915_execbuffer *eb) if (!fence) return -EINVAL; + if (eb->fences[n].value) { + err = dma_fence_chain_find_seqno(&fence, eb->fences[n].value); + if (err) + return err; + + if (!fence) + continue; + } + err = i915_request_await_dma_fence(eb->request, fence); dma_fence_put(fence); if (err < 0) @@ -2313,7 +2433,17 @@ signal_fence_array(struct i915_execbuffer *eb) if (!(flags & I915_EXEC_FENCE_SIGNAL)) continue; - drm_syncobj_replace_fence(syncobj, fence); + if (eb->fences[n].chain_fence) { + drm_syncobj_add_point(syncobj, eb->fences[n].chain_fence, + fence, eb->fences[n].value); + /* + * The chain's ownership is transferred to the + * timeline. + */ + eb->fences[n].chain_fence = NULL; + } else { + drm_syncobj_replace_fence(syncobj, fence); + } } } @@ -2358,7 +2488,32 @@ static void eb_request_add(struct i915_execbuffer *eb) mutex_unlock(&tl->mutex); } +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data) +{ + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; + struct i915_execbuffer *eb = data; + + /* Timeline fences are incompatible with the fence array flag. */ + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) + return -EINVAL; + + /* + * Prevent the drm_i915_gem_execbuffer_ext_timeline_fences structure + * to be included multiple times. + */ + if (eb->extension_flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) + return -EINVAL; + + if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences))) + return -EFAULT; + + eb->extension_flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES); + + return get_timeline_fence_array(&timeline_fences, eb); +} + static const i915_user_extension_fn execbuf_extensions[] = { + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, }; static int @@ -2462,7 +2617,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (err) goto err_out_fence; - err = get_fence_array(args, &eb); + err = get_legacy_fence_array(args, &eb); if (err) goto err_arr_fence; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 8b9bd929ba45..068447f565a9 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1846,7 +1846,8 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_GEM | - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 421613219ae9..f96032c60a12 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_BATCH_FIRST: case I915_PARAM_HAS_EXEC_FENCE_ARRAY: case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0efded7b15f0..021276c39842 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -619,6 +619,13 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PERF_REVISION 54 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See + * I915_EXEC_USE_EXTENSIONS. + */ +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 + + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1047,9 +1054,41 @@ struct drm_i915_gem_exec_fence { }; enum drm_i915_gem_execbuffer_ext { + /** + * See drm_i915_gem_execbuf_ext_timeline_fences. + */ + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1, + DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ }; +/** + * This structure describes an array of drm_syncobj and associated points for + * timeline variants of drm_syncobj. It is invalid to append this structure to + * the execbuf if I915_EXEC_FENCE_ARRAY is set. + */ +struct drm_i915_gem_execbuffer_ext_timeline_fences { + struct i915_user_extension base; + + /** + * Number of element in the handles_ptr & value_ptr arrays. + */ + __u64 fence_count; + + /** + * Pointer to an array of struct drm_i915_gem_exec_fence of length + * fence_count. + */ + __u64 handles_ptr; + + /** + * Pointer to an array of u64 values of length fence_count. Values + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one. + */ + __u64 values_ptr; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs -- 2.28.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 17+ messages in thread
* [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support @ 2020-08-03 14:01 Lionel Landwerlin 2020-08-03 14:01 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Lionel Landwerlin 0 siblings, 1 reply; 17+ messages in thread From: Lionel Landwerlin @ 2020-08-03 14:01 UTC (permalink / raw) To: intel-gfx Hi all, Hopefully last CI run with the IGT tests :) Test-with: 20200803135851.316355-1-lionel.g.landwerlin@intel.com Cheers, Lionel Landwerlin (3): drm/i915: introduce a mechanism to extend execbuf2 drm/i915: add syncobj timeline support drm/i915: peel dma-fence-chains wait fences .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 333 +++++++++++++++--- drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_getparam.c | 1 + include/uapi/drm/i915_drm.h | 65 +++- 4 files changed, 342 insertions(+), 60 deletions(-) -- 2.28.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 17+ messages in thread
* [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support 2020-08-03 14:01 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin @ 2020-08-03 14:01 ` Lionel Landwerlin 0 siblings, 0 replies; 17+ messages in thread From: Lionel Landwerlin @ 2020-08-03 14:01 UTC (permalink / raw) To: intel-gfx; +Cc: Daniel Vetter Introduces a new parameters to execbuf so that we can specify syncobj handles as well as timeline points. v2: Reuse i915_user_extension_fn v3: Check that the chained extension is only present once (Chris) v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel) v5: Use BIT_ULL (Chris) v6: Fix issue with already signaled timeline points, dma_fence_chain_find_seqno() setting fence to NULL (Chris) v7: Report ENOENT with invalid syncobj handle (Lionel) v8: Check for out of order timeline point insertion (Chris) v9: After explanations on https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html drop the ordering check from v8 (Lionel) v10: Set first extension enum item to 1 (Jason) v11: Rebase Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 167 +++++++++++++++++- drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_getparam.c | 1 + include/uapi/drm/i915_drm.h | 39 ++++ 4 files changed, 203 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index ed8d1c2517f6..1f766431f3a3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -285,6 +285,8 @@ struct i915_execbuffer { struct i915_eb_fence { struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ + u64 value; + struct dma_fence_chain *chain_fence; } *fences; u32 n_fences; @@ -2200,14 +2202,121 @@ eb_pin_engine(struct i915_execbuffer *eb, static void __free_fence_array(struct i915_eb_fence *fences, unsigned int n) { - while (n--) + while (n--) { drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); + kfree(fences[n].chain_fence); + } kvfree(fences); } static int -get_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct i915_execbuffer *eb) +get_timeline_fence_array(const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences, + struct i915_execbuffer *eb) +{ + struct drm_i915_gem_exec_fence __user *user_fences; + struct i915_eb_fence *fences; + u64 __user *user_values; + u64 num_fences, num_user_fences = timeline_fences->fence_count; + unsigned long n; + int err; + + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (num_user_fences > min_t(unsigned long, + ULONG_MAX / sizeof(*user_fences), + SIZE_MAX / sizeof(*fences))) + return -EINVAL; + + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences))) + return -EFAULT; + + user_values = u64_to_user_ptr(timeline_fences->values_ptr); + if (!access_ok(user_values, num_user_fences * sizeof(*user_values))) + return -EFAULT; + + fences = kvmalloc_array(num_user_fences, sizeof(*fences), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return -ENOMEM; + + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) { + struct drm_i915_gem_exec_fence fence; + struct drm_syncobj *syncobj; + u64 point; + + if (__copy_from_user(&fence, user_fences++, sizeof(fence))) { + err = -EFAULT; + goto err; + } + + if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + + if (__get_user(point, user_values++)) { + err = -EFAULT; + goto err; + } + + syncobj = drm_syncobj_find(eb->file, fence.handle); + if (!syncobj) { + DRM_DEBUG("Invalid syncobj handle provided\n"); + err = -ENOENT; + goto err; + } + + /* + * For timeline syncobjs we need to preallocate chains for + * later signaling. + */ + if (point != 0 && fence.flags & I915_EXEC_FENCE_SIGNAL) { + /* + * Waiting and signaling the same point (when point != + * 0) would break the timeline. + */ + if (fence.flags & I915_EXEC_FENCE_WAIT) { + DRM_DEBUG("Tring to wait & signal the same timeline point.\n"); + err = -EINVAL; + drm_syncobj_put(syncobj); + goto err; + } + + fences[num_fences].chain_fence = + kmalloc(sizeof(*fences[num_fences].chain_fence), + GFP_KERNEL); + if (!fences[num_fences].chain_fence) { + drm_syncobj_put(syncobj); + err = -ENOMEM; + DRM_DEBUG("Unable to alloc chain_fence\n"); + goto err; + } + } else { + fences[num_fences].chain_fence = NULL; + } + + fences[num_fences].syncobj = ptr_pack_bits(syncobj, fence.flags, 2); + fences[num_fences].value = point; + num_fences++; + } + + eb->fences = fences; + eb->n_fences = num_fences; + + return 0; + +err: + __free_fence_array(fences, num_fences); + return err; +} + +static int +get_legacy_fence_array(struct drm_i915_gem_execbuffer2 *args, + struct i915_execbuffer *eb) { const unsigned long nfences = args->num_cliprects; struct drm_i915_gem_exec_fence __user *user; @@ -2222,7 +2331,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); if (nfences > min_t(unsigned long, ULONG_MAX / sizeof(*user), - SIZE_MAX / sizeof(*fences))) + SIZE_MAX / sizeof(*eb->fences))) return -EINVAL; user = u64_to_user_ptr(args->cliprects_ptr); @@ -2259,6 +2368,8 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); fences[n].syncobj = ptr_pack_bits(syncobj, fence.flags, 2); + fences[n].value = 0; + fences[n].chain_fence = NULL; } eb->fences = fences; @@ -2290,6 +2401,15 @@ await_fence_array(struct i915_execbuffer *eb) if (!fence) return -EINVAL; + if (eb->fences[n].value) { + err = dma_fence_chain_find_seqno(&fence, eb->fences[n].value); + if (err) + return err; + + if (!fence) + continue; + } + err = i915_request_await_dma_fence(eb->request, fence); dma_fence_put(fence); if (err < 0) @@ -2313,7 +2433,17 @@ signal_fence_array(struct i915_execbuffer *eb) if (!(flags & I915_EXEC_FENCE_SIGNAL)) continue; - drm_syncobj_replace_fence(syncobj, fence); + if (eb->fences[n].chain_fence) { + drm_syncobj_add_point(syncobj, eb->fences[n].chain_fence, + fence, eb->fences[n].value); + /* + * The chain's ownership is transferred to the + * timeline. + */ + eb->fences[n].chain_fence = NULL; + } else { + drm_syncobj_replace_fence(syncobj, fence); + } } } @@ -2358,7 +2488,32 @@ static void eb_request_add(struct i915_execbuffer *eb) mutex_unlock(&tl->mutex); } +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data) +{ + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; + struct i915_execbuffer *eb = data; + + /* Timeline fences are incompatible with the fence array flag. */ + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) + return -EINVAL; + + /* + * Prevent the drm_i915_gem_execbuffer_ext_timeline_fences structure + * to be included multiple times. + */ + if (eb->extension_flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) + return -EINVAL; + + if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences))) + return -EFAULT; + + eb->extension_flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES); + + return get_timeline_fence_array(&timeline_fences, eb); +} + static const i915_user_extension_fn execbuf_extensions[] = { + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, }; static int @@ -2462,7 +2617,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (err) goto err_out_fence; - err = get_fence_array(args, &eb); + err = get_legacy_fence_array(args, &eb); if (err) goto err_arr_fence; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 8b9bd929ba45..068447f565a9 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1846,7 +1846,8 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_GEM | - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 421613219ae9..f96032c60a12 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_BATCH_FIRST: case I915_PARAM_HAS_EXEC_FENCE_ARRAY: case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0efded7b15f0..021276c39842 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -619,6 +619,13 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PERF_REVISION 54 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See + * I915_EXEC_USE_EXTENSIONS. + */ +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 + + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1047,9 +1054,41 @@ struct drm_i915_gem_exec_fence { }; enum drm_i915_gem_execbuffer_ext { + /** + * See drm_i915_gem_execbuf_ext_timeline_fences. + */ + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1, + DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ }; +/** + * This structure describes an array of drm_syncobj and associated points for + * timeline variants of drm_syncobj. It is invalid to append this structure to + * the execbuf if I915_EXEC_FENCE_ARRAY is set. + */ +struct drm_i915_gem_execbuffer_ext_timeline_fences { + struct i915_user_extension base; + + /** + * Number of element in the handles_ptr & value_ptr arrays. + */ + __u64 fence_count; + + /** + * Pointer to an array of struct drm_i915_gem_exec_fence of length + * fence_count. + */ + __u64 handles_ptr; + + /** + * Pointer to an array of u64 values of length fence_count. Values + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one. + */ + __u64 values_ptr; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs -- 2.28.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 17+ messages in thread
end of thread, other threads:[~2020-08-03 14:01 UTC | newest] Thread overview: 17+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota 2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota 2020-04-08 16:29 ` Lionel Landwerlin 2020-04-08 17:00 ` Venkata Sandeep Dhanalakota 2020-04-08 17:14 ` Lionel Landwerlin 2020-04-06 20:07 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Venkata Sandeep Dhanalakota 2020-04-06 22:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 Patchwork 2020-04-06 22:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork 2020-04-07 8:13 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork -- strict thread matches above, loose matches on Subject: below -- 2020-07-31 13:45 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin 2020-07-31 13:45 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Lionel Landwerlin 2020-07-31 14:30 ` Chris Wilson 2020-07-31 19:06 ` Lionel Landwerlin 2020-08-01 9:21 ` Daniel Vetter 2020-07-31 14:32 ` Chris Wilson 2020-07-31 19:11 ` Lionel Landwerlin 2020-08-03 9:05 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin 2020-08-03 9:05 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Lionel Landwerlin 2020-08-03 14:01 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin 2020-08-03 14:01 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Lionel Landwerlin
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.