* [PATCH] drm/syncobj: add sync obj wait interface. (v6)
@ 2017-07-06 1:04 Dave Airlie
[not found] ` <20170706010409.9373-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Dave Airlie @ 2017-07-06 1:04 UTC (permalink / raw)
To: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
From: Dave Airlie <airlied@redhat.com>
This interface will allow sync object to be used to back
Vulkan fences. This API is pretty much the vulkan fence waiting
API, and I've ported the code from amdgpu.
v2: accept relative timeout, pass remaining time back
to userspace.
v3: return to absolute timeouts.
v4: absolute zero = poll,
rewrite any/all code to have same operation for arrays
return -EINVAL for 0 fences.
v4.1: fixup fences allocation check, use u64_to_user_ptr
v5: move to sec/nsec, and use timespec64 for calcs.
v6: use -ETIME and drop the out status flag. (-ETIME
is suggested by ickle, I can feel a shed painting)
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
drivers/gpu/drm/drm_internal.h | 2 +
drivers/gpu/drm/drm_ioctl.c | 2 +
drivers/gpu/drm/drm_syncobj.c | 142 +++++++++++++++++++++++++++++++++++++++++
include/uapi/drm/drm.h | 13 ++++
4 files changed, 159 insertions(+)
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 5cecc97..d71b50d 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -157,3 +157,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private);
int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private);
+int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_private);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index f1e5681..385ce74 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
};
#define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls )
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 89441bc..2d5a7a1 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1,5 +1,7 @@
/*
* Copyright 2017 Red Hat
+ * Parts ported from amdgpu (fence wait code).
+ * Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -31,6 +33,9 @@
* that contain an optional fence. The fence can be updated with a new
* fence, or be NULL.
*
+ * syncobj's can be waited upon, where it will wait for the underlying
+ * fence.
+ *
* syncobj's can be export to fd's and back, these fd's are opaque and
* have no other use case, except passing the syncobj between processes.
*
@@ -451,3 +456,140 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
return drm_syncobj_fd_to_handle(file_private, args->fd,
&args->handle);
}
+
+/**
+ * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value
+ *
+ * @timeout_sec: timeout sec component, 0 for poll
+ * @timeout_nsec: timeout nsec component in ns, 0 for poll
+ * both must be 0 for poll.
+ *
+ * Calculate the timeout in jiffies from an absolute time in sec/nsec.
+ */
+static unsigned long drm_timeout_abs_to_jiffies(int64_t timeout_sec, uint64_t timeout_nsec)
+{
+ struct timespec64 abs_timeout, timeout, max_jiffy_timespec;
+ unsigned long timeout_jiffies;
+
+ /* make 0 timeout means poll - absolute 0 doesn't seem valid */
+ if (timeout_sec == 0 && timeout_nsec == 0)
+ return 0;
+
+ abs_timeout.tv_sec = timeout_sec;
+ abs_timeout.tv_nsec = timeout_nsec;
+
+ /* clamp timeout if it's to large */
+ if (!timespec64_valid_strict(&abs_timeout))
+ return MAX_SCHEDULE_TIMEOUT - 1;
+
+ timeout = timespec64_sub(abs_timeout, ktime_to_timespec64(ktime_get()));
+ if (!timespec64_valid(&timeout))
+ return 0;
+
+ jiffies_to_timespec64(MAX_JIFFY_OFFSET, &max_jiffy_timespec);
+ if (timespec64_compare(&timeout, &max_jiffy_timespec) >= 0)
+ return MAX_SCHEDULE_TIMEOUT - 1;
+
+ timeout_jiffies = timespec64_to_jiffies(&timeout);
+ /* clamp timeout to avoid infinite timeout */
+ if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT)
+ return MAX_SCHEDULE_TIMEOUT - 1;
+
+ return timeout_jiffies + 1;
+}
+
+static int drm_syncobj_wait_fences(struct drm_device *dev,
+ struct drm_file *file_private,
+ struct drm_syncobj_wait *wait,
+ struct dma_fence **fences)
+{
+ unsigned long timeout = drm_timeout_abs_to_jiffies(wait->timeout_sec, wait->timeout_nsec);
+ int ret = 0;
+ uint32_t first = ~0;
+
+ if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
+ int i;
+ for (i = 0; i < wait->count_handles; i++) {
+ ret = dma_fence_wait_timeout(fences[i], true, timeout);
+
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ break;
+ timeout = ret;
+ }
+ first = 0;
+ } else {
+ ret = dma_fence_wait_any_timeout(fences,
+ wait->count_handles,
+ true, timeout,
+ &first);
+ }
+
+ if (ret < 0)
+ return ret;
+
+ wait->first_signaled = first;
+ if (ret == 0)
+ return -ETIME;
+ return 0;
+}
+
+int
+drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_private)
+{
+ struct drm_syncobj_wait *args = data;
+ uint32_t *handles;
+ struct dma_fence **fences;
+ int ret = 0;
+ int i;
+
+ if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+ return -ENODEV;
+
+ if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+ return -EINVAL;
+
+ if (args->count_handles == 0)
+ return -EINVAL;
+
+ /* Get the handles from userspace */
+ handles = kmalloc_array(args->count_handles, sizeof(uint32_t),
+ GFP_KERNEL);
+ if (handles == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(handles,
+ u64_to_user_ptr(args->handles),
+ sizeof(uint32_t) * args->count_handles)) {
+ ret = -EFAULT;
+ goto err_free_handles;
+ }
+
+ fences = kcalloc(args->count_handles,
+ sizeof(struct dma_fence *), GFP_KERNEL);
+ if (!fences) {
+ ret = -ENOMEM;
+ goto err_free_handles;
+ }
+
+ for (i = 0; i < args->count_handles; i++) {
+ ret = drm_syncobj_fence_get(file_private, handles[i],
+ &fences[i]);
+ if (ret)
+ goto err_free_fence_array;
+ }
+
+ ret = drm_syncobj_wait_fences(dev, file_private,
+ args, fences);
+
+err_free_fence_array:
+ for (i = 0; i < args->count_handles; i++)
+ dma_fence_put(fences[i]);
+ kfree(fences);
+err_free_handles:
+ kfree(handles);
+
+ return ret;
+}
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 101593a..91746a7 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -718,6 +718,18 @@ struct drm_syncobj_handle {
__u32 pad;
};
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+struct drm_syncobj_wait {
+ __u64 handles;
+ /* absolute timeout */
+ __s64 timeout_sec;
+ __s64 timeout_nsec;
+ __u32 count_handles;
+ __u32 flags;
+ __u32 first_signaled; /* only valid when not waiting all */
+ __u32 pad;
+};
+
#if defined(__cplusplus)
}
#endif
@@ -840,6 +852,7 @@ extern "C" {
#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy)
#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle)
#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait)
/**
* Device specific ioctls should only be in their respective headers
--
2.9.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 19+ messages in thread[parent not found: <20170706010409.9373-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>]
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) [not found] ` <20170706010409.9373-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> @ 2017-07-10 15:28 ` Jason Ekstrand 2017-07-10 15:45 ` Christian König 0 siblings, 1 reply; 19+ messages in thread From: Jason Ekstrand @ 2017-07-10 15:28 UTC (permalink / raw) To: Dave Airlie; +Cc: amd-gfx mailing list, Maling list - DRI developers [-- Attachment #1.1: Type: text/plain, Size: 10346 bytes --] On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote: > From: Dave Airlie <airlied-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> > > This interface will allow sync object to be used to back > Vulkan fences. This API is pretty much the vulkan fence waiting > API, and I've ported the code from amdgpu. > > v2: accept relative timeout, pass remaining time back > to userspace. > v3: return to absolute timeouts. > v4: absolute zero = poll, > rewrite any/all code to have same operation for arrays > return -EINVAL for 0 fences. > v4.1: fixup fences allocation check, use u64_to_user_ptr > v5: move to sec/nsec, and use timespec64 for calcs. > v6: use -ETIME and drop the out status flag. (-ETIME > is suggested by ickle, I can feel a shed painting) > > Signed-off-by: Dave Airlie <airlied-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> > --- > drivers/gpu/drm/drm_internal.h | 2 + > drivers/gpu/drm/drm_ioctl.c | 2 + > drivers/gpu/drm/drm_syncobj.c | 142 ++++++++++++++++++++++++++++++ > +++++++++++ > include/uapi/drm/drm.h | 13 ++++ > 4 files changed, 159 insertions(+) > > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_ > internal.h > index 5cecc97..d71b50d 100644 > --- a/drivers/gpu/drm/drm_internal.h > +++ b/drivers/gpu/drm/drm_internal.h > @@ -157,3 +157,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device > *dev, void *data, > struct drm_file *file_private); > int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private); > +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_private); > diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c > index f1e5681..385ce74 100644 > --- a/drivers/gpu/drm/drm_ioctl.c > +++ b/drivers/gpu/drm/drm_ioctl.c > @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { > DRM_UNLOCKED|DRM_RENDER_ALLOW), > DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, > drm_syncobj_fd_to_handle_ioctl, > DRM_UNLOCKED|DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, > + DRM_UNLOCKED|DRM_RENDER_ALLOW), > }; > > #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index 89441bc..2d5a7a1 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -1,5 +1,7 @@ > /* > * Copyright 2017 Red Hat > + * Parts ported from amdgpu (fence wait code). > + * Copyright 2016 Advanced Micro Devices, Inc. > * > * Permission is hereby granted, free of charge, to any person obtaining a > * copy of this software and associated documentation files (the > "Software"), > @@ -31,6 +33,9 @@ > * that contain an optional fence. The fence can be updated with a new > * fence, or be NULL. > * > + * syncobj's can be waited upon, where it will wait for the underlying > + * fence. > + * > * syncobj's can be export to fd's and back, these fd's are opaque and > * have no other use case, except passing the syncobj between processes. > * > @@ -451,3 +456,140 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device > *dev, void *data, > return drm_syncobj_fd_to_handle(file_private, args->fd, > &args->handle); > } > + > +/** > + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute > value > + * > + * @timeout_sec: timeout sec component, 0 for poll > + * @timeout_nsec: timeout nsec component in ns, 0 for poll > + * both must be 0 for poll. > + * > + * Calculate the timeout in jiffies from an absolute time in sec/nsec. > + */ > +static unsigned long drm_timeout_abs_to_jiffies(int64_t timeout_sec, > uint64_t timeout_nsec) > +{ > + struct timespec64 abs_timeout, timeout, max_jiffy_timespec; > + unsigned long timeout_jiffies; > + > + /* make 0 timeout means poll - absolute 0 doesn't seem valid */ > + if (timeout_sec == 0 && timeout_nsec == 0) > + return 0; > + > + abs_timeout.tv_sec = timeout_sec; > + abs_timeout.tv_nsec = timeout_nsec; > + > + /* clamp timeout if it's to large */ > + if (!timespec64_valid_strict(&abs_timeout)) > + return MAX_SCHEDULE_TIMEOUT - 1; > + > + timeout = timespec64_sub(abs_timeout, > ktime_to_timespec64(ktime_get())); > + if (!timespec64_valid(&timeout)) > + return 0; > + > + jiffies_to_timespec64(MAX_JIFFY_OFFSET, &max_jiffy_timespec); > + if (timespec64_compare(&timeout, &max_jiffy_timespec) >= 0) > + return MAX_SCHEDULE_TIMEOUT - 1; > + > + timeout_jiffies = timespec64_to_jiffies(&timeout); > + /* clamp timeout to avoid infinite timeout */ > + if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT) > + return MAX_SCHEDULE_TIMEOUT - 1; > + > + return timeout_jiffies + 1; > +} > + > +static int drm_syncobj_wait_fences(struct drm_device *dev, > + struct drm_file *file_private, > + struct drm_syncobj_wait *wait, > + struct dma_fence **fences) > +{ > + unsigned long timeout = drm_timeout_abs_to_jiffies(wait->timeout_sec, > wait->timeout_nsec); > + int ret = 0; > + uint32_t first = ~0; > + > + if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { > + int i; > + for (i = 0; i < wait->count_handles; i++) { > + ret = dma_fence_wait_timeout(fences[i], true, > timeout); > + > + if (ret < 0) > + return ret; > + if (ret == 0) > + break; > + timeout = ret; > + } > + first = 0; > + } else { > + ret = dma_fence_wait_any_timeout(fences, > + wait->count_handles, > + true, timeout, > + &first); > + } > + > + if (ret < 0) > + return ret; > + > + wait->first_signaled = first; > + if (ret == 0) > + return -ETIME; > + return 0; > +} > + > +int > +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_private) > +{ > + struct drm_syncobj_wait *args = data; > + uint32_t *handles; > + struct dma_fence **fences; > + int ret = 0; > + int i; > + > + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) > + return -ENODEV; > + > + if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ > ALL) > + return -EINVAL; > + > + if (args->count_handles == 0) > + return -EINVAL; > + > + /* Get the handles from userspace */ > + handles = kmalloc_array(args->count_handles, sizeof(uint32_t), > + GFP_KERNEL); > + if (handles == NULL) > + return -ENOMEM; > + > + if (copy_from_user(handles, > + u64_to_user_ptr(args->handles), > + sizeof(uint32_t) * args->count_handles)) { > + ret = -EFAULT; > + goto err_free_handles; > + } > + > + fences = kcalloc(args->count_handles, > + sizeof(struct dma_fence *), GFP_KERNEL); > + if (!fences) { > + ret = -ENOMEM; > + goto err_free_handles; > + } > + > + for (i = 0; i < args->count_handles; i++) { > + ret = drm_syncobj_fence_get(file_private, handles[i], > + &fences[i]); > + if (ret) > + goto err_free_fence_array; > + } > + > + ret = drm_syncobj_wait_fences(dev, file_private, > + args, fences); > So, reading some CTS tests again, and I think we have a problem here. The Vulkan spec allows you to wait on a fence that is in the unsignaled state. In theory, you could have thread A start waiting on a fence before thread B submits the work which triggers that fence. This means that the dma_fence may not exist yet when vkWaitForFences gets called. If we really want to support the full Vulkan usage, we need to somehow support missing dma_fences by waiting for the dma_fence to show up. Unfortunately, I don't know enough about the internal kernel APIs to know what that would look like. > + > +err_free_fence_array: > + for (i = 0; i < args->count_handles; i++) > + dma_fence_put(fences[i]); > + kfree(fences); > +err_free_handles: > + kfree(handles); > + > + return ret; > +} > diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h > index 101593a..91746a7 100644 > --- a/include/uapi/drm/drm.h > +++ b/include/uapi/drm/drm.h > @@ -718,6 +718,18 @@ struct drm_syncobj_handle { > __u32 pad; > }; > > +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) > +struct drm_syncobj_wait { > + __u64 handles; > + /* absolute timeout */ > + __s64 timeout_sec; > + __s64 timeout_nsec; > + __u32 count_handles; > + __u32 flags; > + __u32 first_signaled; /* only valid when not waiting all */ > + __u32 pad; > +}; > + > #if defined(__cplusplus) > } > #endif > @@ -840,6 +852,7 @@ extern "C" { > #define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct > drm_syncobj_destroy) > #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct > drm_syncobj_handle) > #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct > drm_syncobj_handle) > +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct > drm_syncobj_wait) > > /** > * Device specific ioctls should only be in their respective headers > -- > 2.9.4 > > _______________________________________________ > dri-devel mailing list > dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel > [-- Attachment #1.2: Type: text/html, Size: 13176 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) 2017-07-10 15:28 ` Jason Ekstrand @ 2017-07-10 15:45 ` Christian König 2017-07-10 15:52 ` Jason Ekstrand 0 siblings, 1 reply; 19+ messages in thread From: Christian König @ 2017-07-10 15:45 UTC (permalink / raw) To: Jason Ekstrand, Dave Airlie Cc: Maling list - DRI developers, amd-gfx mailing list [-- Attachment #1.1: Type: text/plain, Size: 11758 bytes --] Am 10.07.2017 um 17:28 schrieb Jason Ekstrand: > On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com > <mailto:airlied@gmail.com>> wrote: > > From: Dave Airlie <airlied@redhat.com <mailto:airlied@redhat.com>> > > This interface will allow sync object to be used to back > Vulkan fences. This API is pretty much the vulkan fence waiting > API, and I've ported the code from amdgpu. > > v2: accept relative timeout, pass remaining time back > to userspace. > v3: return to absolute timeouts. > v4: absolute zero = poll, > rewrite any/all code to have same operation for arrays > return -EINVAL for 0 fences. > v4.1: fixup fences allocation check, use u64_to_user_ptr > v5: move to sec/nsec, and use timespec64 for calcs. > v6: use -ETIME and drop the out status flag. (-ETIME > is suggested by ickle, I can feel a shed painting) > > Signed-off-by: Dave Airlie <airlied@redhat.com > <mailto:airlied@redhat.com>> > --- > drivers/gpu/drm/drm_internal.h | 2 + > drivers/gpu/drm/drm_ioctl.c | 2 + > drivers/gpu/drm/drm_syncobj.c | 142 > +++++++++++++++++++++++++++++++++++++++++ > include/uapi/drm/drm.h | 13 ++++ > 4 files changed, 159 insertions(+) > > diff --git a/drivers/gpu/drm/drm_internal.h > b/drivers/gpu/drm/drm_internal.h > index 5cecc97..d71b50d 100644 > --- a/drivers/gpu/drm/drm_internal.h > +++ b/drivers/gpu/drm/drm_internal.h > @@ -157,3 +157,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct > drm_device *dev, void *data, > struct drm_file *file_private); > int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void > *data, > struct drm_file *file_private); > +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_private); > diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c > index f1e5681..385ce74 100644 > --- a/drivers/gpu/drm/drm_ioctl.c > +++ b/drivers/gpu/drm/drm_ioctl.c > @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc > drm_ioctls[] = { > DRM_UNLOCKED|DRM_RENDER_ALLOW), > DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, > drm_syncobj_fd_to_handle_ioctl, > DRM_UNLOCKED|DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, > + DRM_UNLOCKED|DRM_RENDER_ALLOW), > }; > > #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) > diff --git a/drivers/gpu/drm/drm_syncobj.c > b/drivers/gpu/drm/drm_syncobj.c > index 89441bc..2d5a7a1 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -1,5 +1,7 @@ > /* > * Copyright 2017 Red Hat > + * Parts ported from amdgpu (fence wait code). > + * Copyright 2016 Advanced Micro Devices, Inc. > * > * Permission is hereby granted, free of charge, to any person > obtaining a > * copy of this software and associated documentation files (the > "Software"), > @@ -31,6 +33,9 @@ > * that contain an optional fence. The fence can be updated with > a new > * fence, or be NULL. > * > + * syncobj's can be waited upon, where it will wait for the > underlying > + * fence. > + * > * syncobj's can be export to fd's and back, these fd's are > opaque and > * have no other use case, except passing the syncobj between > processes. > * > @@ -451,3 +456,140 @@ drm_syncobj_fd_to_handle_ioctl(struct > drm_device *dev, void *data, > return drm_syncobj_fd_to_handle(file_private, args->fd, > &args->handle); > } > + > +/** > + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from > absolute value > + * > + * @timeout_sec: timeout sec component, 0 for poll > + * @timeout_nsec: timeout nsec component in ns, 0 for poll > + * both must be 0 for poll. > + * > + * Calculate the timeout in jiffies from an absolute time in > sec/nsec. > + */ > +static unsigned long drm_timeout_abs_to_jiffies(int64_t > timeout_sec, uint64_t timeout_nsec) > +{ > + struct timespec64 abs_timeout, timeout, max_jiffy_timespec; > + unsigned long timeout_jiffies; > + > + /* make 0 timeout means poll - absolute 0 doesn't seem > valid */ > + if (timeout_sec == 0 && timeout_nsec == 0) > + return 0; > + > + abs_timeout.tv_sec = timeout_sec; > + abs_timeout.tv_nsec = timeout_nsec; > + > + /* clamp timeout if it's to large */ > + if (!timespec64_valid_strict(&abs_timeout)) > + return MAX_SCHEDULE_TIMEOUT - 1; > + > + timeout = timespec64_sub(abs_timeout, > ktime_to_timespec64(ktime_get())); > + if (!timespec64_valid(&timeout)) > + return 0; > + > + jiffies_to_timespec64(MAX_JIFFY_OFFSET, &max_jiffy_timespec); > + if (timespec64_compare(&timeout, &max_jiffy_timespec) >= 0) > + return MAX_SCHEDULE_TIMEOUT - 1; > + > + timeout_jiffies = timespec64_to_jiffies(&timeout); > + /* clamp timeout to avoid infinite timeout */ > + if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT) > + return MAX_SCHEDULE_TIMEOUT - 1; > + > + return timeout_jiffies + 1; > +} > + > +static int drm_syncobj_wait_fences(struct drm_device *dev, > + struct drm_file *file_private, > + struct drm_syncobj_wait *wait, > + struct dma_fence **fences) > +{ > + unsigned long timeout = > drm_timeout_abs_to_jiffies(wait->timeout_sec, wait->timeout_nsec); > + int ret = 0; > + uint32_t first = ~0; > + > + if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { > + int i; > + for (i = 0; i < wait->count_handles; i++) { > + ret = dma_fence_wait_timeout(fences[i], > true, timeout); > + > + if (ret < 0) > + return ret; > + if (ret == 0) > + break; > + timeout = ret; > + } > + first = 0; > + } else { > + ret = dma_fence_wait_any_timeout(fences, > + wait->count_handles, > + true, timeout, > + &first); > + } > + > + if (ret < 0) > + return ret; > + > + wait->first_signaled = first; > + if (ret == 0) > + return -ETIME; > + return 0; > +} > + > +int > +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_private) > +{ > + struct drm_syncobj_wait *args = data; > + uint32_t *handles; > + struct dma_fence **fences; > + int ret = 0; > + int i; > + > + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) > + return -ENODEV; > + > + if (args->flags != 0 && args->flags != > DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) > + return -EINVAL; > + > + if (args->count_handles == 0) > + return -EINVAL; > + > + /* Get the handles from userspace */ > + handles = kmalloc_array(args->count_handles, sizeof(uint32_t), > + GFP_KERNEL); > + if (handles == NULL) > + return -ENOMEM; > + > + if (copy_from_user(handles, > + u64_to_user_ptr(args->handles), > + sizeof(uint32_t) * args->count_handles)) { > + ret = -EFAULT; > + goto err_free_handles; > + } > + > + fences = kcalloc(args->count_handles, > + sizeof(struct dma_fence *), GFP_KERNEL); > + if (!fences) { > + ret = -ENOMEM; > + goto err_free_handles; > + } > + > + for (i = 0; i < args->count_handles; i++) { > + ret = drm_syncobj_fence_get(file_private, handles[i], > + &fences[i]); > + if (ret) > + goto err_free_fence_array; > + } > + > + ret = drm_syncobj_wait_fences(dev, file_private, > + args, fences); > > > So, reading some CTS tests again, and I think we have a problem here. > The Vulkan spec allows you to wait on a fence that is in the > unsignaled state. At least on the closed source driver that would be illegal as far as I know. You can't wait on a semaphore before the signal operation is send down to the kernel. Regards, Christian. > In theory, you could have thread A start waiting on a fence before > thread B submits the work which triggers that fence. This means that > the dma_fence may not exist yet when vkWaitForFences gets called. If > we really want to support the full Vulkan usage, we need to somehow > support missing dma_fences by waiting for the dma_fence to show up. > Unfortunately, I don't know enough about the internal kernel APIs to > know what that would look like. > > + > +err_free_fence_array: > + for (i = 0; i < args->count_handles; i++) > + dma_fence_put(fences[i]); > + kfree(fences); > +err_free_handles: > + kfree(handles); > + > + return ret; > +} > diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h > index 101593a..91746a7 100644 > --- a/include/uapi/drm/drm.h > +++ b/include/uapi/drm/drm.h > @@ -718,6 +718,18 @@ struct drm_syncobj_handle { > __u32 pad; > }; > > +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) > +struct drm_syncobj_wait { > + __u64 handles; > + /* absolute timeout */ > + __s64 timeout_sec; > + __s64 timeout_nsec; > + __u32 count_handles; > + __u32 flags; > + __u32 first_signaled; /* only valid when not waiting all */ > + __u32 pad; > +}; > + > #if defined(__cplusplus) > } > #endif > @@ -840,6 +852,7 @@ extern "C" { > #define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct > drm_syncobj_destroy) > #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct > drm_syncobj_handle) > #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct > drm_syncobj_handle) > +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct > drm_syncobj_wait) > > /** > * Device specific ioctls should only be in their respective headers > -- > 2.9.4 > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > <mailto:dri-devel@lists.freedesktop.org> > https://lists.freedesktop.org/mailman/listinfo/dri-devel > <https://lists.freedesktop.org/mailman/listinfo/dri-devel> > > > > > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx [-- Attachment #1.2: Type: text/html, Size: 19285 bytes --] [-- Attachment #2: Type: text/plain, Size: 160 bytes --] _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) 2017-07-10 15:45 ` Christian König @ 2017-07-10 15:52 ` Jason Ekstrand [not found] ` <CAOFGe96gaX2OjsXr8bNBUiqwyKAQfMSycH7PXgYAt_BBJ1UHkQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 19+ messages in thread From: Jason Ekstrand @ 2017-07-10 15:52 UTC (permalink / raw) To: Christian König; +Cc: Maling list - DRI developers, amd-gfx mailing list [-- Attachment #1.1: Type: text/plain, Size: 11877 bytes --] On Mon, Jul 10, 2017 at 8:45 AM, Christian König <deathsimple@vodafone.de> wrote: > Am 10.07.2017 um 17:28 schrieb Jason Ekstrand: > > On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com> wrote: > >> From: Dave Airlie <airlied@redhat.com> >> >> This interface will allow sync object to be used to back >> Vulkan fences. This API is pretty much the vulkan fence waiting >> API, and I've ported the code from amdgpu. >> >> v2: accept relative timeout, pass remaining time back >> to userspace. >> v3: return to absolute timeouts. >> v4: absolute zero = poll, >> rewrite any/all code to have same operation for arrays >> return -EINVAL for 0 fences. >> v4.1: fixup fences allocation check, use u64_to_user_ptr >> v5: move to sec/nsec, and use timespec64 for calcs. >> v6: use -ETIME and drop the out status flag. (-ETIME >> is suggested by ickle, I can feel a shed painting) >> >> Signed-off-by: Dave Airlie <airlied@redhat.com> >> --- >> drivers/gpu/drm/drm_internal.h | 2 + >> drivers/gpu/drm/drm_ioctl.c | 2 + >> drivers/gpu/drm/drm_syncobj.c | 142 ++++++++++++++++++++++++++++++ >> +++++++++++ >> include/uapi/drm/drm.h | 13 ++++ >> 4 files changed, 159 insertions(+) >> >> diff --git a/drivers/gpu/drm/drm_internal.h >> b/drivers/gpu/drm/drm_internal.h >> index 5cecc97..d71b50d 100644 >> --- a/drivers/gpu/drm/drm_internal.h >> +++ b/drivers/gpu/drm/drm_internal.h >> @@ -157,3 +157,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device >> *dev, void *data, >> struct drm_file *file_private); >> int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, >> struct drm_file *file_private); >> +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, >> + struct drm_file *file_private); >> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c >> index f1e5681..385ce74 100644 >> --- a/drivers/gpu/drm/drm_ioctl.c >> +++ b/drivers/gpu/drm/drm_ioctl.c >> @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { >> DRM_UNLOCKED|DRM_RENDER_ALLOW), >> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, >> drm_syncobj_fd_to_handle_ioctl, >> DRM_UNLOCKED|DRM_RENDER_ALLOW), >> + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, >> + DRM_UNLOCKED|DRM_RENDER_ALLOW), >> }; >> >> #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) >> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj. >> c >> index 89441bc..2d5a7a1 100644 >> --- a/drivers/gpu/drm/drm_syncobj.c >> +++ b/drivers/gpu/drm/drm_syncobj.c >> @@ -1,5 +1,7 @@ >> /* >> * Copyright 2017 Red Hat >> + * Parts ported from amdgpu (fence wait code). >> + * Copyright 2016 Advanced Micro Devices, Inc. >> * >> * Permission is hereby granted, free of charge, to any person obtaining >> a >> * copy of this software and associated documentation files (the >> "Software"), >> @@ -31,6 +33,9 @@ >> * that contain an optional fence. The fence can be updated with a new >> * fence, or be NULL. >> * >> + * syncobj's can be waited upon, where it will wait for the underlying >> + * fence. >> + * >> * syncobj's can be export to fd's and back, these fd's are opaque and >> * have no other use case, except passing the syncobj between processes. >> * >> @@ -451,3 +456,140 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device >> *dev, void *data, >> return drm_syncobj_fd_to_handle(file_private, args->fd, >> &args->handle); >> } >> + >> +/** >> + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute >> value >> + * >> + * @timeout_sec: timeout sec component, 0 for poll >> + * @timeout_nsec: timeout nsec component in ns, 0 for poll >> + * both must be 0 for poll. >> + * >> + * Calculate the timeout in jiffies from an absolute time in sec/nsec. >> + */ >> +static unsigned long drm_timeout_abs_to_jiffies(int64_t timeout_sec, >> uint64_t timeout_nsec) >> +{ >> + struct timespec64 abs_timeout, timeout, max_jiffy_timespec; >> + unsigned long timeout_jiffies; >> + >> + /* make 0 timeout means poll - absolute 0 doesn't seem valid */ >> + if (timeout_sec == 0 && timeout_nsec == 0) >> + return 0; >> + >> + abs_timeout.tv_sec = timeout_sec; >> + abs_timeout.tv_nsec = timeout_nsec; >> + >> + /* clamp timeout if it's to large */ >> + if (!timespec64_valid_strict(&abs_timeout)) >> + return MAX_SCHEDULE_TIMEOUT - 1; >> + >> + timeout = timespec64_sub(abs_timeout, >> ktime_to_timespec64(ktime_get())); >> + if (!timespec64_valid(&timeout)) >> + return 0; >> + >> + jiffies_to_timespec64(MAX_JIFFY_OFFSET, &max_jiffy_timespec); >> + if (timespec64_compare(&timeout, &max_jiffy_timespec) >= 0) >> + return MAX_SCHEDULE_TIMEOUT - 1; >> + >> + timeout_jiffies = timespec64_to_jiffies(&timeout); >> + /* clamp timeout to avoid infinite timeout */ >> + if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT) >> + return MAX_SCHEDULE_TIMEOUT - 1; >> + >> + return timeout_jiffies + 1; >> +} >> + >> +static int drm_syncobj_wait_fences(struct drm_device *dev, >> + struct drm_file *file_private, >> + struct drm_syncobj_wait *wait, >> + struct dma_fence **fences) >> +{ >> + unsigned long timeout = drm_timeout_abs_to_jiffies(wait->timeout_sec, >> wait->timeout_nsec); >> + int ret = 0; >> + uint32_t first = ~0; >> + >> + if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { >> + int i; >> + for (i = 0; i < wait->count_handles; i++) { >> + ret = dma_fence_wait_timeout(fences[i], true, >> timeout); >> + >> + if (ret < 0) >> + return ret; >> + if (ret == 0) >> + break; >> + timeout = ret; >> + } >> + first = 0; >> + } else { >> + ret = dma_fence_wait_any_timeout(fences, >> + wait->count_handles, >> + true, timeout, >> + &first); >> + } >> + >> + if (ret < 0) >> + return ret; >> + >> + wait->first_signaled = first; >> + if (ret == 0) >> + return -ETIME; >> + return 0; >> +} >> + >> +int >> +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, >> + struct drm_file *file_private) >> +{ >> + struct drm_syncobj_wait *args = data; >> + uint32_t *handles; >> + struct dma_fence **fences; >> + int ret = 0; >> + int i; >> + >> + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) >> + return -ENODEV; >> + >> + if (args->flags != 0 && args->flags != >> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) >> + return -EINVAL; >> + >> + if (args->count_handles == 0) >> + return -EINVAL; >> + >> + /* Get the handles from userspace */ >> + handles = kmalloc_array(args->count_handles, sizeof(uint32_t), >> + GFP_KERNEL); >> + if (handles == NULL) >> + return -ENOMEM; >> + >> + if (copy_from_user(handles, >> + u64_to_user_ptr(args->handles), >> + sizeof(uint32_t) * args->count_handles)) { >> + ret = -EFAULT; >> + goto err_free_handles; >> + } >> + >> + fences = kcalloc(args->count_handles, >> + sizeof(struct dma_fence *), GFP_KERNEL); >> + if (!fences) { >> + ret = -ENOMEM; >> + goto err_free_handles; >> + } >> + >> + for (i = 0; i < args->count_handles; i++) { >> + ret = drm_syncobj_fence_get(file_private, handles[i], >> + &fences[i]); >> + if (ret) >> + goto err_free_fence_array; >> + } >> + >> + ret = drm_syncobj_wait_fences(dev, file_private, >> + args, fences); >> > > So, reading some CTS tests again, and I think we have a problem here. The > Vulkan spec allows you to wait on a fence that is in the unsignaled state. > > > At least on the closed source driver that would be illegal as far as I > know. > Then they are doing workarounds in userspace. There are definitely CTS tests for this: https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74 > You can't wait on a semaphore before the signal operation is send down to > the kerel. > We (Intel) deal with this today by tracking whether or not the fence has been submitted and using a condition variable in userspace to sort it all out. If we ever want to share fences across processes (which we do), then this needs to be sorted in the kernel. --Jason > Regards, > Christian. > > > In theory, you could have thread A start waiting on a fence before > thread B submits the work which triggers that fence. This means that the > dma_fence may not exist yet when vkWaitForFences gets called. If we really > want to support the full Vulkan usage, we need to somehow support missing > dma_fences by waiting for the dma_fence to show up. Unfortunately, I don't > know enough about the internal kernel APIs to know what that would look > like. > > > >> + >> +err_free_fence_array: >> + for (i = 0; i < args->count_handles; i++) >> + dma_fence_put(fences[i]); >> + kfree(fences); >> +err_free_handles: >> + kfree(handles); >> + >> + return ret; >> +} >> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h >> index 101593a..91746a7 100644 >> --- a/include/uapi/drm/drm.h >> +++ b/include/uapi/drm/drm.h >> @@ -718,6 +718,18 @@ struct drm_syncobj_handle { >> __u32 pad; >> }; >> >> +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) >> +struct drm_syncobj_wait { >> + __u64 handles; >> + /* absolute timeout */ >> + __s64 timeout_sec; >> + __s64 timeout_nsec; >> + __u32 count_handles; >> + __u32 flags; >> + __u32 first_signaled; /* only valid when not waiting all */ >> + __u32 pad; >> +}; >> + >> #if defined(__cplusplus) >> } >> #endif >> @@ -840,6 +852,7 @@ extern "C" { >> #define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct >> drm_syncobj_destroy) >> #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct >> drm_syncobj_handle) >> #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct >> drm_syncobj_handle) >> +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct >> drm_syncobj_wait) >> >> /** >> * Device specific ioctls should only be in their respective headers >> -- >> 2.9.4 >> >> _______________________________________________ >> dri-devel mailing list >> dri-devel@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/dri-devel >> > > > > _______________________________________________ > amd-gfx mailing listamd-gfx@lists.freedesktop.orghttps://lists.freedesktop.org/mailman/listinfo/amd-gfx > > > [-- Attachment #1.2: Type: text/html, Size: 20948 bytes --] [-- Attachment #2: Type: text/plain, Size: 160 bytes --] _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 19+ messages in thread
[parent not found: <CAOFGe96gaX2OjsXr8bNBUiqwyKAQfMSycH7PXgYAt_BBJ1UHkQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* RE: [PATCH] drm/syncobj: add sync obj wait interface. (v6) [not found] ` <CAOFGe96gaX2OjsXr8bNBUiqwyKAQfMSycH7PXgYAt_BBJ1UHkQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2017-07-10 15:58 ` Xie, AlexBin [not found] ` <DM5PR12MB12574F31128759A47DA1C7B7F2A90-2J9CzHegvk/NHlLGalgXawdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org> 2017-07-10 16:15 ` Christian König 1 sibling, 1 reply; 19+ messages in thread From: Xie, AlexBin @ 2017-07-10 15:58 UTC (permalink / raw) To: Jason Ekstrand, Christian König Cc: Dave Airlie, amd-gfx mailing list, Maling list - DRI developers [-- Attachment #1.1: Type: text/plain, Size: 11783 bytes --] I understand this discussion from closes source driver terminology. If a process is killed before it sends out the signaling command, will some part of the GPU be in a waiting situation forever? Alex Bin Xie From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of Jason Ekstrand Sent: Monday, July 10, 2017 11:53 AM To: Christian König <deathsimple@vodafone.de> Cc: Dave Airlie <airlied@gmail.com>; Maling list - DRI developers <dri-devel@lists.freedesktop.org>; amd-gfx mailing list <amd-gfx@lists.freedesktop.org> Subject: Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) On Mon, Jul 10, 2017 at 8:45 AM, Christian König <deathsimple@vodafone.de<mailto:deathsimple@vodafone.de>> wrote: Am 10.07.2017 um 17:28 schrieb Jason Ekstrand: On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com<mailto:airlied@gmail.com>> wrote: From: Dave Airlie <airlied@redhat.com<mailto:airlied@redhat.com>> This interface will allow sync object to be used to back Vulkan fences. This API is pretty much the vulkan fence waiting API, and I've ported the code from amdgpu. v2: accept relative timeout, pass remaining time back to userspace. v3: return to absolute timeouts. v4: absolute zero = poll, rewrite any/all code to have same operation for arrays return -EINVAL for 0 fences. v4.1: fixup fences allocation check, use u64_to_user_ptr v5: move to sec/nsec, and use timespec64 for calcs. v6: use -ETIME and drop the out status flag. (-ETIME is suggested by ickle, I can feel a shed painting) Signed-off-by: Dave Airlie <airlied@redhat.com<mailto:airlied@redhat.com>> --- drivers/gpu/drm/drm_internal.h | 2 + drivers/gpu/drm/drm_ioctl.c | 2 + drivers/gpu/drm/drm_syncobj.c | 142 +++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/drm.h | 13 ++++ 4 files changed, 159 insertions(+) diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 5cecc97..d71b50d 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -157,3 +157,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index f1e5681..385ce74 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 89441bc..2d5a7a1 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1,5 +1,7 @@ /* * Copyright 2017 Red Hat + * Parts ported from amdgpu (fence wait code). + * Copyright 2016 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -31,6 +33,9 @@ * that contain an optional fence. The fence can be updated with a new * fence, or be NULL. * + * syncobj's can be waited upon, where it will wait for the underlying + * fence. + * * syncobj's can be export to fd's and back, these fd's are opaque and * have no other use case, except passing the syncobj between processes. * @@ -451,3 +456,140 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, return drm_syncobj_fd_to_handle(file_private, args->fd, &args->handle); } + +/** + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value + * + * @timeout_sec: timeout sec component, 0 for poll + * @timeout_nsec: timeout nsec component in ns, 0 for poll + * both must be 0 for poll. + * + * Calculate the timeout in jiffies from an absolute time in sec/nsec. + */ +static unsigned long drm_timeout_abs_to_jiffies(int64_t timeout_sec, uint64_t timeout_nsec) +{ + struct timespec64 abs_timeout, timeout, max_jiffy_timespec; + unsigned long timeout_jiffies; + + /* make 0 timeout means poll - absolute 0 doesn't seem valid */ + if (timeout_sec == 0 && timeout_nsec == 0) + return 0; + + abs_timeout.tv_sec = timeout_sec; + abs_timeout.tv_nsec = timeout_nsec; + + /* clamp timeout if it's to large */ + if (!timespec64_valid_strict(&abs_timeout)) + return MAX_SCHEDULE_TIMEOUT - 1; + + timeout = timespec64_sub(abs_timeout, ktime_to_timespec64(ktime_get())); + if (!timespec64_valid(&timeout)) + return 0; + + jiffies_to_timespec64(MAX_JIFFY_OFFSET, &max_jiffy_timespec); + if (timespec64_compare(&timeout, &max_jiffy_timespec) >= 0) + return MAX_SCHEDULE_TIMEOUT - 1; + + timeout_jiffies = timespec64_to_jiffies(&timeout); + /* clamp timeout to avoid infinite timeout */ + if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT) + return MAX_SCHEDULE_TIMEOUT - 1; + + return timeout_jiffies + 1; +} + +static int drm_syncobj_wait_fences(struct drm_device *dev, + struct drm_file *file_private, + struct drm_syncobj_wait *wait, + struct dma_fence **fences) +{ + unsigned long timeout = drm_timeout_abs_to_jiffies(wait->timeout_sec, wait->timeout_nsec); + int ret = 0; + uint32_t first = ~0; + + if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { + int i; + for (i = 0; i < wait->count_handles; i++) { + ret = dma_fence_wait_timeout(fences[i], true, timeout); + + if (ret < 0) + return ret; + if (ret == 0) + break; + timeout = ret; + } + first = 0; + } else { + ret = dma_fence_wait_any_timeout(fences, + wait->count_handles, + true, timeout, + &first); + } + + if (ret < 0) + return ret; + + wait->first_signaled = first; + if (ret == 0) + return -ETIME; + return 0; +} + +int +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_wait *args = data; + uint32_t *handles; + struct dma_fence **fences; + int ret = 0; + int i; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) + return -EINVAL; + + if (args->count_handles == 0) + return -EINVAL; + + /* Get the handles from userspace */ + handles = kmalloc_array(args->count_handles, sizeof(uint32_t), + GFP_KERNEL); + if (handles == NULL) + return -ENOMEM; + + if (copy_from_user(handles, + u64_to_user_ptr(args->handles), + sizeof(uint32_t) * args->count_handles)) { + ret = -EFAULT; + goto err_free_handles; + } + + fences = kcalloc(args->count_handles, + sizeof(struct dma_fence *), GFP_KERNEL); + if (!fences) { + ret = -ENOMEM; + goto err_free_handles; + } + + for (i = 0; i < args->count_handles; i++) { + ret = drm_syncobj_fence_get(file_private, handles[i], + &fences[i]); + if (ret) + goto err_free_fence_array; + } + + ret = drm_syncobj_wait_fences(dev, file_private, + args, fences); So, reading some CTS tests again, and I think we have a problem here. The Vulkan spec allows you to wait on a fence that is in the unsignaled state. At least on the closed source driver that would be illegal as far as I know. Then they are doing workarounds in userspace. There are definitely CTS tests for this: https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74 You can't wait on a semaphore before the signal operation is send down to the kerel. We (Intel) deal with this today by tracking whether or not the fence has been submitted and using a condition variable in userspace to sort it all out. If we ever want to share fences across processes (which we do), then this needs to be sorted in the kernel. --Jason Regards, Christian. In theory, you could have thread A start waiting on a fence before thread B submits the work which triggers that fence. This means that the dma_fence may not exist yet when vkWaitForFences gets called. If we really want to support the full Vulkan usage, we need to somehow support missing dma_fences by waiting for the dma_fence to show up. Unfortunately, I don't know enough about the internal kernel APIs to know what that would look like. + +err_free_fence_array: + for (i = 0; i < args->count_handles; i++) + dma_fence_put(fences[i]); + kfree(fences); +err_free_handles: + kfree(handles); + + return ret; +} diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 101593a..91746a7 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -718,6 +718,18 @@ struct drm_syncobj_handle { __u32 pad; }; +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +struct drm_syncobj_wait { + __u64 handles; + /* absolute timeout */ + __s64 timeout_sec; + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -840,6 +852,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) /** * Device specific ioctls should only be in their respective headers -- 2.9.4 _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org<mailto:dri-devel@lists.freedesktop.org> https://lists.freedesktop.org/mailman/listinfo/dri-devel _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> https://lists.freedesktop.org/mailman/listinfo/amd-gfx [-- Attachment #1.2: Type: text/html, Size: 25387 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 19+ messages in thread
[parent not found: <DM5PR12MB12574F31128759A47DA1C7B7F2A90-2J9CzHegvk/NHlLGalgXawdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>]
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) [not found] ` <DM5PR12MB12574F31128759A47DA1C7B7F2A90-2J9CzHegvk/NHlLGalgXawdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org> @ 2017-07-10 16:13 ` Christian König [not found] ` <60c420a9-eab6-a9e0-0306-893053d82a5d-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 0 siblings, 1 reply; 19+ messages in thread From: Christian König @ 2017-07-10 16:13 UTC (permalink / raw) To: Xie, AlexBin, Jason Ekstrand Cc: Dave Airlie, amd-gfx mailing list, Maling list - DRI developers [-- Attachment #1.1: Type: text/plain, Size: 15895 bytes --] Am 10.07.2017 um 17:58 schrieb Xie, AlexBin: > > I understand this discussion from closes source driver terminology. > > If a process is killed before it sends out the signaling command, will > some part of the GPU be in a waiting situation forever? > Yes, exactly that's the problem here and the reason why that even Microsoft forbids that under windows. Christian. > Alex Bin Xie > > *From:*amd-gfx [mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org] *On > Behalf Of *Jason Ekstrand > *Sent:* Monday, July 10, 2017 11:53 AM > *To:* Christian König <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> > *Cc:* Dave Airlie <airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>; Maling list - DRI developers > <dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; amd-gfx mailing list > <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> > *Subject:* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) > > On Mon, Jul 10, 2017 at 8:45 AM, Christian König > <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org <mailto:deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>> wrote: > > Am 10.07.2017 um 17:28 schrieb Jason Ekstrand: > > On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org > <mailto:airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>> wrote: > > From: Dave Airlie <airlied-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org > <mailto:airlied-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>> > > This interface will allow sync object to be used to back > Vulkan fences. This API is pretty much the vulkan fence > waiting > API, and I've ported the code from amdgpu. > > v2: accept relative timeout, pass remaining time back > to userspace. > v3: return to absolute timeouts. > v4: absolute zero = poll, > rewrite any/all code to have same operation for arrays > return -EINVAL for 0 fences. > v4.1: fixup fences allocation check, use u64_to_user_ptr > v5: move to sec/nsec, and use timespec64 for calcs. > v6: use -ETIME and drop the out status flag. (-ETIME > is suggested by ickle, I can feel a shed painting) > > Signed-off-by: Dave Airlie <airlied-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org > <mailto:airlied-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>> > --- > drivers/gpu/drm/drm_internal.h | 2 + > drivers/gpu/drm/drm_ioctl.c | 2 + > drivers/gpu/drm/drm_syncobj.c | 142 > +++++++++++++++++++++++++++++++++++++++++ > include/uapi/drm/drm.h | 13 ++++ > 4 files changed, 159 insertions(+) > > diff --git a/drivers/gpu/drm/drm_internal.h > b/drivers/gpu/drm/drm_internal.h > index 5cecc97..d71b50d 100644 > --- a/drivers/gpu/drm/drm_internal.h > +++ b/drivers/gpu/drm/drm_internal.h > @@ -157,3 +157,5 @@ int > drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, > void *data, > struct drm_file *file_private); > int drm_syncobj_fd_to_handle_ioctl(struct drm_device > *dev, void *data, > struct drm_file *file_private); > +int drm_syncobj_wait_ioctl(struct drm_device *dev, void > *data, > + struct drm_file *file_private); > diff --git a/drivers/gpu/drm/drm_ioctl.c > b/drivers/gpu/drm/drm_ioctl.c > index f1e5681..385ce74 100644 > --- a/drivers/gpu/drm/drm_ioctl.c > +++ b/drivers/gpu/drm/drm_ioctl.c > @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc > drm_ioctls[] = { > DRM_UNLOCKED|DRM_RENDER_ALLOW), > DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, > drm_syncobj_fd_to_handle_ioctl, > DRM_UNLOCKED|DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, > drm_syncobj_wait_ioctl, > + DRM_UNLOCKED|DRM_RENDER_ALLOW), > }; > > #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) > diff --git a/drivers/gpu/drm/drm_syncobj.c > b/drivers/gpu/drm/drm_syncobj.c > index 89441bc..2d5a7a1 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -1,5 +1,7 @@ > /* > * Copyright 2017 Red Hat > + * Parts ported from amdgpu (fence wait code). > + * Copyright 2016 Advanced Micro Devices, Inc. > * > * Permission is hereby granted, free of charge, to any > person obtaining a > * copy of this software and associated documentation > files (the "Software"), > @@ -31,6 +33,9 @@ > * that contain an optional fence. The fence can be > updated with a new > * fence, or be NULL. > * > + * syncobj's can be waited upon, where it will wait for > the underlying > + * fence. > + * > * syncobj's can be export to fd's and back, these fd's > are opaque and > * have no other use case, except passing the syncobj > between processes. > * > @@ -451,3 +456,140 @@ > drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, > void *data, > return drm_syncobj_fd_to_handle(file_private, > args->fd, > &args->handle); > } > + > +/** > + * drm_timeout_abs_to_jiffies - calculate jiffies timeout > from absolute value > + * > + * @timeout_sec: timeout sec component, 0 for poll > + * @timeout_nsec: timeout nsec component in ns, 0 for poll > + * both must be 0 for poll. > + * > + * Calculate the timeout in jiffies from an absolute time > in sec/nsec. > + */ > +static unsigned long drm_timeout_abs_to_jiffies(int64_t > timeout_sec, uint64_t timeout_nsec) > +{ > + struct timespec64 abs_timeout, timeout, > max_jiffy_timespec; > + unsigned long timeout_jiffies; > + > + /* make 0 timeout means poll - absolute 0 doesn't > seem valid */ > + if (timeout_sec == 0 && timeout_nsec == 0) > + return 0; > + > + abs_timeout.tv_sec = timeout_sec; > + abs_timeout.tv_nsec = timeout_nsec; > + > + /* clamp timeout if it's to large */ > + if (!timespec64_valid_strict(&abs_timeout)) > + return MAX_SCHEDULE_TIMEOUT - 1; > + > + timeout = timespec64_sub(abs_timeout, > ktime_to_timespec64(ktime_get())); > + if (!timespec64_valid(&timeout)) > + return 0; > + > + jiffies_to_timespec64(MAX_JIFFY_OFFSET, > &max_jiffy_timespec); > + if (timespec64_compare(&timeout, > &max_jiffy_timespec) >= 0) > + return MAX_SCHEDULE_TIMEOUT - 1; > + > + timeout_jiffies = timespec64_to_jiffies(&timeout); > + /* clamp timeout to avoid infinite timeout */ > + if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT) > + return MAX_SCHEDULE_TIMEOUT - 1; > + > + return timeout_jiffies + 1; > +} > + > +static int drm_syncobj_wait_fences(struct drm_device *dev, > + struct drm_file *file_private, > + struct drm_syncobj_wait *wait, > + struct dma_fence **fences) > +{ > + unsigned long timeout = > drm_timeout_abs_to_jiffies(wait->timeout_sec, > wait->timeout_nsec); > + int ret = 0; > + uint32_t first = ~0; > + > + if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { > + int i; > + for (i = 0; i < wait->count_handles; i++) { > + ret = > dma_fence_wait_timeout(fences[i], true, timeout); > + > + if (ret < 0) > + return ret; > + if (ret == 0) > + break; > + timeout = ret; > + } > + first = 0; > + } else { > + ret = dma_fence_wait_any_timeout(fences, > + wait->count_handles, > + true, timeout, > + &first); > + } > + > + if (ret < 0) > + return ret; > + > + wait->first_signaled = first; > + if (ret == 0) > + return -ETIME; > + return 0; > +} > + > +int > +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_private) > +{ > + struct drm_syncobj_wait *args = data; > + uint32_t *handles; > + struct dma_fence **fences; > + int ret = 0; > + int i; > + > + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) > + return -ENODEV; > + > + if (args->flags != 0 && args->flags != > DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) > + return -EINVAL; > + > + if (args->count_handles == 0) > + return -EINVAL; > + > + /* Get the handles from userspace */ > + handles = kmalloc_array(args->count_handles, > sizeof(uint32_t), > + GFP_KERNEL); > + if (handles == NULL) > + return -ENOMEM; > + > + if (copy_from_user(handles, > + u64_to_user_ptr(args->handles), > + sizeof(uint32_t) * args->count_handles)) { > + ret = -EFAULT; > + goto err_free_handles; > + } > + > + fences = kcalloc(args->count_handles, > + sizeof(struct dma_fence *), > GFP_KERNEL); > + if (!fences) { > + ret = -ENOMEM; > + goto err_free_handles; > + } > + > + for (i = 0; i < args->count_handles; i++) { > + ret = drm_syncobj_fence_get(file_private, > handles[i], > + &fences[i]); > + if (ret) > + goto err_free_fence_array; > + } > + > + ret = drm_syncobj_wait_fences(dev, file_private, > + args, fences); > > So, reading some CTS tests again, and I think we have a > problem here. The Vulkan spec allows you to wait on a fence > that is in the unsignaled state. > > At least on the closed source driver that would be illegal as far > as I know. > > Then they are doing workarounds in userspace. There are definitely > CTS tests for this: > > https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74 > > You can't wait on a semaphore before the signal operation is send > down to the kerel. > > We (Intel) deal with this today by tracking whether or not the fence > has been submitted and using a condition variable in userspace to sort > it all out. If we ever want to share fences across processes (which > we do), then this needs to be sorted in the kernel. > > --Jason > > Regards, > Christian. > > > > > In theory, you could have thread A start waiting on a fence > before thread B submits the work which triggers that fence. > This means that the dma_fence may not exist yet when > vkWaitForFences gets called. If we really want to support the > full Vulkan usage, we need to somehow support missing > dma_fences by waiting for the dma_fence to show up. > Unfortunately, I don't know enough about the internal kernel > APIs to know what that would look like. > > + > +err_free_fence_array: > + for (i = 0; i < args->count_handles; i++) > + dma_fence_put(fences[i]); > + kfree(fences); > +err_free_handles: > + kfree(handles); > + > + return ret; > +} > diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h > index 101593a..91746a7 100644 > --- a/include/uapi/drm/drm.h > +++ b/include/uapi/drm/drm.h > @@ -718,6 +718,18 @@ struct drm_syncobj_handle { > __u32 pad; > }; > > +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) > +struct drm_syncobj_wait { > + __u64 handles; > + /* absolute timeout */ > + __s64 timeout_sec; > + __s64 timeout_nsec; > + __u32 count_handles; > + __u32 flags; > + __u32 first_signaled; /* only valid when not > waiting all */ > + __u32 pad; > +}; > + > #if defined(__cplusplus) > } > #endif > @@ -840,6 +852,7 @@ extern "C" { > #define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct > drm_syncobj_destroy) > #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, > struct drm_syncobj_handle) > #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, > struct drm_syncobj_handle) > +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct > drm_syncobj_wait) > > /** > * Device specific ioctls should only be in their > respective headers > -- > 2.9.4 > > _______________________________________________ > dri-devel mailing list > dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org > <mailto:dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> > https://lists.freedesktop.org/mailman/listinfo/dri-devel > > _______________________________________________ > > amd-gfx mailing list > > amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org > <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> > > https://lists.freedesktop.org/mailman/listinfo/amd-gfx > [-- Attachment #1.2: Type: text/html, Size: 40041 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
[parent not found: <60c420a9-eab6-a9e0-0306-893053d82a5d-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) [not found] ` <60c420a9-eab6-a9e0-0306-893053d82a5d-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-07-10 23:15 ` Jason Ekstrand 0 siblings, 0 replies; 19+ messages in thread From: Jason Ekstrand @ 2017-07-10 23:15 UTC (permalink / raw) To: Christian König Cc: Dave Airlie, amd-gfx mailing list, Maling list - DRI developers, Xie, AlexBin [-- Attachment #1.1: Type: text/plain, Size: 13572 bytes --] On Mon, Jul 10, 2017 at 9:13 AM, Christian König <deathsimple@vodafone.de> wrote: > Am 10.07.2017 um 17:58 schrieb Xie, AlexBin: > > I understand this discussion from closes source driver terminology. > > > > If a process is killed before it sends out the signaling command, will > some part of the GPU be in a waiting situation forever? > > > Yes, exactly that's the problem here and the reason why that even > Microsoft forbids that under windows. > To be clear, we are only discussing wait-before-submit for client CPU waits. The GPU will not be waiting, only some userspace process. If that process is a compositor, it should take measures to avoid getting hung up by bad clients. What this will *not* cause is the GPU (or kernel GPU scheduling) to get hung up on waiting for some unsubmitted thing. > Christian. > > > > > Alex Bin Xie > > *From:* amd-gfx [mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org > <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>] *On Behalf Of *Jason Ekstrand > *Sent:* Monday, July 10, 2017 11:53 AM > *To:* Christian König <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> <deathsimple@vodafone.de> > *Cc:* Dave Airlie <airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>; Maling list - > DRI developers <dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> > <dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; amd-gfx mailing list > <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> > *Subject:* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) > > > > On Mon, Jul 10, 2017 at 8:45 AM, Christian König <deathsimple@vodafone.de> > wrote: > > Am 10.07.2017 um 17:28 schrieb Jason Ekstrand: > > On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote: > > From: Dave Airlie <airlied-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> > > This interface will allow sync object to be used to back > Vulkan fences. This API is pretty much the vulkan fence waiting > API, and I've ported the code from amdgpu. > > v2: accept relative timeout, pass remaining time back > to userspace. > v3: return to absolute timeouts. > v4: absolute zero = poll, > rewrite any/all code to have same operation for arrays > return -EINVAL for 0 fences. > v4.1: fixup fences allocation check, use u64_to_user_ptr > v5: move to sec/nsec, and use timespec64 for calcs. > v6: use -ETIME and drop the out status flag. (-ETIME > is suggested by ickle, I can feel a shed painting) > > Signed-off-by: Dave Airlie <airlied-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> > --- > drivers/gpu/drm/drm_internal.h | 2 + > drivers/gpu/drm/drm_ioctl.c | 2 + > drivers/gpu/drm/drm_syncobj.c | 142 ++++++++++++++++++++++++++++++ > +++++++++++ > include/uapi/drm/drm.h | 13 ++++ > 4 files changed, 159 insertions(+) > > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_ > internal.h > index 5cecc97..d71b50d 100644 > --- a/drivers/gpu/drm/drm_internal.h > +++ b/drivers/gpu/drm/drm_internal.h > @@ -157,3 +157,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device > *dev, void *data, > struct drm_file *file_private); > int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private); > +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_private); > diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c > index f1e5681..385ce74 100644 > --- a/drivers/gpu/drm/drm_ioctl.c > +++ b/drivers/gpu/drm/drm_ioctl.c > @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { > DRM_UNLOCKED|DRM_RENDER_ALLOW), > DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, > drm_syncobj_fd_to_handle_ioctl, > DRM_UNLOCKED|DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, > + DRM_UNLOCKED|DRM_RENDER_ALLOW), > }; > > #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index 89441bc..2d5a7a1 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -1,5 +1,7 @@ > /* > * Copyright 2017 Red Hat > + * Parts ported from amdgpu (fence wait code). > + * Copyright 2016 Advanced Micro Devices, Inc. > * > * Permission is hereby granted, free of charge, to any person obtaining a > * copy of this software and associated documentation files (the > "Software"), > @@ -31,6 +33,9 @@ > * that contain an optional fence. The fence can be updated with a new > * fence, or be NULL. > * > + * syncobj's can be waited upon, where it will wait for the underlying > + * fence. > + * > * syncobj's can be export to fd's and back, these fd's are opaque and > * have no other use case, except passing the syncobj between processes. > * > @@ -451,3 +456,140 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device > *dev, void *data, > return drm_syncobj_fd_to_handle(file_private, args->fd, > &args->handle); > } > + > +/** > + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute > value > + * > + * @timeout_sec: timeout sec component, 0 for poll > + * @timeout_nsec: timeout nsec component in ns, 0 for poll > + * both must be 0 for poll. > + * > + * Calculate the timeout in jiffies from an absolute time in sec/nsec. > + */ > +static unsigned long drm_timeout_abs_to_jiffies(int64_t timeout_sec, > uint64_t timeout_nsec) > +{ > + struct timespec64 abs_timeout, timeout, max_jiffy_timespec; > + unsigned long timeout_jiffies; > + > + /* make 0 timeout means poll - absolute 0 doesn't seem valid */ > + if (timeout_sec == 0 && timeout_nsec == 0) > + return 0; > + > + abs_timeout.tv_sec = timeout_sec; > + abs_timeout.tv_nsec = timeout_nsec; > + > + /* clamp timeout if it's to large */ > + if (!timespec64_valid_strict(&abs_timeout)) > + return MAX_SCHEDULE_TIMEOUT - 1; > + > + timeout = timespec64_sub(abs_timeout, > ktime_to_timespec64(ktime_get())); > + if (!timespec64_valid(&timeout)) > + return 0; > + > + jiffies_to_timespec64(MAX_JIFFY_OFFSET, &max_jiffy_timespec); > + if (timespec64_compare(&timeout, &max_jiffy_timespec) >= 0) > + return MAX_SCHEDULE_TIMEOUT - 1; > + > + timeout_jiffies = timespec64_to_jiffies(&timeout); > + /* clamp timeout to avoid infinite timeout */ > + if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT) > + return MAX_SCHEDULE_TIMEOUT - 1; > + > + return timeout_jiffies + 1; > +} > + > +static int drm_syncobj_wait_fences(struct drm_device *dev, > + struct drm_file *file_private, > + struct drm_syncobj_wait *wait, > + struct dma_fence **fences) > +{ > + unsigned long timeout = drm_timeout_abs_to_jiffies(wait->timeout_sec, > wait->timeout_nsec); > + int ret = 0; > + uint32_t first = ~0; > + > + if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { > + int i; > + for (i = 0; i < wait->count_handles; i++) { > + ret = dma_fence_wait_timeout(fences[i], true, > timeout); > + > + if (ret < 0) > + return ret; > + if (ret == 0) > + break; > + timeout = ret; > + } > + first = 0; > + } else { > + ret = dma_fence_wait_any_timeout(fences, > + wait->count_handles, > + true, timeout, > + &first); > + } > + > + if (ret < 0) > + return ret; > + > + wait->first_signaled = first; > + if (ret == 0) > + return -ETIME; > + return 0; > +} > + > +int > +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_private) > +{ > + struct drm_syncobj_wait *args = data; > + uint32_t *handles; > + struct dma_fence **fences; > + int ret = 0; > + int i; > + > + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) > + return -ENODEV; > + > + if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ > ALL) > + return -EINVAL; > + > + if (args->count_handles == 0) > + return -EINVAL; > + > + /* Get the handles from userspace */ > + handles = kmalloc_array(args->count_handles, sizeof(uint32_t), > + GFP_KERNEL); > + if (handles == NULL) > + return -ENOMEM; > + > + if (copy_from_user(handles, > + u64_to_user_ptr(args->handles), > + sizeof(uint32_t) * args->count_handles)) { > + ret = -EFAULT; > + goto err_free_handles; > + } > + > + fences = kcalloc(args->count_handles, > + sizeof(struct dma_fence *), GFP_KERNEL); > + if (!fences) { > + ret = -ENOMEM; > + goto err_free_handles; > + } > + > + for (i = 0; i < args->count_handles; i++) { > + ret = drm_syncobj_fence_get(file_private, handles[i], > + &fences[i]); > + if (ret) > + goto err_free_fence_array; > + } > + > + ret = drm_syncobj_wait_fences(dev, file_private, > + args, fences); > > > > So, reading some CTS tests again, and I think we have a problem here. The > Vulkan spec allows you to wait on a fence that is in the unsignaled state. > > > > At least on the closed source driver that would be illegal as far as I > know. > > > > Then they are doing workarounds in userspace. There are definitely CTS > tests for this: > > https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/ > modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74 > > > > You can't wait on a semaphore before the signal operation is send down to > the kerel. > > > > We (Intel) deal with this today by tracking whether or not the fence has > been submitted and using a condition variable in userspace to sort it all > out. If we ever want to share fences across processes (which we do), then > this needs to be sorted in the kernel. > > --Jason > > > > > > Regards, > Christian. > > > > > In theory, you could have thread A start waiting on a fence before > thread B submits the work which triggers that fence. This means that the > dma_fence may not exist yet when vkWaitForFences gets called. If we really > want to support the full Vulkan usage, we need to somehow support missing > dma_fences by waiting for the dma_fence to show up. Unfortunately, I don't > know enough about the internal kernel APIs to know what that would look > like. > > > > + > +err_free_fence_array: > + for (i = 0; i < args->count_handles; i++) > + dma_fence_put(fences[i]); > + kfree(fences); > +err_free_handles: > + kfree(handles); > + > + return ret; > +} > diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h > index 101593a..91746a7 100644 > --- a/include/uapi/drm/drm.h > +++ b/include/uapi/drm/drm.h > @@ -718,6 +718,18 @@ struct drm_syncobj_handle { > __u32 pad; > }; > > +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) > +struct drm_syncobj_wait { > + __u64 handles; > + /* absolute timeout */ > + __s64 timeout_sec; > + __s64 timeout_nsec; > + __u32 count_handles; > + __u32 flags; > + __u32 first_signaled; /* only valid when not waiting all */ > + __u32 pad; > +}; > + > #if defined(__cplusplus) > } > #endif > @@ -840,6 +852,7 @@ extern "C" { > #define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct > drm_syncobj_destroy) > #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct > drm_syncobj_handle) > #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct > drm_syncobj_handle) > +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct > drm_syncobj_wait) > > /** > * Device specific ioctls should only be in their respective headers > -- > 2.9.4 > > _______________________________________________ > dri-devel mailing list > dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel > > > > > > _______________________________________________ > > amd-gfx mailing list > > amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org > > https://lists.freedesktop.org/mailman/listinfo/amd-gfx > > > > > > > [-- Attachment #1.2: Type: text/html, Size: 38754 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) [not found] ` <CAOFGe96gaX2OjsXr8bNBUiqwyKAQfMSycH7PXgYAt_BBJ1UHkQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 2017-07-10 15:58 ` Xie, AlexBin @ 2017-07-10 16:15 ` Christian König [not found] ` <fe27d2f8-760b-c2d4-9474-7eaaae560086-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 1 sibling, 1 reply; 19+ messages in thread From: Christian König @ 2017-07-10 16:15 UTC (permalink / raw) To: Jason Ekstrand Cc: Dave Airlie, Maling list - DRI developers, amd-gfx mailing list [-- Attachment #1.1: Type: text/plain, Size: 1583 bytes --] Am 10.07.2017 um 17:52 schrieb Jason Ekstrand: > On Mon, Jul 10, 2017 at 8:45 AM, Christian König > <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org <mailto:deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>> wrote: > > Am 10.07.2017 um 17:28 schrieb Jason Ekstrand: >> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org >> <mailto:airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>> wrote: >> [SNIP] >> So, reading some CTS tests again, and I think we have a problem >> here. The Vulkan spec allows you to wait on a fence that is in >> the unsignaled state. > > At least on the closed source driver that would be illegal as far > as I know. > > > Then they are doing workarounds in userspace. There are definitely > CTS tests for this: > > https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74 > > You can't wait on a semaphore before the signal operation is send > down to the kerel. > > > We (Intel) deal with this today by tracking whether or not the fence > has been submitted and using a condition variable in userspace to sort > it all out. Which sounds exactly like what AMD is doing in it's drivers as well. > If we ever want to share fences across processes (which we do), then > this needs to be sorted in the kernel. That would clearly get a NAK from my side, even Microsoft forbids wait before signal because you can easily end up in deadlock situations. Regards, Christian. [-- Attachment #1.2: Type: text/html, Size: 4431 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
[parent not found: <fe27d2f8-760b-c2d4-9474-7eaaae560086-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) [not found] ` <fe27d2f8-760b-c2d4-9474-7eaaae560086-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-07-10 21:09 ` Jason Ekstrand 2017-07-11 2:36 ` Michel Dänzer 2017-07-11 7:22 ` Daniel Vetter 0 siblings, 2 replies; 19+ messages in thread From: Jason Ekstrand @ 2017-07-10 21:09 UTC (permalink / raw) To: Christian König Cc: Dave Airlie, Maling list - DRI developers, amd-gfx mailing list [-- Attachment #1.1: Type: text/plain, Size: 2388 bytes --] On Mon, Jul 10, 2017 at 9:15 AM, Christian König <deathsimple@vodafone.de> wrote: > Am 10.07.2017 um 17:52 schrieb Jason Ekstrand: > > On Mon, Jul 10, 2017 at 8:45 AM, Christian König <deathsimple@vodafone.de> > wrote: > >> Am 10.07.2017 um 17:28 schrieb Jason Ekstrand: >> >> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote: >> [SNIP] >> So, reading some CTS tests again, and I think we have a problem here. >> The Vulkan spec allows you to wait on a fence that is in the unsignaled >> state. >> >> >> At least on the closed source driver that would be illegal as far as I >> know. >> > > Then they are doing workarounds in userspace. There are definitely CTS > tests for this: > > https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/ > modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74 > > >> You can't wait on a semaphore before the signal operation is send down to >> the kerel. >> > > We (Intel) deal with this today by tracking whether or not the fence has > been submitted and using a condition variable in userspace to sort it all > out. > > > Which sounds exactly like what AMD is doing in it's drivers as well. > Which doesn't work cross-process so... > If we ever want to share fences across processes (which we do), then this > needs to be sorted in the kernel. > > > That would clearly get a NAK from my side, even Microsoft forbids wait > before signal because you can easily end up in deadlock situations. > Please don't NAK things that are required by the API specification and CTS tests. That makes it very hard for people like me to get their jobs done. :-) Now, as for whether or not it's a good idea. First off, we do have timeouts an a status querying mechanism so an application can just set a timeout of 1s and do something if it times out. Second, if the application is a compositor or something else that doesn't trust its client, it shouldn't be using the OPAQUE_FD mechanism of Vulkan semaphore/fence sharing anyway. For those scenarios, they can require the untrusted client to use FENCE_FD (sync file) and they have all of the usual guarantees about when the work got submitted, etc. Also, I'm more than happy to put this all behind a flag so it's not the default behavior. --Jason [-- Attachment #1.2: Type: text/html, Size: 5825 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) 2017-07-10 21:09 ` Jason Ekstrand @ 2017-07-11 2:36 ` Michel Dänzer 2017-07-11 7:17 ` Christian König 2017-07-11 7:22 ` Daniel Vetter 1 sibling, 1 reply; 19+ messages in thread From: Michel Dänzer @ 2017-07-11 2:36 UTC (permalink / raw) To: Jason Ekstrand, Christian König Cc: amd-gfx mailing list, Maling list - DRI developers On 11/07/17 06:09 AM, Jason Ekstrand wrote: > On Mon, Jul 10, 2017 at 9:15 AM, Christian König > <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote: > > Am 10.07.2017 um 17:52 schrieb Jason Ekstrand: >> On Mon, Jul 10, 2017 at 8:45 AM, Christian König >> <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote: >> >> Am 10.07.2017 um 17:28 schrieb Jason Ekstrand: >>> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie >>> <airlied@gmail.com <mailto:airlied@gmail.com>> wrote: >>> [SNIP] >>> So, reading some CTS tests again, and I think we have a >>> problem here. The Vulkan spec allows you to wait on a fence >>> that is in the unsignaled state. >> >> At least on the closed source driver that would be illegal as >> far as I know. >> >> >> Then they are doing workarounds in userspace. There are >> definitely CTS tests for this: >> >> https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74 >> <https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74> >> >> >> You can't wait on a semaphore before the signal operation is >> send down to the kerel. >> >> >> We (Intel) deal with this today by tracking whether or not the >> fence has been submitted and using a condition variable in >> userspace to sort it all out. > > Which sounds exactly like what AMD is doing in it's drivers as well. > > > Which doesn't work cross-process so... Surely it can be made to work by providing suitable kernel APIs to userspace? >> If we ever want to share fences across processes (which we do), >> then this needs to be sorted in the kernel. > > That would clearly get a NAK from my side, even Microsoft forbids > wait before signal because you can easily end up in deadlock situations. > > Please don't NAK things that are required by the API specification and > CTS tests. There is no requirement for every aspect of the Vulkan API specification to be mirrored 1:1 in the kernel <-> userspace API. We have to work out what makes sense at each level. > That makes it very hard for people like me to get their jobs done. :-) Jason, that's uncalled for. Christian is also just doing his job here. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) 2017-07-11 2:36 ` Michel Dänzer @ 2017-07-11 7:17 ` Christian König [not found] ` <c7a44542-acca-09e1-5ea5-bf9938921e54-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 0 siblings, 1 reply; 19+ messages in thread From: Christian König @ 2017-07-11 7:17 UTC (permalink / raw) To: Michel Dänzer, Jason Ekstrand Cc: amd-gfx mailing list, Maling list - DRI developers Am 11.07.2017 um 04:36 schrieb Michel Dänzer: > On 11/07/17 06:09 AM, Jason Ekstrand wrote: >> On Mon, Jul 10, 2017 at 9:15 AM, Christian König >> <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote: >> >> Am 10.07.2017 um 17:52 schrieb Jason Ekstrand: >>> On Mon, Jul 10, 2017 at 8:45 AM, Christian König >>> <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote: >>> >>> Am 10.07.2017 um 17:28 schrieb Jason Ekstrand: >>>> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie >>>> <airlied@gmail.com <mailto:airlied@gmail.com>> wrote: >>>> [SNIP] >>>> So, reading some CTS tests again, and I think we have a >>>> problem here. The Vulkan spec allows you to wait on a fence >>>> that is in the unsignaled state. >>> At least on the closed source driver that would be illegal as >>> far as I know. >>> >>> >>> Then they are doing workarounds in userspace. There are >>> definitely CTS tests for this: >>> >>> https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74 >>> <https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74> >>> >>> >>> You can't wait on a semaphore before the signal operation is >>> send down to the kerel. >>> >>> >>> We (Intel) deal with this today by tracking whether or not the >>> fence has been submitted and using a condition variable in >>> userspace to sort it all out. >> Which sounds exactly like what AMD is doing in it's drivers as well. >> >> >> Which doesn't work cross-process so... > Surely it can be made to work by providing suitable kernel APIs to > userspace? Well, that's exactly what Jason proposed to do, but I'm not very keen of that. >>> If we ever want to share fences across processes (which we do), >>> then this needs to be sorted in the kernel. >> That would clearly get a NAK from my side, even Microsoft forbids >> wait before signal because you can easily end up in deadlock situations. >> >> Please don't NAK things that are required by the API specification and >> CTS tests. > There is no requirement for every aspect of the Vulkan API specification > to be mirrored 1:1 in the kernel <-> userspace API. We have to work out > what makes sense at each level. Exactly, if we have a synchronization problem between two processes that should be solved in userspace. E.g. if process A hasn't submitted it's work to the kernel it should flush it's commands before sending a flip event to the compositor. We can attach something to the fd making it possible for an X shared memory fence to be transported with it if that makes live easier. This way the waiter implementation can still chose what to do and/or wait async for the client to have it's flushes completed etc... Regards, Christian. _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 19+ messages in thread
[parent not found: <c7a44542-acca-09e1-5ea5-bf9938921e54-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) [not found] ` <c7a44542-acca-09e1-5ea5-bf9938921e54-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-07-11 15:43 ` Jason Ekstrand [not found] ` <CAOFGe95cnFJCEAO6k8emWFWaAZdMPrdHjQos02peuWe6Ve1wvw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 19+ messages in thread From: Jason Ekstrand @ 2017-07-11 15:43 UTC (permalink / raw) To: Christian König Cc: Michel Dänzer, amd-gfx mailing list, Maling list - DRI developers [-- Attachment #1.1: Type: text/plain, Size: 4604 bytes --] On Tue, Jul 11, 2017 at 12:17 AM, Christian König <deathsimple@vodafone.de> wrote: > Am 11.07.2017 um 04:36 schrieb Michel Dänzer: > >> On 11/07/17 06:09 AM, Jason Ekstrand wrote: >> >>> On Mon, Jul 10, 2017 at 9:15 AM, Christian König >>> <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org <mailto:deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>> wrote: >>> >>> Am 10.07.2017 um 17:52 schrieb Jason Ekstrand: >>> >>>> On Mon, Jul 10, 2017 at 8:45 AM, Christian König >>>> <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org <mailto:deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>> wrote: >>>> >>>> Am 10.07.2017 um 17:28 schrieb Jason Ekstrand: >>>> >>>>> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie >>>>> <airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>> wrote: >>>>> [SNIP] >>>>> So, reading some CTS tests again, and I think we have a >>>>> problem here. The Vulkan spec allows you to wait on a fence >>>>> that is in the unsignaled state. >>>>> >>>> At least on the closed source driver that would be illegal as >>>> far as I know. >>>> >>>> >>>> Then they are doing workarounds in userspace. There are >>>> definitely CTS tests for this: >>>> >>>> https://github.com/KhronosGroup/VK-GL-CTS/blob/master/ >>>> external/vulkancts/modules/vulkan/synchronization/vktSync >>>> hronizationBasicFenceTests.cpp#L74 >>>> <https://github.com/KhronosGroup/VK-GL-CTS/blob/master/ >>>> external/vulkancts/modules/vulkan/synchronization/vktSync >>>> hronizationBasicFenceTests.cpp#L74> >>>> >>>> You can't wait on a semaphore before the signal operation is >>>> send down to the kerel. >>>> >>>> >>>> We (Intel) deal with this today by tracking whether or not the >>>> fence has been submitted and using a condition variable in >>>> userspace to sort it all out. >>>> >>> Which sounds exactly like what AMD is doing in it's drivers as well. >>> >>> >>> Which doesn't work cross-process so... >>> >> Surely it can be made to work by providing suitable kernel APIs to >> userspace? >> > > Well, that's exactly what Jason proposed to do, but I'm not very keen of > that. > > If we ever want to share fences across processes (which we do), >>>> then this needs to be sorted in the kernel. >>>> >>> That would clearly get a NAK from my side, even Microsoft forbids >>> wait before signal because you can easily end up in deadlock >>> situations. >>> >>> Please don't NAK things that are required by the API specification and >>> CTS tests. >>> >> There is no requirement for every aspect of the Vulkan API specification >> to be mirrored 1:1 in the kernel <-> userspace API. We have to work out >> what makes sense at each level. >> > > Exactly, if we have a synchronization problem between two processes that > should be solved in userspace. > > E.g. if process A hasn't submitted it's work to the kernel it should flush > it's commands before sending a flip event to the compositor. > Ok, I think there is some confusion here on what is being proposed. Here are some things that are *not* being proposed: 1. This does *not* allow a client to block another client's GPU work indefinitely. This is entirely for a CPU wait API to allow for a "wait for submit" as well as a "wait for finish". 2. This is *not* for system compositors that need to be robust against malicious clients. The expected use for the OPAQUE_FD is two very tightly integrated processes which trust each other but need to be able to share synchronization primitives. One of the things they need to be able to do (as per the Vulkan API) with those synchronization primitives is a "wait for submit and finish" operation. I'm happy for the kernel to have separate APIs for "wait for submit" and "wait for finish" if that's more palatable but i don't really see why there is such a strong reaction to the "wait for submit and finish" behavior. Could we do this "in userspace"? Yes, with added kernel API. we would need some way of strapping a second FD onto a syncobj or combining two FDs into one to send across the wire or something like that, then add a shared memory segment, and then pile on a bunch of code to do cross-process condition variables and state tracking. I really don't see how that's a better solution than adding a flag to the kernel API to just do what we want. [-- Attachment #1.2: Type: text/html, Size: 7037 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
[parent not found: <CAOFGe95cnFJCEAO6k8emWFWaAZdMPrdHjQos02peuWe6Ve1wvw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) [not found] ` <CAOFGe95cnFJCEAO6k8emWFWaAZdMPrdHjQos02peuWe6Ve1wvw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2017-07-12 7:39 ` Christian König [not found] ` <2a0f44cc-b893-153c-9852-b1b4855e386e-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 0 siblings, 1 reply; 19+ messages in thread From: Christian König @ 2017-07-12 7:39 UTC (permalink / raw) To: Jason Ekstrand Cc: Michel Dänzer, amd-gfx mailing list, Maling list - DRI developers [-- Attachment #1.1: Type: text/plain, Size: 2882 bytes --] Am 11.07.2017 um 17:43 schrieb Jason Ekstrand: > On Tue, Jul 11, 2017 at 12:17 AM, Christian König > <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org <mailto:deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>> wrote: > > [SNIP] > > If we ever want to share fences across processes > (which we do), > then this needs to be sorted in the kernel. > > That would clearly get a NAK from my side, even > Microsoft forbids > wait before signal because you can easily end up in > deadlock situations. > > Please don't NAK things that are required by the API > specification and > CTS tests. > > There is no requirement for every aspect of the Vulkan API > specification > to be mirrored 1:1 in the kernel <-> userspace API. We have to > work out > what makes sense at each level. > > > Exactly, if we have a synchronization problem between two > processes that should be solved in userspace. > > E.g. if process A hasn't submitted it's work to the kernel it > should flush it's commands before sending a flip event to the > compositor. > > > Ok, I think there is some confusion here on what is being proposed. > Here are some things that are *not* being proposed: > > 1. This does *not* allow a client to block another client's GPU work > indefinitely. This is entirely for a CPU wait API to allow for a > "wait for submit" as well as a "wait for finish". Yeah, that is a rather good point. > 2. This is *not* for system compositors that need to be robust > against malicious clients. I can see the point, but I think the kernel interface should still be idiot prove even in the unlikely case the universe suddenly stops to produce idiots. > The expected use for the OPAQUE_FD is two very tightly integrated > processes which trust each other but need to be able to share > synchronization primitives. Well, that raises a really important question: What is the actual use case for this if not communication between client and compositor? > Could we do this "in userspace"? Yes, with added kernel API. we > would need some way of strapping a second FD onto a syncobj or > combining two FDs into one to send across the wire or something like > that, then add a shared memory segment, and then pile on a bunch of > code to do cross-process condition variables and state tracking. I > really don't see how that's a better solution than adding a flag to > the kernel API to just do what we want. Way to complicated. My thinking was rather to optionally allow a single page to be mmap()ed into the process address space from the fd and then put a futex, pthread_cond or X shared memory fence or anything like that into it. Regards, Christian. [-- Attachment #1.2: Type: text/html, Size: 5553 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
[parent not found: <2a0f44cc-b893-153c-9852-b1b4855e386e-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) [not found] ` <2a0f44cc-b893-153c-9852-b1b4855e386e-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-07-12 8:39 ` Dave Airlie 2017-07-12 15:53 ` Jason Ekstrand 0 siblings, 1 reply; 19+ messages in thread From: Dave Airlie @ 2017-07-12 8:39 UTC (permalink / raw) To: Christian König Cc: Maling list - DRI developers, Michel Dänzer, amd-gfx mailing list, Jason Ekstrand On 12 July 2017 at 17:39, Christian König <deathsimple@vodafone.de> wrote: > Am 11.07.2017 um 17:43 schrieb Jason Ekstrand: > > On Tue, Jul 11, 2017 at 12:17 AM, Christian König <deathsimple@vodafone.de> > wrote: >> >> [SNIP] >>>>> >>>>> If we ever want to share fences across processes (which we do), >>>>> then this needs to be sorted in the kernel. >>>> >>>> That would clearly get a NAK from my side, even Microsoft forbids >>>> wait before signal because you can easily end up in deadlock >>>> situations. >>>> >>>> Please don't NAK things that are required by the API specification and >>>> CTS tests. >>> >>> There is no requirement for every aspect of the Vulkan API specification >>> to be mirrored 1:1 in the kernel <-> userspace API. We have to work out >>> what makes sense at each level. >> >> >> Exactly, if we have a synchronization problem between two processes that >> should be solved in userspace. >> >> E.g. if process A hasn't submitted it's work to the kernel it should flush >> it's commands before sending a flip event to the compositor. > > > Ok, I think there is some confusion here on what is being proposed. Here > are some things that are *not* being proposed: > > 1. This does *not* allow a client to block another client's GPU work > indefinitely. This is entirely for a CPU wait API to allow for a "wait for > submit" as well as a "wait for finish". > > Yeah, that is a rather good point. > > 2. This is *not* for system compositors that need to be robust against > malicious clients. > > I can see the point, but I think the kernel interface should still be idiot > prove even in the unlikely case the universe suddenly stops to produce > idiots. > > The expected use for the OPAQUE_FD is two very tightly integrated processes > which trust each other but need to be able to share synchronization > primitives. > > Well, that raises a really important question: What is the actual use case > for this if not communication between client and compositor? VR clients and compositors. > Could we do this "in userspace"? Yes, with added kernel API. we would need > some way of strapping a second FD onto a syncobj or combining two FDs into > one to send across the wire or something like that, then add a shared memory > segment, and then pile on a bunch of code to do cross-process condition > variables and state tracking. I really don't see how that's a better > solution than adding a flag to the kernel API to just do what we want. > > Way to complicated. > > My thinking was rather to optionally allow a single page to be mmap()ed into > the process address space from the fd and then put a futex, pthread_cond or > X shared memory fence or anything like that into it. > Is that easier than just waiting in the kernel, I'm not sure how optimised we need this path to be. Dave. _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) 2017-07-12 8:39 ` Dave Airlie @ 2017-07-12 15:53 ` Jason Ekstrand [not found] ` <CAOFGe95mj1G_Ap0oOPFyatZWEbEk_Kwo8Wn_E+eYS16Nhz_huQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 19+ messages in thread From: Jason Ekstrand @ 2017-07-12 15:53 UTC (permalink / raw) To: Dave Airlie Cc: Maling list - DRI developers, Michel Dänzer, amd-gfx mailing list [-- Attachment #1.1: Type: text/plain, Size: 4893 bytes --] On Wed, Jul 12, 2017 at 1:39 AM, Dave Airlie <airlied@gmail.com> wrote: > On 12 July 2017 at 17:39, Christian König <deathsimple@vodafone.de> wrote: > > Am 11.07.2017 um 17:43 schrieb Jason Ekstrand: > > > > On Tue, Jul 11, 2017 at 12:17 AM, Christian König < > deathsimple@vodafone.de> > > wrote: > >> > >> [SNIP] > >>>>> > >>>>> If we ever want to share fences across processes (which we do), > >>>>> then this needs to be sorted in the kernel. > >>>> > >>>> That would clearly get a NAK from my side, even Microsoft forbids > >>>> wait before signal because you can easily end up in deadlock > >>>> situations. > >>>> > >>>> Please don't NAK things that are required by the API specification and > >>>> CTS tests. > >>> > >>> There is no requirement for every aspect of the Vulkan API > specification > >>> to be mirrored 1:1 in the kernel <-> userspace API. We have to work out > >>> what makes sense at each level. > >> > >> > >> Exactly, if we have a synchronization problem between two processes that > >> should be solved in userspace. > >> > >> E.g. if process A hasn't submitted it's work to the kernel it should > flush > >> it's commands before sending a flip event to the compositor. > > > > > > Ok, I think there is some confusion here on what is being proposed. Here > > are some things that are *not* being proposed: > > > > 1. This does *not* allow a client to block another client's GPU work > > indefinitely. This is entirely for a CPU wait API to allow for a "wait > for > > submit" as well as a "wait for finish". > > > > Yeah, that is a rather good point. > > > > 2. This is *not* for system compositors that need to be robust against > > malicious clients. > > > > I can see the point, but I think the kernel interface should still be > idiot > > prove even in the unlikely case the universe suddenly stops to produce > > idiots. > Fair enough. Maybe I've spent too much time in the Vulkan world where being an idiot is, theoretically, disallowed. And, by "disallowed", I mean that you're free to be one with the understanding that your process may get straight-up killed on *any* API violation. > > The expected use for the OPAQUE_FD is two very tightly integrated > processes > > which trust each other but need to be able to share synchronization > > primitives. > > > > Well, that raises a really important question: What is the actual use > case > > for this if not communication between client and compositor? > > VR clients and compositors. > Yeah, that. Wouldn't they want the same security guarantees as your OS compositor? One would think, but none of the people making VR compositors seem to care all that much about the case of malicious clients. In general, they run a fairly closed platform where they aren't really running "arbitrary" apps on their compositor. Also, they tend to write both the client and server sides of the VR compositor protocol and the only thing the app touches is their API. I'm not going to try too hard to justify their lack of concern about deadlock, but there it is. > > Could we do this "in userspace"? Yes, with added kernel API. we would > need > > some way of strapping a second FD onto a syncobj or combining two FDs > into > > one to send across the wire or something like that, then add a shared > memory > > segment, and then pile on a bunch of code to do cross-process condition > > variables and state tracking. I really don't see how that's a better > > solution than adding a flag to the kernel API to just do what we want. > > > > Way to complicated. > > > > My thinking was rather to optionally allow a single page to be mmap()ed > into > > the process address space from the fd and then put a futex, pthread_cond > or > > X shared memory fence or anything like that into it. > A single page + fence sounds a lot like a DRM BO. One of my original plans for implementing the feature was to just use a single-page BO and do some userspace stuff in the mapped page. There are two problems here: 1) It could be very easy for a malicious client to map the page and then mess up whatever CPU data structure I use for the semaphore. I could probably make it robust but there is an attack vector there that's going to be tricky. 2) I have no way, on import, to tell the difference between a 4K memory object and a fence. Then syncobj came along and promised to solve all my problems... > Is that easier than just waiting in the kernel, I'm not sure how > optimised we need this path to be. > I don't think so. I think it's more-or-less the same code regardless of how it's done. The advantage of doing it in the kernel is that it's standardized (we don't have to both go write that userspace code) and it doesn't have the problems stated above. [-- Attachment #1.2: Type: text/html, Size: 6563 bytes --] [-- Attachment #2: Type: text/plain, Size: 160 bytes --] _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 19+ messages in thread
[parent not found: <CAOFGe95mj1G_Ap0oOPFyatZWEbEk_Kwo8Wn_E+eYS16Nhz_huQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) [not found] ` <CAOFGe95mj1G_Ap0oOPFyatZWEbEk_Kwo8Wn_E+eYS16Nhz_huQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2017-07-12 16:45 ` Christian König 2017-07-12 17:02 ` Jason Ekstrand 0 siblings, 1 reply; 19+ messages in thread From: Christian König @ 2017-07-12 16:45 UTC (permalink / raw) To: Jason Ekstrand, Dave Airlie Cc: Michel Dänzer, amd-gfx mailing list, Maling list - DRI developers [-- Attachment #1.1: Type: text/plain, Size: 707 bytes --] Am 12.07.2017 um 17:53 schrieb Jason Ekstrand: > [SNIP] > > Is that easier than just waiting in the kernel, I'm not sure how > optimised we need this path to be. > > > I don't think so. I think it's more-or-less the same code regardless > of how it's done. The advantage of doing it in the kernel is that > it's standardized (we don't have to both go write that userspace code) > and it doesn't have the problems stated above. Ok, I'm convinced. The next price question is then how do we do it? I mean writing an IOCTL to wait for a fence to appear is simple, but do we also need to wait for a fence to change? As long as waits don't consume fences that might be rather tricky. Christian. [-- Attachment #1.2: Type: text/html, Size: 1702 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) 2017-07-12 16:45 ` Christian König @ 2017-07-12 17:02 ` Jason Ekstrand 0 siblings, 0 replies; 19+ messages in thread From: Jason Ekstrand @ 2017-07-12 17:02 UTC (permalink / raw) To: Christian König Cc: Michel Dänzer, amd-gfx mailing list, Maling list - DRI developers [-- Attachment #1.1: Type: text/plain, Size: 1711 bytes --] On Wed, Jul 12, 2017 at 9:45 AM, Christian König <deathsimple@vodafone.de> wrote: > Am 12.07.2017 um 17:53 schrieb Jason Ekstrand: > > [SNIP] > > >> Is that easier than just waiting in the kernel, I'm not sure how >> optimised we need this path to be. >> > > I don't think so. I think it's more-or-less the same code regardless of > how it's done. The advantage of doing it in the kernel is that it's > standardized (we don't have to both go write that userspace code) and it > doesn't have the problems stated above. > > > Ok, I'm convinced. The next price question is then how do we do it? > > I mean writing an IOCTL to wait for a fence to appear is simple, but do we > also need to wait for a fence to change? > I don't think so. Taking a page from the Vulkan book, I think the "right" thing to do would be to have a reset ioctl that simply deletes the dma_fence and replaces it with NULL. Then the only behavior you care about is "wait for a fence to appear". The usage pattern becomes: 1) Submit work to signal the syncobj 2) Wait on the syncobj (this may happen before the submit) 3) Once everyone is done waiting, reset the syncobj If someone does a wait on a stale syncobj that has completed and not yet been reset, they return immediately. Yes, there is a possible race between 2 and 3, especially if 2 is waiting on multiple syncobjs. Ideally, we'd make it so that a reset in the middle of someone else's wait wouldn't cause them to wait until that syncobj gets triggerd a second time. However, I don't think it's a deal-breaker as any client that sets the "wait for submit" flag should know that it's liable to wait forever and set a timeout. [-- Attachment #1.2: Type: text/html, Size: 3119 bytes --] [-- Attachment #2: Type: text/plain, Size: 160 bytes --] _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) 2017-07-10 21:09 ` Jason Ekstrand 2017-07-11 2:36 ` Michel Dänzer @ 2017-07-11 7:22 ` Daniel Vetter [not found] ` <20170711072110.oziftm34o54plbvv-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org> 1 sibling, 1 reply; 19+ messages in thread From: Daniel Vetter @ 2017-07-11 7:22 UTC (permalink / raw) To: Jason Ekstrand; +Cc: amd-gfx mailing list, Maling list - DRI developers On Mon, Jul 10, 2017 at 02:09:42PM -0700, Jason Ekstrand wrote: > On Mon, Jul 10, 2017 at 9:15 AM, Christian König <deathsimple@vodafone.de> > wrote: > > > Am 10.07.2017 um 17:52 schrieb Jason Ekstrand: > > > > On Mon, Jul 10, 2017 at 8:45 AM, Christian König <deathsimple@vodafone.de> > > wrote: > > > >> Am 10.07.2017 um 17:28 schrieb Jason Ekstrand: > >> > >> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com> wrote: > >> [SNIP] > >> So, reading some CTS tests again, and I think we have a problem here. > >> The Vulkan spec allows you to wait on a fence that is in the unsignaled > >> state. > >> > >> > >> At least on the closed source driver that would be illegal as far as I > >> know. > >> > > > > Then they are doing workarounds in userspace. There are definitely CTS > > tests for this: > > > > https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/ > > modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74 > > > > > >> You can't wait on a semaphore before the signal operation is send down to > >> the kerel. > >> > > > > We (Intel) deal with this today by tracking whether or not the fence has > > been submitted and using a condition variable in userspace to sort it all > > out. > > > > > > Which sounds exactly like what AMD is doing in it's drivers as well. > > > > Which doesn't work cross-process so... > > > If we ever want to share fences across processes (which we do), then this > > needs to be sorted in the kernel. > > > > > > That would clearly get a NAK from my side, even Microsoft forbids wait > > before signal because you can easily end up in deadlock situations. > > > > Please don't NAK things that are required by the API specification and CTS > tests. That makes it very hard for people like me to get their jobs done. > :-) > > Now, as for whether or not it's a good idea. First off, we do have > timeouts an a status querying mechanism so an application can just set a > timeout of 1s and do something if it times out. Second, if the application > is a compositor or something else that doesn't trust its client, it > shouldn't be using the OPAQUE_FD mechanism of Vulkan semaphore/fence > sharing anyway. For those scenarios, they can require the untrusted client > to use FENCE_FD (sync file) and they have all of the usual guarantees about > when the work got submitted, etc. > > Also, I'm more than happy to put this all behind a flag so it's not the > default behavior. Android had a similar requirement to have a fence fd before the fence existed in hwc1, before they fixed that in hwc2. But it's probably still useful for deeply pipelined renderes with littel memory, aka tiled renderers on phones. The idea we've tossed around is to create a so-called future fence. In the kernel if you try to deref a future fence, the usual thing that happens is you'll block (interruptibly, which we can because fence lookup might fail), _until_ a real fence shows up and can be returned. That implements the uapi expectations without risking deadlocks in the kernel, albeit with a bit much blocking. Still better than doing the same in userspace (since in userspace you probably need to do that when importing the fence, not at execbuf time). 2nd step would then be to give drivers with a robust hand recover logic a special interface to be able to instantiate hw waits on such a future fence before the signalling part is queued up. As long as any waiters have robust hang recovery we still don't have a problem. Everyone else (e.g. drm display-only drivers, v4l nodes, camera ip, whatever else participates in the shared buffers and fences stuff) would still block until the real fence shows up. Similar idea should work for semaphores too. Gustavo did look into the future fence stuff iirc, I think there was even an rfc sometimes ago. Cheers, Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 19+ messages in thread
[parent not found: <20170711072110.oziftm34o54plbvv-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>]
* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6) [not found] ` <20170711072110.oziftm34o54plbvv-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org> @ 2017-07-11 19:44 ` Jason Ekstrand 0 siblings, 0 replies; 19+ messages in thread From: Jason Ekstrand @ 2017-07-11 19:44 UTC (permalink / raw) To: Daniel Vetter Cc: gustavo-THi1TnShQwVAfugRpC6u6w, Christian König, amd-gfx mailing list, Maling list - DRI developers [-- Attachment #1.1: Type: text/plain, Size: 3979 bytes --] On Tue, Jul 11, 2017 at 12:22 AM, Daniel Vetter <daniel-/w4YWyX8dFk@public.gmane.org> wrote: > On Mon, Jul 10, 2017 at 02:09:42PM -0700, Jason Ekstrand wrote: > > On Mon, Jul 10, 2017 at 9:15 AM, Christian König < > deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> > > wrote: > > > > > Am 10.07.2017 um 17:52 schrieb Jason Ekstrand: > > > > > > On Mon, Jul 10, 2017 at 8:45 AM, Christian König < > deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> > > > wrote: > > > > > >> Am 10.07.2017 um 17:28 schrieb Jason Ekstrand: > > >> > > >> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> > wrote: > > >> [SNIP] > > >> So, reading some CTS tests again, and I think we have a problem here. > > >> The Vulkan spec allows you to wait on a fence that is in the > unsignaled > > >> state. > > >> > > >> > > >> At least on the closed source driver that would be illegal as far as I > > >> know. > > >> > > > > > > Then they are doing workarounds in userspace. There are definitely CTS > > > tests for this: > > > > > > https://github.com/KhronosGroup/VK-GL-CTS/blob/ > master/external/vulkancts/ > > > modules/vulkan/synchronization/vktSynchronizationBasicFenceTe > sts.cpp#L74 > > > > > > > > >> You can't wait on a semaphore before the signal operation is send > down to > > >> the kerel. > > >> > > > > > > We (Intel) deal with this today by tracking whether or not the fence > has > > > been submitted and using a condition variable in userspace to sort it > all > > > out. > > > > > > > > > Which sounds exactly like what AMD is doing in it's drivers as well. > > > > > > > Which doesn't work cross-process so... > > > > > If we ever want to share fences across processes (which we do), then > this > > > needs to be sorted in the kernel. > > > > > > > > > That would clearly get a NAK from my side, even Microsoft forbids wait > > > before signal because you can easily end up in deadlock situations. > > > > > > > Please don't NAK things that are required by the API specification and > CTS > > tests. That makes it very hard for people like me to get their jobs > done. > > :-) > > > > Now, as for whether or not it's a good idea. First off, we do have > > timeouts an a status querying mechanism so an application can just set a > > timeout of 1s and do something if it times out. Second, if the > application > > is a compositor or something else that doesn't trust its client, it > > shouldn't be using the OPAQUE_FD mechanism of Vulkan semaphore/fence > > sharing anyway. For those scenarios, they can require the untrusted > client > > to use FENCE_FD (sync file) and they have all of the usual guarantees > about > > when the work got submitted, etc. > > > > Also, I'm more than happy to put this all behind a flag so it's not the > > default behavior. > > Android had a similar requirement to have a fence fd before the fence > existed in hwc1, before they fixed that in hwc2. But it's probably still > useful for deeply pipelined renderes with littel memory, aka tiled > renderers on phones. > > The idea we've tossed around is to create a so-called future fence. In the > kernel if you try to deref a future fence, the usual thing that happens is > you'll block (interruptibly, which we can because fence lookup might > fail), _until_ a real fence shows up and can be returned. That implements > the uapi expectations without risking deadlocks in the kernel, albeit with > a bit much blocking. Still better than doing the same in userspace (since > in userspace you probably need to do that when importing the fence, not at > execbuf time). > Yes, I'm aware of the future fence idea. However, that's not really all that related. We're not talking about blocking GPU work here. We're talking about the CPU wait API having support for "wait for submit and signal" behavior instead of just "wait for signal". [-- Attachment #1.2: Type: text/html, Size: 5368 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2017-07-12 17:02 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-07-06 1:04 [PATCH] drm/syncobj: add sync obj wait interface. (v6) Dave Airlie
[not found] ` <20170706010409.9373-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-07-10 15:28 ` Jason Ekstrand
2017-07-10 15:45 ` Christian König
2017-07-10 15:52 ` Jason Ekstrand
[not found] ` <CAOFGe96gaX2OjsXr8bNBUiqwyKAQfMSycH7PXgYAt_BBJ1UHkQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-07-10 15:58 ` Xie, AlexBin
[not found] ` <DM5PR12MB12574F31128759A47DA1C7B7F2A90-2J9CzHegvk/NHlLGalgXawdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-07-10 16:13 ` Christian König
[not found] ` <60c420a9-eab6-a9e0-0306-893053d82a5d-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-07-10 23:15 ` Jason Ekstrand
2017-07-10 16:15 ` Christian König
[not found] ` <fe27d2f8-760b-c2d4-9474-7eaaae560086-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-07-10 21:09 ` Jason Ekstrand
2017-07-11 2:36 ` Michel Dänzer
2017-07-11 7:17 ` Christian König
[not found] ` <c7a44542-acca-09e1-5ea5-bf9938921e54-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-07-11 15:43 ` Jason Ekstrand
[not found] ` <CAOFGe95cnFJCEAO6k8emWFWaAZdMPrdHjQos02peuWe6Ve1wvw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-07-12 7:39 ` Christian König
[not found] ` <2a0f44cc-b893-153c-9852-b1b4855e386e-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-07-12 8:39 ` Dave Airlie
2017-07-12 15:53 ` Jason Ekstrand
[not found] ` <CAOFGe95mj1G_Ap0oOPFyatZWEbEk_Kwo8Wn_E+eYS16Nhz_huQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-07-12 16:45 ` Christian König
2017-07-12 17:02 ` Jason Ekstrand
2017-07-11 7:22 ` Daniel Vetter
[not found] ` <20170711072110.oziftm34o54plbvv-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
2017-07-11 19:44 ` Jason Ekstrand
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.